Scraping Codewars Challenges: Kata Metadata, Completion Stats & Rankings with Python (2026)
Codewars hosts programming challenges (kata) across 50+ languages, ranked from 8 kyu (beginner) to 1 kyu (expert). The catalog is rich data for multiple use cases: difficulty distribution analysis, completion rates as a proxy for problem-solving patterns, tag co-occurrence for curriculum design, language popularity trends, and ranking benchmarks.
The public API gives you direct access to kata metadata and user profiles without authentication. This guide covers the full pipeline: API usage, user history collection, anti-bot handling, SQLite storage, discovery strategies, and analytics.
What the Codewars API Provides
Base URL: https://www.codewars.com/api/v1
Key endpoints:
- GET /code-challenges/{id-or-slug} — kata metadata, completion stats, description, tags
- GET /users/{username} — profile, honor, rank, language rankings
- GET /users/{username}/code-challenges/completed — paginated completion history
- GET /users/{username}/code-challenges/authored — kata created by a user
- GET /code-challenges/search/{query} — search for kata by keyword
No API key needed. Rate limits aren't published, but sustained traffic over ~1 req/s triggers 429 responses. The website itself is behind Cloudflare.
Installation
pip install requests beautifulsoup4 httpx
Fetching Kata Details
import requests
import time
import json
import sqlite3
from datetime import datetime, timezone
BASE_URL = "https://www.codewars.com/api/v1"
def get_kata(kata_id: str, session: requests.Session) -> dict | None:
    """Fetch full metadata for a kata by ID or slug.

    Args:
        kata_id: Codewars kata ID (hex string) or URL slug.
        session: Shared requests session (connection reuse + common headers).

    Returns:
        A flat dict of normalized kata fields, or None on 404 or any
        request failure.
    """
    url = f"{BASE_URL}/code-challenges/{kata_id}"
    try:
        resp = session.get(url, timeout=10)
        # Bounded exponential backoff on rate limiting. The original retried
        # exactly once with a fixed sleep; a second 429 then surfaced as an
        # HTTPError. Three backed-off retries ride out longer 429 windows.
        for attempt in range(3):
            if resp.status_code != 429:
                break
            time.sleep(10 * (2 ** attempt))
            resp = session.get(url, timeout=10)
        if resp.status_code == 404:
            return None
        resp.raise_for_status()
        data = resp.json()
        # "rank" can be null for unranked (beta) kata — `or {}` guards both
        # a missing key and an explicit null.
        rank = data.get("rank") or {}
        return {
            "id": data.get("id"),
            "name": data.get("name"),
            "slug": data.get("slug"),
            "url": f"https://www.codewars.com/kata/{data.get('slug', '')}",
            "description": data.get("description", ""),
            "rank_id": rank.get("id"),
            "rank_name": rank.get("name"),
            "languages": data.get("languages", []),
            "total_completed": data.get("totalCompleted", 0),
            "total_attempts": data.get("totalAttempts", 0),
            "total_stars": data.get("totalStars", 0),
            "vote_score": data.get("voteScore", 0),
            "tags": data.get("tags", []),
            "created_by": data.get("createdBy", {}).get("username"),
            "published_at": data.get("publishedAt"),
            "approved_at": data.get("approvedAt"),
        }
    except requests.RequestException as e:
        print(f"Error fetching {kata_id}: {e}")
        return None
def make_session() -> requests.Session:
    """Return a requests session preset with JSON-accepting browser headers."""
    session = requests.Session()
    session.headers["Accept"] = "application/json"
    session.headers["User-Agent"] = (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36"
    )
    return session
# Fetch a batch of kata
def fetch_kata_batch(kata_ids: list[str], delay: float = 1.2) -> list[dict]:
    """Fetch metadata for a list of kata IDs, pausing `delay` seconds between calls."""
    session = make_session()
    fetched: list[dict] = []
    total = len(kata_ids)
    for position, kata_id in enumerate(kata_ids, start=1):
        record = get_kata(kata_id, session)
        if record is not None:
            fetched.append(record)
            print(f" [{position}/{total}] {record['name']} | {record['rank_name']} | "
                  f"{record['total_completed']:,} completions")
        time.sleep(delay)
    return fetched
# Example: well-known kata
# NOTE(review): runs at import time and makes live API calls
# (~1.2 s per kata due to the politeness delay).
sample_ids = [
    "5571d9fc11526780a000011a",  # Pete, the baker
    "51b66044bce5799a7f000003",  # Highest Scoring Word
    "526571aae218b8ee490006f4",  # Tribonacci Sequence
]
kata_list = fetch_kata_batch(sample_ids)
Rank IDs run from -8 (8 kyu, easiest) to -1 (1 kyu, hardest). The API uses negative integers — keep that in mind when sorting by difficulty.
Fetching User Profiles
def get_user(username: str, session: requests.Session) -> dict | None:
    """Fetch a user profile with rank and honor data.

    Returns a normalized profile dict, or None on 404 / request failure.
    """
    try:
        resp = session.get(f"{BASE_URL}/users/{username}", timeout=10)
        if resp.status_code == 404:
            return None
        resp.raise_for_status()
        payload = resp.json()
        ranks = payload.get("ranks", {})
        # Flatten per-language rank objects into a compact mapping.
        language_ranks = {
            lang: {
                "rank": details.get("id"),
                "rank_name": details.get("name"),
                "score": details.get("score", 0),
            }
            for lang, details in ranks.get("languages", {}).items()
        }
        overall = ranks.get("overall", {})
        challenges = payload.get("codeChallenges", {})
        return {
            "username": payload.get("username"),
            "honor": payload.get("honor", 0),
            "clan": payload.get("clan", ""),
            "leaderboard_position": payload.get("leaderboardPosition"),
            "overall_rank": overall.get("name"),
            "overall_score": overall.get("score", 0),
            "language_ranks": language_ranks,
            "completed_count": challenges.get("totalCompleted", 0),
            "authored_count": challenges.get("totalAuthored", 0),
        }
    except requests.RequestException as e:
        print(f"Error fetching user {username}: {e}")
        return None
Paginated Completion History
def get_completed_kata(username: str, session: requests.Session) -> list[dict]:
    """Get all completed kata for a user (paginated, 200 per page).

    Args:
        username: Codewars username.
        session: Shared requests session.

    Returns:
        A list of completion dicts (kata id/name/slug, completion timestamp,
        languages). Partial results are returned if a page ultimately fails.
    """
    completed: list[dict] = []
    page = 0
    rate_limit_hits = 0
    while True:
        url = f"{BASE_URL}/users/{username}/code-challenges/completed?page={page}"
        try:
            resp = session.get(url, timeout=10)
            if resp.status_code == 429:
                # Bound the retries: the original looped unconditionally on
                # 429 and could spin forever against a persistently
                # rate-limited endpoint.
                rate_limit_hits += 1
                if rate_limit_hits > 5:
                    print(f"Giving up on {username} after repeated 429s.")
                    break
                print(f"Rate limited (user {username}, page {page}). Waiting 15s...")
                time.sleep(15)
                continue
            if resp.status_code == 404:
                break  # unknown user: return whatever was collected (nothing)
            resp.raise_for_status()
            data = resp.json()
            items = data.get("data", [])
            if not items:
                break
            for item in items:
                completed.append({
                    "kata_id": item.get("id"),
                    "kata_name": item.get("name"),
                    "slug": item.get("slug"),
                    "completed_at": item.get("completedAt"),
                    "completed_languages": item.get("completedLanguages", []),
                })
            total_pages = data.get("totalPages", 1)
            print(f" {username}: page {page+1}/{total_pages} ({len(completed)} total)")
            if page >= total_pages - 1:
                break
            page += 1
            time.sleep(1.0)  # politeness delay between pages
        except requests.RequestException as e:
            print(f"Page {page} failed for {username}: {e}")
            break
    return completed
def get_authored_kata(username: str, session: requests.Session) -> list[dict]:
    """Get kata authored by a user.

    Returns:
        A list of kata summary dicts, or [] on any request/parse failure.
    """
    url = f"{BASE_URL}/users/{username}/code-challenges/authored"
    try:
        resp = session.get(url, timeout=10)
        resp.raise_for_status()
        data = resp.json()
    # Narrowed from a bare `except Exception`: only network/HTTP errors and
    # malformed JSON (ValueError) are expected here; anything else is a bug
    # and should surface.
    except (requests.RequestException, ValueError) as e:
        print(f"Error fetching authored for {username}: {e}")
        return []
    return [
        {
            "id": item.get("id"),
            "name": item.get("name"),
            "slug": item.get("slug"),
            # `or {}` guards an explicit null rank (unranked/beta kata).
            "rank_id": (item.get("rank") or {}).get("id"),
            "rank_name": (item.get("rank") or {}).get("name"),
            "total_completed": item.get("totalCompleted", 0),
            "languages": item.get("languages", []),
        }
        for item in data.get("data", [])
    ]
Kata Search
def search_kata(query: str, session: requests.Session | None = None) -> list[dict]:
    """Search for kata by keyword using the search endpoint.

    Args:
        query: Free-text search term; URL-quoted before the request.
        session: Optional shared session; a fresh one is created if omitted.
            (Annotation fixed: the original declared `requests.Session = None`,
            which is not a valid type for the None default.)

    Returns:
        A list of kata summary dicts, or [] on failure.
    """
    s = session or make_session()
    url = f"{BASE_URL}/code-challenges/search/{requests.utils.quote(query)}"
    try:
        resp = s.get(url, timeout=10)
        resp.raise_for_status()
        data = resp.json()
    # Narrowed from a bare `except Exception` to the errors this call can
    # realistically raise: network/HTTP failures and malformed JSON.
    except (requests.RequestException, ValueError) as e:
        print(f"Search failed for '{query}': {e}")
        return []
    return [
        {
            "id": item.get("id"),
            "name": item.get("name"),
            "slug": item.get("slug"),
            "rank_id": (item.get("rank") or {}).get("id"),
            "rank_name": (item.get("rank") or {}).get("name"),
            "total_completed": item.get("totalCompleted", 0),
            "languages": item.get("languages", []),
            "tags": item.get("tags", []),
        }
        for item in data.get("data", [])
    ]
# Discover kata via common topics
# NOTE(review): runs at import time and makes live API calls; wrap in
# `if __name__ == "__main__":` for library use.
topics = ["binary tree", "dynamic programming", "sorting", "graph", "string manipulation"]
all_discovered = []
session = make_session()
for topic in topics:
    results = search_kata(topic, session)
    all_discovered.extend(results)
    print(f" '{topic}': {len(results)} kata")
    time.sleep(1.5)  # stay under ~1 req/s to avoid 429s
print(f"\nTotal discovered: {len(all_discovered)}")
Anti-Bot Measures and Proxy Setup
The Codewars API (/api/v1/*) is relatively forgiving at 1 req/s. The website is a different story — kata listing pages, search results, and user leaderboards sit behind Cloudflare's bot protection. Browser fingerprinting, JS challenges, and IP reputation checks activate when you try to scrape those pages at scale.
Practical limits from testing: - API: ~1 req/s sustained before 429s. Backoff exponentially on 429, retry up to 5 times. - Website: A single residential IP gets flagged within 50-100 requests without browser fingerprint. Datacenter IPs get blocked almost immediately.
For large-scale collection — tens of thousands of kata, leaderboard data not in the API — residential proxy rotation is the standard solution. ThorData offers a residential proxy pool that handles IP rotation automatically, keeping individual IPs well under rate-limit thresholds and sidestepping Cloudflare reputation checks:
import requests

# Rotating residential proxy endpoint. Replace USER/PASS with real
# credentials before running; the gateway rotates the exit IP per request.
PROXY_CONFIG = {
    "http": "http://USER:[email protected]:9000",
    "https": "http://USER:[email protected]:9000",
}
def get_kata_proxied(kata_id: str) -> dict | None:
    """Fetch kata metadata through rotating proxy.

    Returns the raw API JSON, or None on 404 / any failure.
    """
    request_kwargs = {
        "proxies": PROXY_CONFIG,
        "timeout": 15,
        "headers": {"Accept": "application/json"},
    }
    try:
        resp = requests.get(f"{BASE_URL}/code-challenges/{kata_id}", **request_kwargs)
        if resp.status_code == 404:
            return None
        resp.raise_for_status()
        return resp.json()
    except Exception as e:
        print(f"Proxied fetch failed for {kata_id}: {e}")
        return None
Rotate the proxy per-request rather than per-session. Combined with a 1-2 second delay, this keeps throughput high while avoiding blocks.
SQLite Kata Database
def init_db(db_path: str = "codewars.db") -> sqlite3.Connection:
    """Initialize the Codewars SQLite database.

    Creates the kata/users/completions tables and their indexes if they do
    not already exist, enables WAL journaling, and returns the connection.
    """
    schema = """
        CREATE TABLE IF NOT EXISTS kata (
            id TEXT PRIMARY KEY,
            name TEXT,
            slug TEXT UNIQUE,
            url TEXT,
            description TEXT,
            rank_id INTEGER,
            rank_name TEXT,
            languages TEXT,
            total_completed INTEGER,
            total_attempts INTEGER,
            total_stars INTEGER,
            vote_score REAL,
            tags TEXT,
            created_by TEXT,
            published_at TEXT,
            fetched_at TEXT DEFAULT (datetime('now'))
        );
        CREATE TABLE IF NOT EXISTS users (
            username TEXT PRIMARY KEY,
            honor INTEGER,
            clan TEXT,
            leaderboard_position INTEGER,
            overall_rank TEXT,
            overall_score INTEGER,
            language_ranks TEXT,
            completed_count INTEGER,
            authored_count INTEGER,
            fetched_at TEXT DEFAULT (datetime('now'))
        );
        CREATE TABLE IF NOT EXISTS completions (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            username TEXT NOT NULL,
            kata_id TEXT NOT NULL,
            kata_name TEXT,
            completed_at TEXT,
            completed_languages TEXT,
            UNIQUE(username, kata_id),
            FOREIGN KEY (username) REFERENCES users(username),
            FOREIGN KEY (kata_id) REFERENCES kata(id)
        );
        CREATE INDEX IF NOT EXISTS idx_kata_rank ON kata(rank_id);
        CREATE INDEX IF NOT EXISTS idx_kata_completed ON kata(total_completed DESC);
        CREATE INDEX IF NOT EXISTS idx_completions_user ON completions(username);
        CREATE INDEX IF NOT EXISTS idx_completions_kata ON completions(kata_id);
    """
    conn = sqlite3.connect(db_path)
    # WAL lets readers proceed while a writer holds the database.
    conn.execute("PRAGMA journal_mode=WAL")
    conn.executescript(schema)
    conn.commit()
    return conn
def upsert_kata(conn: sqlite3.Connection, kata: dict) -> None:
    """Insert or update a kata record.

    On conflict only the volatile stats columns (completions, attempts,
    stars, vote score) and the fetch timestamp are refreshed.
    """
    sql = """
        INSERT INTO kata (id, name, slug, url, description, rank_id, rank_name,
            languages, total_completed, total_attempts, total_stars, vote_score,
            tags, created_by, published_at, fetched_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        ON CONFLICT(id) DO UPDATE SET
            total_completed=excluded.total_completed,
            total_attempts=excluded.total_attempts,
            total_stars=excluded.total_stars,
            vote_score=excluded.vote_score,
            fetched_at=excluded.fetched_at
    """
    params = (
        kata["id"],
        kata["name"],
        kata["slug"],
        kata.get("url"),
        kata.get("description"),
        kata["rank_id"],
        kata["rank_name"],
        json.dumps(kata["languages"]),
        kata["total_completed"],
        kata.get("total_attempts", 0),
        kata["total_stars"],
        kata.get("vote_score", 0),
        json.dumps(kata["tags"]),
        kata.get("created_by"),
        kata.get("published_at"),
        datetime.now(timezone.utc).isoformat(),
    )
    conn.execute(sql, params)
    conn.commit()
def insert_completions(conn: sqlite3.Connection, username: str, completions: list[dict]) -> int:
    """Insert user completion records.

    Uses INSERT OR IGNORE so re-runs are idempotent via the
    UNIQUE(username, kata_id) constraint.

    Returns:
        Number of rows actually inserted (duplicates are skipped).
    """
    inserted = 0
    for c in completions:
        try:
            cur = conn.execute("""
                INSERT OR IGNORE INTO completions
                (username, kata_id, kata_name, completed_at, completed_languages)
                VALUES (?, ?, ?, ?, ?)
            """, (
                username, c["kata_id"], c.get("kata_name"),
                c.get("completed_at"),
                json.dumps(c.get("completed_languages", [])),
            ))
            # cursor.rowcount reflects the rows changed by this statement
            # (1 if inserted, 0 if ignored) — avoids the original's extra
            # `SELECT changes()` round trip per row.
            inserted += cur.rowcount
        except sqlite3.Error:
            pass  # best-effort: skip malformed rows, keep going
    conn.commit()
    return inserted
Discovery Strategies
The API doesn't expose a "list all kata" endpoint. Use these indirect approaches:
User completion histories. Pull completed kata for a sample of active users. A few hundred active users yields thousands of unique kata IDs covering most of the popular catalog:
def discover_kata_via_users(
    usernames: list[str],
    conn: sqlite3.Connection,
) -> set[str]:
    """Discover kata IDs by collecting user completion histories."""
    seen: set[str] = set()
    session = make_session()
    for username in usernames:
        history = get_completed_kata(username, session)
        seen.update(entry["kata_id"] for entry in history)
        insert_completions(conn, username, history)
        print(f" {username}: {len(history)} completions. "
              f"Total unique kata: {len(seen)}")
        time.sleep(1.5)  # politeness delay between users
    return seen
# Seed users — mix of ranks for broad coverage
# NOTE(review): runs at import time; the discovery call below hits the
# live API once per seed user (paginated).
seed_users = [
    "g964", "jhoffner", "kazk", "GiacomoSorbi",
    "user2244877", "Voile", "smol", "dbuezas",
]
db = init_db("codewars.db")  # creates codewars.db in the working directory
kata_ids = discover_kata_via_users(seed_users, db)
print(f"\nDiscovered {len(kata_ids)} unique kata IDs")
Keyword search. Query common programming topics to sweep the catalog:
def discover_via_search(conn: sqlite3.Connection) -> set[str]:
    """Discover kata by searching common topics.

    Search results are stored as partial kata records. Fields the search
    endpoint does not return are defaulted before the upsert: the original
    passed the raw result through with only "description" filled in, so
    upsert_kata's direct kata["total_stars"] access raised KeyError.

    Returns:
        Set of discovered kata IDs.
    """
    topics = [
        "sort", "tree", "graph", "string", "array", "math",
        "recursion", "dynamic programming", "regex", "matrix",
        "heap", "stack", "queue", "linked list", "binary search",
    ]
    discovered = set()
    session = make_session()
    # Defaults for fields upsert_kata requires but search results lack.
    partial_defaults = {"description": "", "total_stars": 0, "vote_score": 0}
    for topic in topics:
        results = search_kata(topic, session)
        for r in results:
            if r.get("id"):
                discovered.add(r["id"])
                upsert_kata(conn, {**partial_defaults, **r})  # partial record
        print(f" '{topic}': {len(results)} kata (total: {len(discovered)})")
        time.sleep(1.5)
    return discovered
Analytics and Statistics
Once you have data in SQLite, analysis is straightforward:
from collections import Counter
def analyze_kata_catalog(conn: sqlite3.Connection) -> None:
    """Print summary statistics for the kata database."""
    by_rank = conn.execute("SELECT rank_name, COUNT(*) FROM kata GROUP BY rank_name").fetchall()
    print("\nDifficulty distribution:")
    for rank, count in sorted(by_rank, key=lambda row: row[0] or ""):
        print(f" {rank or 'Unknown':15s}: {count:4d}")

    # Language coverage — languages are stored as JSON arrays per kata.
    lang_counter: Counter = Counter()
    for (langs_json,) in conn.execute("SELECT languages FROM kata WHERE languages != '[]'"):
        lang_counter.update(json.loads(langs_json))
    print("\nTop 15 languages:")
    for lang, count in lang_counter.most_common(15):
        print(f" {lang:20s}: {count}")

    # Most completed kata
    print("\nMost completed kata:")
    for name, rank, n in conn.execute(
        "SELECT name, rank_name, total_completed FROM kata ORDER BY total_completed DESC LIMIT 10"
    ):
        print(f" {name[:50]:50s} [{rank}] {n:,}")

    # Tag popularity — tags are stored as JSON arrays per kata.
    tag_counter: Counter = Counter()
    for (tags_json,) in conn.execute("SELECT tags FROM kata WHERE tags != '[]'"):
        tag_counter.update(json.loads(tags_json))
    print("\nTop 15 tags:")
    for tag, count in tag_counter.most_common(15):
        print(f" {tag:30s}: {count}")
def completion_rate_by_difficulty(conn: sqlite3.Connection) -> list[dict]:
    """Calculate average completion rate by difficulty level.

    Returns one dict per rank: {"rank", "avg_completion_rate", "kata_count"},
    ordered from easiest (most negative rank_id) upward.
    """
    query = """
        SELECT rank_name,
               AVG(CASE WHEN total_attempts > 0
                   THEN CAST(total_completed AS REAL) / total_attempts
                   ELSE NULL END) AS avg_rate,
               COUNT(*) AS kata_count
        FROM kata
        WHERE rank_name IS NOT NULL AND total_attempts > 0
        GROUP BY rank_name
        ORDER BY rank_id
    """
    return [
        {"rank": rank, "avg_completion_rate": rate, "kata_count": count}
        for rank, rate, count in conn.execute(query)
    ]
def user_language_profile(conn: sqlite3.Connection, username: str) -> dict:
    """Analyze which languages a user completes kata in.

    Returns {language: completion_count}, most frequent first.
    """
    counts: Counter = Counter()
    cursor = conn.execute(
        "SELECT completed_languages FROM completions WHERE username = ?",
        (username,)
    )
    for (langs_json,) in cursor:
        counts.update(json.loads(langs_json))
    return dict(counts.most_common())
# Run analysis
# NOTE(review): rebinds `db` from the earlier discovery example; init_db
# is idempotent (CREATE ... IF NOT EXISTS) so reopening is safe.
db = init_db("codewars.db")
analyze_kata_catalog(db)
Full Collection Run
def full_collection_pipeline(seed_users: list[str], db_path: str = "codewars.db") -> None:
    """Complete pipeline: discover kata via users, then fetch full metadata."""
    conn = init_db(db_path)
    session = make_session()

    print("Phase 1: Collecting user histories...")
    kata_ids = discover_kata_via_users(seed_users, conn)

    print(f"\nPhase 2: Fetching metadata for {len(kata_ids)} kata...")
    fetched = errors = 0
    for kata_id in kata_ids:
        # Skip kata that already have any stored record (no staleness
        # check — delete rows to force a re-fetch).
        existing_row = conn.execute(
            "SELECT fetched_at FROM kata WHERE id = ?", (kata_id,)
        ).fetchone()
        if existing_row is not None:
            continue
        kata = get_kata(kata_id, session)
        if kata is None:
            errors += 1
        else:
            upsert_kata(conn, kata)
            fetched += 1
        time.sleep(1.2)
        if (fetched + errors) % 50 == 0:
            print(f" {fetched} fetched, {errors} errors")

    print(f"\nCollection complete: {fetched} kata stored ({errors} errors)")
    print("\nPhase 3: Analysis...")
    analyze_kata_catalog(conn)
    conn.close()
# Run the pipeline
# NOTE(review): rebinds `seed_users` from the earlier discovery example
# (adds Blind4Basics); a full run makes thousands of rate-limited API calls.
seed_users = [
    "g964", "jhoffner", "kazk", "GiacomoSorbi",
    "user2244877", "Voile", "smol", "dbuezas", "Blind4Basics",
]
full_collection_pipeline(seed_users)
Summary
The Codewars public API is clean and stable for kata metadata and user profiles. The main challenges are: no pagination for the full kata catalog (requiring indirect discovery), strict rate limits (~1 req/s), and Cloudflare blocking on the website for anything beyond the JSON API.
For large-scale collection, the combination of user-history seeding + keyword search covers the majority of the popular catalog efficiently. Store in SQLite with upserts so you can re-run to refresh completion counts as they grow over time.
Building a Curriculum from Kata Data
One practical use case: generating a structured learning path by analyzing the kata catalog for a given language.
def build_learning_path(conn: sqlite3.Connection, language: str = "python") -> dict:
    """Build a difficulty-ordered learning path for a language.

    Returns {rank_name: [kata dicts]} where each rank bucket is sorted by
    completion count descending; only kata with >100 completions qualify.
    """
    query = """
        SELECT id, name, slug, rank_id, rank_name, total_completed,
               tags, description
        FROM kata
        WHERE languages LIKE ?
          AND rank_id IS NOT NULL
          AND total_completed > 100
        ORDER BY rank_id ASC, total_completed DESC
    """
    # languages is a JSON array column, so match the quoted language name.
    cursor = conn.execute(query, (f'%"{language}"%',))
    path: dict = {}
    for kata_id, name, slug, _rank_id, rank_name, completed, tags_json, _desc in cursor:
        bucket = path.setdefault(rank_name or "Unknown", [])
        bucket.append({
            "id": kata_id,
            "name": name,
            "slug": slug,
            "url": f"https://www.codewars.com/kata/{slug}",
            "completions": completed,
            "tags": json.loads(tags_json) if tags_json else [],
        })
    return path
def print_curriculum(conn: sqlite3.Connection, language: str = "python") -> None:
    """Print a structured learning curriculum."""
    path = build_learning_path(conn, language)
    print(f"\nCurriculum for {language.upper()}:")
    # Walk kyu levels from easiest to hardest, skipping empty buckets.
    for level in ("8 kyu", "7 kyu", "6 kyu", "5 kyu", "4 kyu", "3 kyu", "2 kyu", "1 kyu"):
        entries = path.get(level, [])
        if not entries:
            continue
        print(f"\n{level} ({len(entries)} kata):")
        # Show top 5 by completion count
        for entry in entries[:5]:
            print(f" [{entry['completions']:>8,}] {entry['name'][:60]}")
Comparing User Skill Profiles
Another use case: comparing the skill profiles of different users based on their completed kata distributions.
def user_skill_profile(conn: sqlite3.Connection, username: str) -> dict:
    """Build a skill profile from a user's completion history.

    Joins completions against kata metadata and aggregates rank, tag and
    language distributions, plus a difficulty-weighted score.

    Returns:
        Profile dict, or {"username": ..., "error": ...} when the user has
        no joined completion data.
    """
    rows = conn.execute("""
        SELECT k.rank_name, k.rank_id, k.tags, c.completed_languages
        FROM completions c
        JOIN kata k ON k.id = c.kata_id
        WHERE c.username = ? AND k.rank_id IS NOT NULL
    """, (username,)).fetchall()
    if not rows:
        return {"username": username, "error": "No completion data"}
    from collections import Counter
    rank_counts = Counter()
    tag_counts = Counter()
    language_counts = Counter()
    # Simple difficulty weighting: each kyu is worth twice the previous one.
    rank_weights = {-1: 32, -2: 16, -3: 8, -4: 4, -5: 2, -6: 1, -7: 0.5, -8: 0.25}
    weighted_score = 0.0
    for rank_name, rank_id, tags_json, langs_json in rows:
        rank_counts[rank_name] += 1
        # Bug fix: the original zipped `rows` against rank_counts.values(),
        # pairing completions with unrelated counts and truncating the sum.
        # The weighted score is simply the sum of per-completion weights.
        weighted_score += rank_weights.get(rank_id, 0)
        if tags_json:
            for tag in json.loads(tags_json):
                tag_counts[tag] += 1
        if langs_json:
            for lang in json.loads(langs_json):
                language_counts[lang] += 1
    return {
        "username": username,
        "total_completed": len(rows),
        "rank_distribution": dict(rank_counts.most_common()),
        "top_topics": dict(tag_counts.most_common(10)),
        "languages": dict(language_counts.most_common()),
        "weighted_score": round(weighted_score, 1),
    }
def compare_users(conn: sqlite3.Connection, usernames: list[str]) -> None:
    """Compare skill profiles side by side."""
    print(f"\n{'Username':20s} {'Total':>8s} {'Score':>8s} {'Top Lang':>12s}")
    print("-" * 55)
    for username in usernames:
        profile = user_skill_profile(conn, username)
        if "error" in profile:
            continue  # no completion data for this user
        languages = profile.get("languages", {})
        top_lang = max(languages, key=languages.get) if languages else "N/A"
        print(f" {profile['username']:18s} {profile['total_completed']:>8,} "
              f"{profile['weighted_score']:>8.0f} {top_lang:>12s}")
Monitoring Kata Completion Counts Over Time
Completion counts grow as more users finish kata. Tracking this gives you a signal for which challenges are gaining traction:
def add_completion_snapshot_table(conn: sqlite3.Connection) -> None:
    """Add a table for tracking completion count history.

    Idempotent: uses IF NOT EXISTS for both the table and its index.
    """
    statements = (
        """
        CREATE TABLE IF NOT EXISTS kata_snapshots (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            kata_id TEXT NOT NULL,
            total_completed INTEGER,
            total_stars INTEGER,
            snapshot_date TEXT DEFAULT (date('now')),
            FOREIGN KEY (kata_id) REFERENCES kata(id)
        )
        """,
        """
        CREATE INDEX IF NOT EXISTS idx_snapshots_kata
        ON kata_snapshots(kata_id, snapshot_date)
        """,
    )
    for ddl in statements:
        conn.execute(ddl)
    conn.commit()
def snapshot_popular_kata(conn: sqlite3.Connection, min_completed: int = 10000) -> int:
    """Take a snapshot of completion counts for popular kata.

    Returns the number of kata snapshotted.
    """
    add_completion_snapshot_table(conn)
    popular = conn.execute("""
        SELECT id, total_completed, total_stars
        FROM kata WHERE total_completed >= ?
    """, (min_completed,)).fetchall()
    # The SELECT rows already match the insert's parameter order.
    conn.executemany("""
        INSERT INTO kata_snapshots (kata_id, total_completed, total_stars)
        VALUES (?, ?, ?)
    """, popular)
    conn.commit()
    return len(popular)
def fastest_growing_kata(conn: sqlite3.Connection, days_back: int = 7) -> list[dict]:
    """Find kata with the fastest growing completion counts.

    Compares today's snapshot against the one exactly `days_back` days ago;
    both snapshots must exist for a kata to appear. Returns at most 20 rows.
    """
    query = """
        SELECT k.name, k.slug, k.rank_name,
               s2.total_completed - s1.total_completed AS growth,
               s2.total_completed
        FROM kata_snapshots s1
        JOIN kata_snapshots s2 ON s1.kata_id = s2.kata_id
        JOIN kata k ON k.id = s1.kata_id
        WHERE s1.snapshot_date = date('now', ?)
          AND s2.snapshot_date = date('now')
          AND s2.total_completed > s1.total_completed
        ORDER BY growth DESC
        LIMIT 20
    """
    offset = f"-{days_back} days"
    return [
        {"name": name, "slug": slug, "rank": rank, "growth": growth, "total": total}
        for name, slug, rank, growth, total in conn.execute(query, (offset,))
    ]