How to Scrape Duolingo Course Data via Undocumented API (2026)
Duolingo has no public API. They shut down their old API in 2023 and never replaced it. But the mobile app and website still talk to backend endpoints — and those endpoints are accessible if you know where to look.
This guide covers Duolingo's undocumented API endpoints that you can use to extract course data, language pairs, user profiles, streaks, and leaderboard information. All discovered through reverse-engineering the web app's network traffic.
How the Endpoints Were Found
Open Duolingo in Chrome, open DevTools → Network tab, and browse around. The web app makes GraphQL and REST calls to www.duolingo.com endpoints. The authentication token is a JWT stored in a cookie. Some endpoints work without auth, others require it.
Setup
pip install requests
No browser automation needed — these are plain HTTP endpoints.
Public Endpoints (No Auth Required)
Course Catalog
Duolingo's course data is partially available without authentication:
import requests
import json
def get_duolingo_courses():
    """Fetch all available Duolingo courses (language pairs).

    Tries the unauthenticated JSON endpoint first; if that call does not
    return HTTP 200, falls back to scraping the public courses page.

    Returns:
        list[dict]: one dict per course, sorted by learner count
        (descending). Keys: learning_language, learning_language_name,
        from_language, from_language_name, num_learners, phase.
    """
    url = "https://www.duolingo.com/api/1/courses/list"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
        "Accept": "application/json"
    }
    response = requests.get(url, headers=headers, timeout=15)
    if response.status_code != 200:
        # Fallback: scrape the courses page
        return get_courses_from_web()
    data = response.json()
    courses = []
    for course in data:
        courses.append({
            "learning_language": course.get("learning_language"),
            "learning_language_name": course.get("learning_language_name"),
            "from_language": course.get("from_language"),
            "from_language_name": course.get("from_language_name"),
            "num_learners": course.get("num_learners"),
            "phase": course.get("phase")  # 1=hatching, 2=beta, 3=stable
        })
    # Bug fix: `.get("num_learners", 0)` still yields None when the key is
    # present with a null value, and comparing None to int raises TypeError
    # during sorting. `or 0` normalizes both missing and null.
    return sorted(courses, key=lambda x: x.get("num_learners") or 0, reverse=True)
def get_courses_from_web():
    """Fallback: scrape course data from the incubator/courses page."""
    from bs4 import BeautifulSoup

    response = requests.get(
        "https://www.duolingo.com/courses",
        headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"},
        timeout=15,
    )
    soup = BeautifulSoup(response.text, "lxml")
    # Duolingo embeds course data in a __NEXT_DATA__ script tag
    next_data = soup.find("script", {"id": "__NEXT_DATA__"})
    if next_data is None:
        return []
    payload = json.loads(next_data.string)
    # Navigate the nested structure (changes periodically)
    page_props = payload.get("props", {}).get("pageProps", {})
    return page_props.get("courses", [])
# Example: list the ten most-popular courses in the catalog.
courses = get_duolingo_courses()
print(f"Found {len(courses)} language courses")
for entry in courses[:10]:
    learners = entry.get('num_learners', 'N/A')
    print(f" {entry['learning_language_name']} for {entry['from_language_name']} speakers — {learners} learners")
User Profiles
Public user profiles are accessible without authentication:
def get_user_profile(username, session=None):
    """Fetch a Duolingo user's public profile.

    Args:
        username: Duolingo username to look up.
        session: optional requests.Session (e.g. a proxied session from
            create_proxy_session). When omitted, a plain requests.get is
            used. Added because the proxy example later in this guide
            calls this function with a `session=` keyword.

    Returns:
        dict of profile fields on success, or {"error": ...} on failure.
    """
    url = f"https://www.duolingo.com/2017-06-30/users?username={username}&fields=streak,totalXp,currentCourseId,courses,streakData,creationDate"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
        "Accept": "application/json"
    }
    # A Session and the requests module share the same .get signature.
    client = session if session is not None else requests
    response = client.get(url, headers=headers, timeout=15)
    if response.status_code != 200:
        return {"error": f"HTTP {response.status_code}"}
    data = response.json()
    users = data.get("users", [])
    if not users:
        return {"error": "User not found"}
    user = users[0]
    courses = []
    for c in user.get("courses", []):
        # `level` may be a bare int or a {"level": n} dict depending on
        # which API revision served the response.
        level = c.get("level")
        if isinstance(level, dict):
            level = level.get("level")
        courses.append({
            "id": c.get("id"),
            "title": c.get("title"),
            "xp": c.get("xp"),
            "crowns": c.get("crowns"),
            "level": level
        })
    return {
        "username": username,
        "streak": user.get("streak"),
        "total_xp": user.get("totalXp"),
        "creation_date": user.get("creationDate"),
        "current_course": user.get("currentCourseId"),
        "courses": courses
    }
# Example: fetch a public profile by username.
profile = get_user_profile("LuisvonAhn")
print(json.dumps(profile, indent=2))
Authenticated Endpoints
Some endpoints require a valid session. You can get one by logging in:
def duolingo_login(username, password):
    """Authenticate with Duolingo and get a session token.

    Returns:
        (session, token) on success; (None, None) on any non-200 response.
    """
    credentials = {"login": username, "password": password}
    request_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
        "Content-Type": "application/json"
    }
    session = requests.Session()
    response = session.post(
        "https://www.duolingo.com/login",
        json=credentials,
        headers=request_headers,
        timeout=15,
    )
    if response.status_code != 200:
        return None, None
    # JWT token is in the response and set as cookie
    token = response.json().get("jwt") or response.headers.get("jwt")
    return session, token
def get_skill_tree(session, user_id, course_id):
    """Get the full skill tree for a course (requires auth).

    Returns:
        list of skill dicts, or None on a non-200 response.
    """
    endpoint = f"https://www.duolingo.com/2017-06-30/users/{user_id}/courses/{course_id}"
    response = session.get(
        endpoint,
        params={"fields": "skills,sections,currentSection"},
        timeout=15,
    )
    if response.status_code != 200:
        return None
    payload = response.json()
    return [
        {
            "name": entry.get("name"),
            "short_name": entry.get("shortName"),
            "levels": entry.get("levels"),
            "lessons": entry.get("lessons"),
            "words": entry.get("words", []),
            "tips_available": entry.get("tipsAndNotes") is not None,
        }
        for entry in payload.get("skills", [])
    ]
Leaderboard Data
The leaderboard API reveals top users and their XP:
def get_leaderboard(session, user_id):
    """Fetch the user's current leaderboard.

    Returns:
        dict with the user's tier and the active cohort members,
        or None on a non-200 response.
    """
    url = f"https://www.duolingo.com/leaderboards/7d9f5dd1-8423-4e7c-8c68-1ca2f2f2d1f0/users/{user_id}"
    browser_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
        "Accept": "application/json"
    }
    response = session.get(url, headers=browser_headers, timeout=15)
    if response.status_code != 200:
        return None
    payload = response.json()
    cohort = [
        {
            "display_name": member.get("display_name"),
            "total_xp": member.get("total_xp_this_period"),
            "streak": member.get("streak"),
            "has_plus": member.get("has_plus"),
        }
        for member in payload.get("active", [])
    ]
    return {"tier": payload.get("tier"), "cohort": cohort}
Streak and Activity Data
Streak information is one of the most interesting public data points:
def get_streak_info(username):
    """Get detailed streak data for a user.

    Returns:
        dict of current/longest streak details, or None when the user
        does not exist or the request fails.
    """
    url = f"https://www.duolingo.com/2017-06-30/users?username={username}&fields=streak,streakData,practiceReminderSettings,currentCourseId"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
        "Accept": "application/json"
    }
    response = requests.get(url, headers=headers, timeout=15)
    # Bug fix: the original called .json() unconditionally — rate-limit
    # and error responses are often HTML and would raise a decode error.
    if response.status_code != 200:
        return None
    data = response.json()
    users = data.get("users", [])
    if not users:
        return None
    user = users[0]
    streak_data = user.get("streakData", {})
    longest = streak_data.get("longestStreak", {})
    return {
        "username": username,
        "current_streak": user.get("streak"),
        "longest_streak": longest.get("length"),
        "longest_streak_start": longest.get("startDate"),
        "longest_streak_end": longest.get("endDate"),
        "streak_freeze_used": streak_data.get("currentStreak", {}).get("lastExtendedDate")
    }
# Example: print streak details for a public profile.
streak = get_streak_info("LuisvonAhn")
print(json.dumps(streak, indent=2))
Batch Scraping Multiple Users
If you're collecting data across many users, batch your requests carefully:
import time
import random
def batch_scrape_users(usernames, delay_range=(2.0, 4.0)):
    """Scrape profile data for a list of usernames.

    Sleeps a random delay (seconds, drawn from delay_range) between
    requests and prints progress every 10 users. Per-user failures are
    recorded as {"username", "error"} entries rather than raised.
    """
    collected = []
    total = len(usernames)
    for idx, name in enumerate(usernames, start=1):
        try:
            record = get_user_profile(name)
            streak_details = get_streak_info(name)
            if streak_details:
                record["longest_streak"] = streak_details.get("longest_streak")
                record["streak_freeze_used"] = streak_details.get("streak_freeze_used")
            collected.append(record)
        except Exception as e:
            print(f"Error for {name}: {e}")
            collected.append({"username": name, "error": str(e)})
        if idx % 10 == 0:
            print(f"Scraped {idx}/{total} users")
        time.sleep(random.uniform(*delay_range))
    return collected
Anti-Bot Measures and Rate Limits
Duolingo's API endpoints have specific protections you need to understand:
Rate limiting: The public endpoints allow roughly 30-40 requests per minute from a single IP before returning 429 errors. The 2017-06-30 versioned API is more generous than the newer endpoints.
JWT validation: Authenticated endpoints check JWT expiry strictly. Tokens expire after roughly 30 minutes of inactivity. Re-authenticate when you get 401 responses.
Cloudflare protection: The web pages (not API) sit behind Cloudflare. If you're scraping the HTML as a fallback, you'll hit JavaScript challenges. The API endpoints bypass most of this since they return JSON directly.
IP reputation: Duolingo tracks request patterns per IP. Sustained automated access from datacenter IPs gets flagged within minutes. For any batch data collection, residential proxies are the way to go. ThorData gives you rotating residential IPs that look like regular app users connecting from home networks — which is exactly what Duolingo expects to see.
def create_proxy_session(proxy_url):
    """Create a requests session routed through a proxy.

    Both plain and TLS traffic are sent through the same proxy URL, and
    browser-like headers are installed as session defaults.
    """
    proxied = requests.Session()
    proxied.proxies = {"http": proxy_url, "https": proxy_url}
    proxied.headers.update({
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
        "Accept": "application/json"
    })
    return proxied
# Usage:
# session = create_proxy_session("http://user:[email protected]:9000")
# profile = session.get(f"https://www.duolingo.com/2017-06-30/users?username=someuser")
Fingerprint consistency: When using proxies, make sure your User-Agent and other headers are consistent within a session. Duolingo correlates headers with IP — a residential IP sending requests with a bot-like User-Agent gets flagged faster than a datacenter IP with a browser User-Agent.
Monitoring for API Changes
Undocumented APIs break without warning. Build in resilience:
def safe_api_call(url, headers=None, max_retries=3):
    """Make an API call with retry logic and change detection.

    Args:
        url: full endpoint URL.
        headers: optional extra headers merged over browser defaults.
        max_retries: attempts before giving up.

    Returns:
        Parsed JSON on HTTP 200; None on 404 (endpoint likely moved)
        or after exhausting retries.
    """
    default_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
        "Accept": "application/json"
    }
    if headers:
        default_headers.update(headers)
    for attempt in range(max_retries):
        try:
            response = requests.get(url, headers=default_headers, timeout=15)
        except requests.exceptions.RequestException:
            # Bug fix: the original caught only Timeout, so connection
            # resets and DNS failures escaped the retry loop entirely.
            # RequestException is the base class covering all of them.
            time.sleep(2)
            continue
        if response.status_code == 200:
            return response.json()
        elif response.status_code == 404:
            # A 404 on a previously-working URL usually means the
            # undocumented endpoint moved; retrying will not help.
            print(f"Endpoint may have changed: {url}")
            return None
        elif response.status_code == 429:
            wait = (2 ** attempt) * 5  # exponential backoff: 5s, 10s, 20s
            print(f"Rate limited, waiting {wait}s")
            time.sleep(wait)
        else:
            print(f"HTTP {response.status_code} for {url}")
            time.sleep(2)
    return None
Data Export
import csv
def export_user_data(users, filename="duolingo_users.csv"):
    """Export user profile data to CSV.

    Args:
        users: list of profile dicts from get_user_profile; entries
            containing an "error" key are skipped.
        filename: output CSV path.

    Writes one flattened row per successful profile; does nothing when
    there is no exportable data.
    """
    if not users:
        return
    flat = []
    for u in users:
        if "error" in u:
            continue
        flat.append({
            "username": u.get("username"),
            "streak": u.get("streak"),
            "total_xp": u.get("total_xp"),
            "creation_date": u.get("creation_date"),
            "current_course": u.get("current_course"),
            "num_courses": len(u.get("courses", [])),
            "longest_streak": u.get("longest_streak")
        })
    if not flat:
        return
    with open(filename, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=flat[0].keys())
        writer.writeheader()
        writer.writerows(flat)
    # Bug fix: the original printed the literal text "(unknown)" instead
    # of interpolating the output filename.
    print(f"Exported {len(flat)} users to {filename}")
Endpoint Reference
Here's a summary of the undocumented endpoints covered in this guide:
| Endpoint | Auth | Returns |
|---|---|---|
| `/api/1/courses/list` | No | All available language courses |
| `/2017-06-30/users?username=X` | No | Public user profile, streak, XP |
| `/login` | POST | JWT session token |
| `/2017-06-30/users/{id}/courses/{id}` | Yes | Skill tree, lessons, words |
| `/leaderboards/.../users/{id}` | Yes | Leaderboard tier and cohort |
These endpoints have been stable since mid-2024 but can change at any time. The versioned endpoints (2017-06-30) tend to be more stable than unversioned ones.
Legal and Ethical Notes
Duolingo's Terms of Service prohibit automated access. These endpoints are undocumented and unsupported — Duolingo can change or block them at any time.
This code is for educational purposes and personal research. Don't use it to build competing products, harvest user data at scale, or interfere with Duolingo's services. If you're collecting user data, stick to genuinely public profiles and respect people's privacy.
Collecting Language Learning Statistics at Scale
If you want to analyze Duolingo's learner base systematically, here is a structured approach for collecting public user data responsibly:
import requests
import json
import time
import random
import sqlite3
from datetime import datetime
# Root of every endpoint used in this section.
BASE_URL = "https://www.duolingo.com"
# Browser-like defaults reused across requests so the header fingerprint
# stays consistent within a collection run.
HEADERS = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
"Accept": "application/json",
"Accept-Language": "en-US,en;q=0.9",
}
def get_leaderboard_users(session: "requests.Session | None" = None) -> list:
    """
    Collect users from public leaderboards.

    Args:
        session: optional pre-configured session (e.g. proxied); a plain
            requests.get with browser headers is used when omitted.

    Returns:
        Deduplicated list of username strings (order not guaranteed).
    """
    # The global leaderboard page exposes top users publicly
    url = f"{BASE_URL}/2017-06-30/leaderboards"
    headers = {**HEADERS}
    if session:
        # Fix: the original built `headers` but never sent them on this
        # branch, so a bare Session made the request with its own
        # (possibly default) headers.
        resp = session.get(url, headers=headers, timeout=15)
    else:
        resp = requests.get(url, headers=headers, timeout=15)
    if resp.status_code != 200:
        return []
    data = resp.json()
    users = []
    for lb in data.get("leaderboards", []):
        for user in lb.get("cohort", []):
            username = user.get("display_name")
            if username:
                users.append(username)
    return list(set(users))  # Deduplicate
def get_course_statistics() -> dict:
    """Fetch aggregate statistics for all Duolingo courses.

    Summarizes total learners, counts per release phase, per-language
    learner totals, and the ten largest courses. Returns {} when the
    course catalog could not be fetched.
    """
    from collections import Counter

    courses = get_duolingo_courses()
    if not courses:
        return {}

    # Per-language learner totals (one language can appear in many pairs).
    totals_by_language = {}
    for course in courses:
        name = course.get("learning_language_name", "Unknown")
        totals_by_language[name] = (
            totals_by_language.get(name, 0) + course.get("num_learners", 0)
        )

    return {
        "total_courses": len(courses),
        "total_learners": sum(c.get("num_learners", 0) for c in courses),
        "by_phase": dict(Counter(c.get("phase") for c in courses)),
        "by_learning_language": sorted(
            [{"language": k, "total_learners": v} for k, v in totals_by_language.items()],
            key=lambda item: item["total_learners"],
            reverse=True,
        ),
        "top_10_by_learners": sorted(
            courses, key=lambda c: c.get("num_learners", 0), reverse=True
        )[:10],
    }
def init_duolingo_db(db_path: str = "duolingo.db") -> sqlite3.Connection:
    """Create the SQLite schema (if missing) and return an open connection.

    Tables: users (one row per profile), user_courses (per-user course
    progress), course_stats (daily learner-count snapshots). The two
    indexes support streak/XP leaderboard-style queries.
    """
    conn = sqlite3.connect(db_path)
    schema = """
        CREATE TABLE IF NOT EXISTS users (
            username TEXT PRIMARY KEY,
            streak INTEGER,
            total_xp INTEGER,
            creation_date INTEGER,
            current_course TEXT,
            num_courses INTEGER,
            longest_streak INTEGER,
            scraped_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        );
        CREATE TABLE IF NOT EXISTS user_courses (
            username TEXT,
            course_id TEXT,
            course_title TEXT,
            xp INTEGER,
            crowns INTEGER,
            level INTEGER,
            PRIMARY KEY (username, course_id)
        );
        CREATE TABLE IF NOT EXISTS course_stats (
            snapshot_date TEXT,
            learning_language TEXT,
            from_language TEXT,
            num_learners INTEGER,
            phase INTEGER,
            PRIMARY KEY (snapshot_date, learning_language, from_language)
        );
        CREATE INDEX IF NOT EXISTS idx_users_streak ON users(streak DESC);
        CREATE INDEX IF NOT EXISTS idx_users_xp ON users(total_xp DESC);
    """
    conn.executescript(schema)
    conn.commit()
    return conn
def save_user_profile(conn: sqlite3.Connection, profile: dict):
    """Upsert one user profile and its per-course rows, then commit."""
    username = profile.get("username")
    conn.execute(
        """INSERT OR REPLACE INTO users
        (username, streak, total_xp, creation_date, current_course,
        num_courses, longest_streak)
        VALUES (?,?,?,?,?,?,?)""",
        (
            username,
            profile.get("streak"),
            profile.get("total_xp"),
            profile.get("creation_date"),
            profile.get("current_course"),
            len(profile.get("courses", [])),
            profile.get("longest_streak"),
        ),
    )
    course_rows = [
        (
            username,
            course.get("id"),
            course.get("title"),
            course.get("xp"),
            course.get("crowns"),
            course.get("level"),
        )
        for course in profile.get("courses", [])
    ]
    conn.executemany(
        """INSERT OR REPLACE INTO user_courses
        (username, course_id, course_title, xp, crowns, level)
        VALUES (?,?,?,?,?,?)""",
        course_rows,
    )
    conn.commit()
Analyzing Learning Patterns
Once you have user data, you can analyze learning behavior:
import statistics
def analyze_learning_patterns(db_path: str = "duolingo.db") -> dict:
    """Analyze patterns in collected Duolingo user data.

    Summarizes streak and XP distributions, streak-milestone retention,
    and the ten most-followed courses from the users/user_courses tables.
    Sections with no data are omitted from the result.
    """
    conn = sqlite3.connect(db_path)

    def positive_column(sql):
        # Helper: single-column query, nulls and zeros already filtered by SQL.
        return [row[0] for row in conn.execute(sql).fetchall()]

    streaks = positive_column(
        "SELECT streak FROM users WHERE streak IS NOT NULL AND streak > 0"
    )
    xp_values = positive_column(
        "SELECT total_xp FROM users WHERE total_xp IS NOT NULL AND total_xp > 0"
    )

    popular_courses = conn.execute("""
        SELECT course_title, COUNT(*) as user_count, AVG(xp) as avg_xp
        FROM user_courses
        WHERE course_title IS NOT NULL
        GROUP BY course_title
        ORDER BY user_count DESC
        LIMIT 10
    """).fetchall()

    # Retention buckets: users holding a streak of at least N days.
    streak_milestones = {
        f"{days}_days": conn.execute(
            "SELECT COUNT(*) FROM users WHERE streak >= ?", (days,)
        ).fetchone()[0]
        for days in [7, 30, 100, 365, 1000]
    }

    conn.close()

    result = {}
    if streaks:
        result["streak_stats"] = {
            "count": len(streaks),
            "median": statistics.median(streaks),
            "mean": round(statistics.mean(streaks), 1),
            "max": max(streaks),
            "milestones": streak_milestones,
        }
    if xp_values:
        result["xp_stats"] = {
            "count": len(xp_values),
            "median": statistics.median(xp_values),
            "mean": round(statistics.mean(xp_values), 0),
            "max": max(xp_values),
        }
    result["popular_courses"] = [
        {"course": title, "users": user_count, "avg_xp": round(avg_xp or 0, 0)}
        for title, user_count, avg_xp in popular_courses
    ]
    return result
Monitoring for API Changes
Duolingo's undocumented endpoints change without notice. Build monitoring to detect when they break:
import hashlib
# Fields every profile response contained when this scraper was written;
# validate_api_response uses this set to detect schema drift.
EXPECTED_PROFILE_FIELDS = {
"streak", "totalXp", "currentCourseId", "courses", "creationDate"
}
def validate_api_response(data: dict, endpoint: str) -> tuple:
    """Validate that an API response has the expected structure.

    Args:
        data: parsed JSON body.
        endpoint: logical endpoint name ("user_profile" or "courses").

    Returns:
        (is_valid, issues) — issues is a list of short problem codes.
    """
    issues = []
    if endpoint == "user_profile":
        users = data.get("users", [])
        if users:
            missing = EXPECTED_PROFILE_FIELDS - set(users[0].keys())
            if missing:
                issues.append(f"missing_fields: {missing}")
        else:
            issues.append("no_users_in_response")
    elif endpoint == "courses":
        if not isinstance(data, list):
            issues.append("not_a_list")
        elif data and "learning_language" not in data[0]:
            issues.append("course_format_changed")
    return len(issues) == 0, issues
def check_endpoint_health() -> dict:
    """Quick health check of all Duolingo endpoints.

    Returns:
        dict keyed by endpoint name; each value reports a status of
        "ok" / "degraded" / "down" / "error" plus supporting details.
    """
    results = {}

    def probe(name, url, schema):
        # One probe per endpoint: any exception (network, JSON decode)
        # is recorded as "error" rather than raised.
        try:
            resp = requests.get(url, headers=HEADERS, timeout=10)
            if resp.status_code == 200:
                valid, issues = validate_api_response(resp.json(), schema)
                results[name] = {"status": "ok" if valid else "degraded", "issues": issues}
            else:
                results[name] = {"status": "down", "http_code": resp.status_code}
        except Exception as e:
            results[name] = {"status": "error", "message": str(e)}

    # Test courses endpoint
    probe("courses", f"{BASE_URL}/api/1/courses/list", "courses")
    # Test user profile endpoint
    probe(
        "user_profile",
        f"{BASE_URL}/2017-06-30/users?username=LuisvonAhn&fields=streak,totalXp",
        "user_profile",
    )
    return results
# Run health check before any bulk scraping
health = check_endpoint_health()
for name, info in health.items():
    print(f" {name}: {info['status']}")
    detected_issues = info.get("issues")
    if detected_issues:
        print(f" Issues: {detected_issues}")
Proxy Configuration for Sustained Collection
For collecting data across many users, distribute requests to avoid Duolingo's per-IP rate limits:
def create_proxy_session(proxy_url: str) -> requests.Session:
    """Create a requests session with proxy and consistent headers."""
    proxied = requests.Session()
    # Route both plain and TLS traffic through the same proxy endpoint.
    proxied.proxies = {"http": proxy_url, "https": proxy_url}
    # Install browser-like defaults once so every request in the session
    # carries the same fingerprint.
    proxied.headers.update({
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
        "Accept": "application/json",
        "Accept-Language": "en-US,en;q=0.9",
    })
    return proxied
# ThorData rotating residential proxy
# Use residential IPs to avoid Duolingo flagging datacenter traffic
PROXY_URL = "http://USERNAME:[email protected]:9000"
session = create_proxy_session(PROXY_URL)

# Bug fix: the original looped over an undefined `usernames` variable
# (NameError). Populate it first — e.g. from the public leaderboards.
usernames = get_leaderboard_users(session)

# Each request through a rotating proxy appears from a different residential IP
for username in usernames[:100]:
    profile = get_user_profile(username, session=session)
    time.sleep(random.uniform(2, 4))
Complete Collection Pipeline
def run_duolingo_pipeline(
    usernames: list = None,
    db_path: str = "duolingo.db",
    proxy_url: str = None,
):
    """
    Full data collection pipeline:
    1. Collect course statistics
    2. Fetch user profiles (if usernames provided)
    3. Store everything in SQLite

    Returns the aggregate course statistics dict.
    """
    conn = init_duolingo_db(db_path)

    # Phase 1: Course statistics
    print("Collecting course statistics...")
    stats = get_course_statistics()
    snapshot_day = datetime.now().strftime("%Y-%m-%d")

    # Save course learner counts
    courses = get_duolingo_courses()
    snapshot_rows = [
        (
            snapshot_day,
            course.get("learning_language"),
            course.get("from_language"),
            course.get("num_learners"),
            course.get("phase"),
        )
        for course in courses
    ]
    conn.executemany(
        """INSERT OR REPLACE INTO course_stats
        (snapshot_date, learning_language, from_language, num_learners, phase)
        VALUES (?,?,?,?,?)""",
        snapshot_rows,
    )
    conn.commit()
    print(f" Saved stats for {len(courses)} courses")

    # Phase 2: User profiles
    if usernames:
        # NOTE(review): the session is created but get_user_profile below
        # is called without it, matching the original flow.
        session = create_proxy_session(proxy_url) if proxy_url else None
        print(f"\nCollecting {len(usernames)} user profiles...")
        for i, username in enumerate(usernames):
            try:
                profile = get_user_profile(username)
                streak_info = get_streak_info(username)
                if streak_info:
                    profile["longest_streak"] = streak_info.get("longest_streak")
                save_user_profile(conn, profile)
                if (i + 1) % 10 == 0:
                    print(f" Progress: {i+1}/{len(usernames)}")
            except Exception as e:
                print(f" Error ({username}): {e}")
            time.sleep(random.uniform(2.0, 4.0))

    conn.close()
    print("\nPipeline complete")
    return stats
# Run it
stats = run_duolingo_pipeline(proxy_url="http://USER:[email protected]:9000")
print(f"\nTop 5 courses by learner count:")
for entry in stats.get("top_10_by_learners", [])[:5]:
    print(f" {entry['learning_language_name']}: {entry['num_learners']:,} learners")
Endpoint Reference (2026)
| Endpoint | Auth | Rate Limit | Returns |
|---|---|---|---|
| `/api/1/courses/list` | No | ~60/min | All courses with learner counts |
| `/2017-06-30/users?username=X` | No | ~30/min | Profile, streak, XP, courses |
| `/2017-06-30/users?username=X&fields=streakData` | No | ~30/min | Extended streak history |
| `/login` (POST) | POST credentials | Strict | JWT session token |
| `/2017-06-30/users/{id}/courses/{id}` | Yes | ~30/min | Skill tree, lessons |
| `/leaderboards/.../users/{id}` | Yes | ~20/min | Leaderboard cohort |
The versioned 2017-06-30 endpoints have been stable since mid-2024, making them more reliable for production pipelines than the newer unversioned endpoints.
Legal and Ethical Notes
Duolingo's Terms of Service prohibit automated access. These endpoints are undocumented and Duolingo can change or block them at any time.
Keep use limited to research, personal projects, and analytics. Do not collect or store personal user data at scale. Do not use this to build competing products or interfere with Duolingo's services.
The course statistics (learner counts, language data) are published by Duolingo in marketing materials and press releases, so collecting them for research purposes is lower risk than collecting individual user data.