How to Scrape Discord Public Servers: Python Guide (2026)
Discord has over 200 million monthly active users and millions of public servers. If you're building a server directory, analyzing communities, or doing market research, you need to pull this data programmatically.
The catch: Discord's own API requires bot tokens with guild membership, and their ToS is aggressive about scraping. But there are several legitimate angles — public listing sites, Discord's own widget endpoints, and invite metadata — that give you rich server data without violating anything.
What Data Is Available
Before writing code, know what you can actually get:
- Disboard.org — the largest public server directory. Categories, tags, descriptions, member counts, bump history
- Discord widget.json — if a server has widgets enabled, you get member count, online count, channel list, invite link
- Invite metadata — any public invite link gives you server name, icon, member counts, verification level
- top.gg — bot listing site that also has server directories with reviews and categories
Scraping Disboard Server Listings
Disboard is the most data-rich source. Servers are listed by category with tags, descriptions, and member counts.
import httpx
from selectolax.parser import HTMLParser
import time
import json
def scrape_disboard_category(category: str, pages: int = 5) -> list[dict]:
    """Scrape server listings from a Disboard category.

    Args:
        category: Disboard tag slug, e.g. "gaming".
        pages: Maximum number of listing pages to fetch.

    Returns:
        One dict per server card with name, description, member_count
        (raw display text as shown on the page), category, page, tags,
        and invite_url when a Discord link is present on the card.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
        "Accept-Language": "en-US,en;q=0.9",
    }
    servers: list[dict] = []
    # Fix: reuse one pooled client instead of opening a fresh connection
    # per page with httpx.get().
    with httpx.Client(headers=headers, timeout=15) as client:
        for page in range(1, pages + 1):
            url = f"https://disboard.org/servers/tag/{category}/{page}"
            resp = client.get(url)
            if resp.status_code != 200:
                print(f"Page {page}: status {resp.status_code}")
                break
            tree = HTMLParser(resp.text)
            cards = tree.css(".server-info")
            if not cards:
                # Fix: a 200 page with no listing cards means we've walked
                # past the last page (or got a challenge page) — stop instead
                # of fetching and sleeping through empty pages.
                break
            for card in cards:
                name_el = card.css_first(".server-info-name")
                desc_el = card.css_first(".server-info-description")
                count_el = card.css_first(".server-info-member-count")
                server = {
                    "name": name_el.text(strip=True) if name_el else None,
                    "description": desc_el.text(strip=True) if desc_el else None,
                    "member_count": count_el.text(strip=True) if count_el else None,
                    "category": category,
                    "page": page,
                }
                # Extract tags
                server["tags"] = [t.text(strip=True) for t in card.css(".tag")]
                # Extract invite link
                link_el = card.css_first("a[href*='discord']")
                if link_el:
                    server["invite_url"] = link_el.attributes.get("href")
                servers.append(server)
            print(f"Page {page}: found {len(servers)} servers total")
            time.sleep(3)  # be respectful
    return servers
# Example usage: pull three pages of the "gaming" tag and preview two results.
gaming_servers = scrape_disboard_category("gaming", pages=3)
preview = gaming_servers[:2]
print(json.dumps(preview, indent=2))
Disboard uses Cloudflare, so plain HTTP requests may get challenged on repeated hits. More on handling that below.
Discord Widget.json — The Official Backdoor
If a server has the widget enabled (many public servers do), Discord serves a JSON endpoint with no authentication needed:
import httpx
def get_server_widget(server_id: str) -> dict | None:
    """Fetch public widget data for a Discord server.

    Returns a trimmed dict of widget fields for servers with the widget
    enabled, or None when the widget is disabled (HTTP 403) or the request
    fails with any other status.
    """
    url = f"https://discord.com/api/guilds/{server_id}/widget.json"
    resp = httpx.get(url, timeout=10)
    if resp.status_code == 403:
        # Server owner has the widget turned off — not an error for us.
        return None
    if resp.status_code != 200:
        print(f"Error {resp.status_code} for {server_id}")
        return None
    payload = resp.json()
    channels = [
        {"id": ch["id"], "name": ch["name"], "position": ch["position"]}
        for ch in payload.get("channels", [])
    ]
    return {
        "id": payload.get("id"),
        "name": payload.get("name"),
        "instant_invite": payload.get("instant_invite"),
        "presence_count": payload.get("presence_count"),
        "channels": channels,
        "members_online": len(payload.get("members", [])),
    }
# Example — look up a large public server by its numeric guild ID.
widget = get_server_widget("1234567890")
if widget is not None:
    print(f"{widget['name']}: {widget['presence_count']} online")
This is rate-limited to about 5 requests per second per IP. Stay under that and you're fine.
Extracting Server Metadata from Invite Links
Every public invite link contains a server ID. Hit the invite API endpoint and you get server metadata without joining:
import httpx
def resolve_invite(invite_code: str, proxy: str | None = None) -> dict | None:
    """Resolve a Discord invite to get server metadata without joining.

    Args:
        invite_code: Invite code or vanity slug (the part after discord.gg/).
        proxy: Optional proxy URL (e.g. "http://user:pass@host:port"); when
            given, the request is routed through it. Added so batch callers
            that pass proxy= work (previously this raised TypeError).

    Returns:
        Dict of server metadata, or None for invalid/expired invites
        (the API returns 404 for those).
    """
    url = f"https://discord.com/api/v10/invites/{invite_code}"
    params = {"with_counts": "true", "with_expiration": "true"}
    request_kwargs = {"params": params, "timeout": 10}
    if proxy:
        # httpx routes every scheme through one proxy with the all:// mapping.
        request_kwargs["proxies"] = {"all://": proxy}
    resp = httpx.get(url, **request_kwargs)
    if resp.status_code != 200:
        return None
    data = resp.json()
    guild = data.get("guild", {})
    return {
        "server_id": guild.get("id"),
        "name": guild.get("name"),
        "description": guild.get("description"),
        "icon_url": f"https://cdn.discordapp.com/icons/{guild['id']}/{guild['icon']}.png" if guild.get("icon") else None,
        "splash_url": f"https://cdn.discordapp.com/splashes/{guild['id']}/{guild['splash']}.png" if guild.get("splash") else None,
        "member_count": data.get("approximate_member_count"),
        "online_count": data.get("approximate_presence_count"),
        "verification_level": guild.get("verification_level"),
        "features": guild.get("features", []),
        "nsfw": guild.get("nsfw", False),
        "vanity_url": guild.get("vanity_url_code"),
    }
# Example — resolve a community by its vanity slug instead of a random code.
info = resolve_invite("python")  # Python Discord's vanity URL
if info:
    print(f"{info['name']}: {info['member_count']} members, {info['online_count']} online")
Server Discovery with Playwright
For Disboard and similar directories that use Cloudflare protection, Playwright handles the JavaScript challenge automatically:
from playwright.sync_api import sync_playwright
from selectolax.parser import HTMLParser
import time
def discover_servers_playwright(category: str, max_pages: int = 3) -> list[dict]:
    """Use Playwright to scrape Disboard with full JS rendering.

    Launches headless Chromium (which clears Cloudflare's JS challenge),
    walks up to max_pages listing pages for the category, and collects the
    server names found on each page.
    """
    found: list[dict] = []
    with sync_playwright() as pw:
        browser = pw.chromium.launch(headless=True)
        context = browser.new_context(
            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
            viewport={"width": 1920, "height": 1080},
        )
        page = context.new_page()
        for page_number in range(1, max_pages + 1):
            page.goto(
                f"https://disboard.org/servers/tag/{category}/{page_number}",
                wait_until="networkidle",
            )
            # Server cards render client-side; block until they exist.
            page.wait_for_selector(".server-info", timeout=10000)
            tree = HTMLParser(page.content())
            for card in tree.css(".server-info"):
                name_el = card.css_first(".server-info-name")
                if name_el is None:
                    continue
                found.append({
                    "name": name_el.text(strip=True),
                    "page": page_number,
                })
            print(f"Page {page_number}: {len(found)} total servers")
            time.sleep(5)
        browser.close()
    return found
Handling Anti-Bot Measures
Disboard and top.gg both use Cloudflare. Discord's own API has rate limits. Here's what works:
Rate limits on Discord API: The invite and widget endpoints allow about 50 requests per second globally, but per-IP it's closer to 5/s. Space your requests 200-300ms apart and you'll never hit a 429.
Cloudflare on listing sites: Playwright handles most challenges, but if you're running at scale (thousands of pages), you need residential proxies. ThorData's residential proxy network works well here — their rotating residential IPs pass Cloudflare's checks consistently, and you can target specific regions if you need geo-specific server listings.
Fingerprinting: Disboard checks for automation signatures. Use playwright-stealth to patch common detection vectors:
# pip install playwright-stealth
from playwright_stealth import stealth_sync

# After creating the page and before the first navigation, patch common
# automation giveaways (navigator.webdriver, missing plugins, etc.):
stealth_sync(page)
Building a Server Database
Combine all three data sources into a single pipeline:
import sqlite3
import json
def init_db(path: str = "discord_servers.db"):
    """Open (or create) the SQLite server database and ensure the schema.

    Returns an open sqlite3 connection with the `servers` table in place.
    """
    connection = sqlite3.connect(path)
    schema = """
    CREATE TABLE IF NOT EXISTS servers (
        server_id TEXT PRIMARY KEY,
        name TEXT,
        description TEXT,
        member_count INTEGER,
        online_count INTEGER,
        verification_level INTEGER,
        features TEXT,
        tags TEXT,
        source TEXT,
        scraped_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
    )
    """
    connection.execute(schema)
    connection.commit()
    return connection
def upsert_server(conn, server: dict):
    """Insert or refresh a server row keyed by server_id.

    Args:
        conn: Open sqlite3 connection with the `servers` table.
        server: Dict of server fields; missing keys become NULL
            (source defaults to "unknown").

    Fix: the original ON CONFLICT clause refreshed only name and the two
    counts, so description/verification_level/features/tags/source went
    stale on re-scrape. All mutable columns are now updated, and
    scraped_at is bumped to the current timestamp.
    """
    conn.execute("""
        INSERT INTO servers (server_id, name, description, member_count, online_count,
                             verification_level, features, tags, source)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
        ON CONFLICT(server_id) DO UPDATE SET
            name=excluded.name,
            description=excluded.description,
            member_count=excluded.member_count,
            online_count=excluded.online_count,
            verification_level=excluded.verification_level,
            features=excluded.features,
            tags=excluded.tags,
            source=excluded.source,
            scraped_at=CURRENT_TIMESTAMP
    """, (
        server.get("server_id"),
        server.get("name"),
        server.get("description"),
        server.get("member_count"),
        server.get("online_count"),
        server.get("verification_level"),
        json.dumps(server.get("features", [])),   # stored as JSON text
        json.dumps(server.get("tags", [])),       # stored as JSON text
        server.get("source", "unknown"),
    ))
    conn.commit()
What to Watch Out For
- Discord ToS: Scraping Discord directly (via their API without a bot token, or automating the web client) violates their ToS. The methods here use public endpoints and third-party directories, which is a gray area. Don't automate the Discord web client.
- Disboard rate limiting: They'll soft-ban your IP after about 50 rapid requests. Keep it to one page every 3-5 seconds.
- Stale invite codes: Many invite links expire or get revoked. The invite API will return 404 — handle that gracefully.
- Widget adoption: Only about 30% of public servers have widgets enabled. Don't rely on widget.json as your only data source.
- Member count accuracy: Disboard member counts lag behind reality by hours or days. The invite API's approximate_member_count is more current but still approximate.
Wrapping Up
The combination of Disboard scraping, widget.json, and invite resolution gives you solid coverage of public Discord servers. Disboard gets you discovery and categorization, widget.json gives real-time presence data, and the invite API fills in server metadata.
Start with the invite API for targeted lookups, use Disboard for bulk discovery, and fall back to Playwright when Cloudflare blocks plain HTTP. Keep your request rates reasonable and you'll build a comprehensive server database without getting blocked.
Bulk Server ID Discovery
Finding server IDs at scale requires systematic approaches since Discord doesn't have a public server directory:
import httpx
import json
import time
import random
from selectolax.parser import HTMLParser
# Multiple listing sources — third-party directories used for bulk server
# discovery. Values are the base listing URLs; category/page path segments
# are appended per site by the scraper functions below.
LISTING_SOURCES = {
    "disboard": "https://disboard.org/servers",
    "top_gg": "https://top.gg/servers",
    "discord_me": "https://discord.me/servers",
    "discordservers": "https://discordservers.com",
}
def scrape_top_gg_servers(category: str = "gaming", pages: int = 5) -> list:
    """Scrape server listings from top.gg (requires Playwright for JS rendering).

    Walks the category listing pages in a headless Chromium session and
    extracts id/name/member-count/description from each server card.
    Stops at the first page that errors (timeout, missing selector, etc.).
    """
    from playwright.sync_api import sync_playwright

    collected = []
    with sync_playwright() as pw:
        browser = pw.chromium.launch(headless=True)
        context = browser.new_context(
            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
            viewport={"width": 1920, "height": 1080},
        )
        page = context.new_page()
        for page_no in range(1, pages + 1):
            try:
                page.goto(
                    f"https://top.gg/servers/{category}?page={page_no}",
                    wait_until="networkidle",
                    timeout=25000,
                )
                page.wait_for_selector("[class*='ServerCard'], [data-server-id]", timeout=10000)
                tree = HTMLParser(page.content())
                for card in tree.css("[class*='ServerCard'], [data-server-id]"):
                    name_el = card.css_first("[class*='name'], h3")
                    if name_el is None:
                        continue
                    member_el = card.css_first("[class*='members'], [class*='member-count']")
                    desc_el = card.css_first("[class*='description'], p")
                    collected.append({
                        "server_id": card.attributes.get("data-server-id"),
                        "name": name_el.text(strip=True),
                        "member_count": member_el.text(strip=True) if member_el else None,
                        "description": desc_el.text(strip=True)[:200] if desc_el else None,
                        "source": "top_gg",
                        "category": category,
                    })
                # Randomized pause keeps the request cadence human-ish.
                time.sleep(random.uniform(3, 6))
            except Exception as e:
                print(f" Page {page_no}: {e}")
                break
        browser.close()
    return collected
def scrape_discord_me(category: str = None, pages: int = 5) -> list:
    """Scrape discord.me for server listings.

    Args:
        category: Optional category slug inserted into the listing URL.
        pages: Number of listing pages to walk.

    Returns:
        List of dicts with name, description (truncated to 200 chars),
        invite_url, member_count_text, and source.

    Fix: the original opened a brand-new httpx.Client inside the page loop,
    throwing away the connection pool on every iteration; one client now
    serves all pages.
    """
    servers = []
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml",
        "Accept-Language": "en-US,en;q=0.9",
    }
    with httpx.Client(headers=headers, timeout=15, follow_redirects=True) as client:
        for pg in range(1, pages + 1):
            url = "https://discord.me/servers"
            if category:
                url += f"/{category}"
            url += f"/{pg}"
            resp = client.get(url)
            if resp.status_code != 200:
                break
            tree = HTMLParser(resp.text)
            for card in tree.css(".server-card, .server"):
                name_el = card.css_first(".server-name, h3")
                desc_el = card.css_first(".server-description, p")
                invite_el = card.css_first("a[href*='discord.gg']")
                member_el = card.css_first(".member-count, [class*='members']")
                if name_el:
                    servers.append({
                        "name": name_el.text(strip=True),
                        "description": desc_el.text(strip=True)[:200] if desc_el else None,
                        "invite_url": invite_el.attributes.get("href") if invite_el else None,
                        "member_count_text": member_el.text(strip=True) if member_el else None,
                        "source": "discord_me",
                    })
            time.sleep(random.uniform(2, 4))
    return servers
Tracking Server Growth Over Time
Store widget snapshots regularly to track member count trends:
import sqlite3
from datetime import datetime
def init_tracking_db(db_path: str = "discord_tracking.db") -> sqlite3.Connection:
    """Open (or create) the tracking database and ensure all tables exist.

    Tables:
      - servers: one row per server (static metadata, first/last seen).
      - member_snapshots: append-only member/online count time series.
      - widget_data: per-snapshot widget payload summary.
    """
    connection = sqlite3.connect(db_path)
    ddl = """
    CREATE TABLE IF NOT EXISTS servers (
        server_id TEXT PRIMARY KEY,
        name TEXT,
        description TEXT,
        icon_url TEXT,
        verification_level INTEGER,
        features TEXT,
        vanity_url TEXT,
        source TEXT,
        first_seen TEXT,
        last_seen TEXT
    );
    CREATE TABLE IF NOT EXISTS member_snapshots (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        server_id TEXT,
        member_count INTEGER,
        online_count INTEGER,
        snapshot_at TEXT,
        FOREIGN KEY (server_id) REFERENCES servers(server_id)
    );
    CREATE TABLE IF NOT EXISTS widget_data (
        server_id TEXT,
        snapshot_at TEXT,
        presence_count INTEGER,
        channel_count INTEGER,
        channels TEXT,
        PRIMARY KEY (server_id, snapshot_at)
    );
    CREATE INDEX IF NOT EXISTS idx_snapshots_server
        ON member_snapshots(server_id, snapshot_at);
    """
    connection.executescript(ddl)
    connection.commit()
    return connection
def record_server_snapshot(conn: sqlite3.Connection, server_data: dict):
    """Save a server data snapshot for trend tracking.

    Upserts static metadata into `servers` and, when counts are present,
    appends a row to `member_snapshots`.

    Fix: the previous ON CONFLICT clause set
    `member_count=excluded.member_count`, but the `servers` table has no
    member_count column, so every repeat sighting of a server raised
    sqlite3.OperationalError. The upsert now refreshes only columns that
    exist (and also keeps description/icon/features etc. current).
    """
    now = datetime.now().isoformat()
    server_id = server_data.get("server_id")
    if not server_id:
        return  # nothing to key the row on
    # Upsert server record: first_seen is written once; last_seen and the
    # mutable metadata columns are refreshed on every sighting.
    conn.execute(
        """INSERT INTO servers (server_id, name, description, icon_url,
            verification_level, features, vanity_url, source, first_seen, last_seen)
        VALUES (?,?,?,?,?,?,?,?,?,?)
        ON CONFLICT(server_id) DO UPDATE SET
            name=excluded.name,
            description=excluded.description,
            icon_url=excluded.icon_url,
            verification_level=excluded.verification_level,
            features=excluded.features,
            vanity_url=excluded.vanity_url,
            source=excluded.source,
            last_seen=excluded.last_seen""",
        (
            server_id,
            server_data.get("name"),
            server_data.get("description"),
            server_data.get("icon_url"),
            server_data.get("verification_level"),
            json.dumps(server_data.get("features", [])),
            server_data.get("vanity_url"),
            server_data.get("source", "widget"),
            now, now,
        ),
    )
    # Record member count snapshot — the time series behind growth analysis.
    member_count = server_data.get("member_count")
    online_count = server_data.get("online_count")
    if member_count is not None or online_count is not None:
        conn.execute(
            "INSERT INTO member_snapshots (server_id, member_count, online_count, snapshot_at) VALUES (?,?,?,?)",
            (server_id, member_count, online_count, now),
        )
    conn.commit()
def compute_growth_rates(conn: sqlite3.Connection, days_back: int = 30) -> list:
    """Compute member growth rates over the past N days.

    Args:
        conn: Open tracking-db connection (needs `servers` and
            `member_snapshots`; window functions require SQLite >= 3.25).
        days_back: Lookback window for the *first* snapshot. The latest
            snapshot is deliberately taken from all history so the "current"
            count is always the most recent one recorded.

    Returns:
        Up to 20 growing servers, sorted by growth_pct descending, each as
        {"name", "server_id", "start_count", "current_count", "growth_pct"}.
    """
    from datetime import timedelta  # fix: replaces the __import__("datetime").timedelta hack

    cutoff = (datetime.now() - timedelta(days=days_back)).isoformat()
    growth_data = conn.execute("""
        WITH first_snap AS (
            SELECT server_id, member_count,
                   ROW_NUMBER() OVER (PARTITION BY server_id ORDER BY snapshot_at ASC) rn
            FROM member_snapshots WHERE snapshot_at >= ?
        ),
        last_snap AS (
            SELECT server_id, member_count,
                   ROW_NUMBER() OVER (PARTITION BY server_id ORDER BY snapshot_at DESC) rn
            FROM member_snapshots
        ),
        combined AS (
            SELECT l.server_id,
                   f.member_count as first_count,
                   l.member_count as last_count
            FROM first_snap f
            JOIN last_snap l ON f.server_id = l.server_id
            WHERE f.rn = 1 AND l.rn = 1
        )
        SELECT s.name, c.server_id,
               c.first_count, c.last_count,
               CASE WHEN c.first_count > 0
                    THEN ROUND((c.last_count - c.first_count) * 100.0 / c.first_count, 1)
                    ELSE NULL END as growth_pct
        FROM combined c
        JOIN servers s ON s.server_id = c.server_id
        WHERE c.last_count > c.first_count
        ORDER BY growth_pct DESC
        LIMIT 20
    """, (cutoff,)).fetchall()
    return [
        {
            "name": row[0],
            "server_id": row[1],
            "start_count": row[2],
            "current_count": row[3],
            "growth_pct": row[4],
        }
        for row in growth_data
    ]
Rate Limits and Proxy Configuration
Discord's API endpoints have different rate limits depending on the endpoint:
| Endpoint | Rate Limit | Notes |
|---|---|---|
| widget.json | ~5/sec global | Shared across all bots/scrapers |
| Invite API | ~5/sec per IP | IP-based, resets per minute |
| Discord website | Strict | Cloudflare, needs Playwright |
| Disboard | ~1/3sec | Cloudflare protected |
| top.gg | ~1/5sec | Bot management |
For the invite and widget APIs, you can typically make 200-300 requests per minute total across all IPs before hitting global limits. For the web scraping of directory sites, ThorData's residential proxies handle Cloudflare checks that block datacenter IPs.
# Placeholder rotating-residential proxy endpoint; substitute real credentials.
PROXY = "http://USER:[email protected]:9000"
def batch_resolve_invites(invite_codes: list, proxy: str = None, conn=None) -> list:
    """Resolve multiple invite codes with rate limiting.

    Args:
        invite_codes: Invite codes/vanity slugs to resolve.
        proxy: Optional proxy URL, forwarded to resolve_invite only when set
            (fix: the original always passed proxy=, which raised TypeError
            against a resolve_invite that takes no such parameter).
        conn: Optional sqlite3 connection; when provided, each resolved
            server is recorded via record_server_snapshot (fix: the original
            referenced a global `conn` that was never defined).

    Returns:
        List of metadata dicts for the invites that resolved successfully.
    """
    results = []
    for code in invite_codes:
        data = resolve_invite(code, proxy=proxy) if proxy else resolve_invite(code)
        if data:
            results.append(data)
            if conn is not None:
                record_server_snapshot(conn, data)
        time.sleep(random.uniform(0.5, 1.5))  # Discord invite API: ~1/sec is safe
    return results
def batch_fetch_widgets(server_ids: list, proxy: str = None) -> list:
    """Fetch widget data for multiple servers.

    Returns one dict per server id: widget summary on success, a
    {"has_widget": False} marker on 403 (widget disabled), or an
    {"error": ...} entry when the request itself fails. A ~300ms pause
    between requests keeps us under the per-IP rate limit.
    """
    summaries = []
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}
    client_kwargs = {"headers": headers, "timeout": 10}
    if proxy:
        client_kwargs["proxies"] = {"all://": proxy}
    with httpx.Client(**client_kwargs) as client:
        for sid in server_ids:
            try:
                resp = client.get(f"https://discord.com/api/guilds/{sid}/widget.json")
                if resp.status_code == 200:
                    payload = resp.json()
                    summaries.append({
                        "server_id": sid,
                        "name": payload.get("name"),
                        "presence_count": payload.get("presence_count"),
                        "channels": len(payload.get("channels", [])),
                        "has_widget": True,
                    })
                elif resp.status_code == 403:
                    summaries.append({"server_id": sid, "has_widget": False})
            except Exception as e:
                summaries.append({"server_id": sid, "error": str(e)})
            time.sleep(0.3)  # Stay under ~3 req/sec per IP
    return summaries
Building a Server Intelligence Database
Combine all sources into a comprehensive server dataset:
async def run_discord_pipeline(
    categories: list = None,
    db_path: str = "discord_tracking.db",
):
    """
    Full pipeline combining Disboard, top.gg, and Discord APIs.
    Runs discovery, resolves invites, fetches widget data.
    """
    # Default category set for a broad first crawl.
    if categories is None:
        categories = ["gaming", "programming", "finance", "art", "music"]
    conn = init_tracking_db(db_path)

    # Phase 1 — bulk discovery from directory listings.
    print("Phase 1: Discovery from listing sites")
    all_servers = []
    for category in categories:
        print(f" Scraping Disboard: {category}")
        all_servers.extend(scrape_disboard_category(category, pages=3))
        time.sleep(random.uniform(5, 10))
    print(f" Found {len(all_servers)} servers from listing sites")

    # Phase 2 — turn the discovered invite links into server metadata.
    print("\nPhase 2: Resolve invite codes")
    invite_results = []
    for server in all_servers:
        invite_url = server.get("invite_url", "")
        if "discord.gg/" not in invite_url and "discord.com/invite/" not in invite_url:
            continue
        code = invite_url.split("/")[-1]
        if not code:
            continue
        data = resolve_invite(code)
        if data:
            invite_results.append(data)
            record_server_snapshot(conn, data)
        time.sleep(0.8)
    print(f" Resolved {len(invite_results)} invites")

    # Phase 3 — enrich with live widget data where available.
    print("\nPhase 3: Fetch widget data for resolved servers")
    server_ids = [s.get("server_id") for s in invite_results if s.get("server_id")]
    widget_results = batch_fetch_widgets(server_ids[:500])  # Widget API limit
    widget_count = sum(1 for w in widget_results if w.get("has_widget"))
    print(f" {widget_count}/{len(widget_results)} servers have widgets enabled")

    conn.close()
    print(f"\nPipeline complete. Data saved to {db_path}")
# Entry point: run the full discovery pipeline with the default categories.
import asyncio
asyncio.run(run_discord_pipeline())
Key Takeaways
- Discord's widget.json endpoint (`/api/guilds/{id}/widget.json`) is the cleanest data source — no authentication, returns member/online counts and channel list
- The invite API (`/api/v10/invites/{code}`) gives server name, icon, and approximate member counts for any public invite
- Disboard and top.gg are the main server directories; both use Cloudflare and need Playwright or residential proxies at scale
- ThorData's residential proxies handle Cloudflare on listing sites; the Discord API endpoints are accessible without proxies at low rates
- Store snapshots in SQLite and track growth rates over time -- the velocity data is more useful than point-in-time counts
- About 30% of public servers have widgets enabled; combine multiple data sources for better coverage