How to Scrape Discord Public Servers: Python Guide (2026)
Discord has over 200 million monthly active users and millions of public servers. If you're building a server directory, analyzing communities, or doing market research, you need to pull this data programmatically.
The catch: Discord's own API requires bot tokens with guild membership, and their ToS is aggressive about scraping. But there are several legitimate angles — public listing sites, Discord's own widget endpoints, and invite metadata — that give you rich server data without violating anything.
What Data Is Available
Before writing code, know what you can actually get:
- Disboard.org — the largest public server directory. Categories, tags, descriptions, member counts, bump history
- Discord widget.json — if a server has widgets enabled, you get member count, online count, channel list, invite link
- Invite metadata — any public invite link gives you server name, icon, member counts, verification level
- top.gg — bot listing site that also has server directories with reviews and categories
Scraping Disboard Server Listings
Disboard is the most data-rich source. Servers are listed by category with tags, descriptions, and member counts.
import httpx
from selectolax.parser import HTMLParser
import time
import json
def scrape_disboard_category(category: str, pages: int = 5) -> list[dict]:
    """Scrape server listings from a Disboard category.

    Args:
        category: Disboard tag slug, e.g. "gaming".
        pages: Maximum number of listing pages to fetch.

    Returns:
        One dict per server card with name, description, member_count
        (raw display text as shown on the page), category, page, tags,
        and invite_url when a Discord link is present on the card.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
        "Accept-Language": "en-US,en;q=0.9",
    }
    servers: list[dict] = []
    # Fix: reuse one pooled client instead of opening a fresh connection
    # per page with httpx.get().
    with httpx.Client(headers=headers, timeout=15) as client:
        for page in range(1, pages + 1):
            url = f"https://disboard.org/servers/tag/{category}/{page}"
            resp = client.get(url)
            if resp.status_code != 200:
                print(f"Page {page}: status {resp.status_code}")
                break
            tree = HTMLParser(resp.text)
            cards = tree.css(".server-info")
            if not cards:
                # Fix: a 200 page with no listing cards means we've walked
                # past the last page (or got a challenge page) — stop instead
                # of fetching and sleeping through empty pages.
                break
            for card in cards:
                name_el = card.css_first(".server-info-name")
                desc_el = card.css_first(".server-info-description")
                count_el = card.css_first(".server-info-member-count")
                server = {
                    "name": name_el.text(strip=True) if name_el else None,
                    "description": desc_el.text(strip=True) if desc_el else None,
                    "member_count": count_el.text(strip=True) if count_el else None,
                    "category": category,
                    "page": page,
                }
                # Extract tags
                server["tags"] = [t.text(strip=True) for t in card.css(".tag")]
                # Extract invite link
                link_el = card.css_first("a[href*='discord']")
                if link_el:
                    server["invite_url"] = link_el.attributes.get("href")
                servers.append(server)
            print(f"Page {page}: found {len(servers)} servers total")
            time.sleep(3)  # be respectful
    return servers
# Example usage: pull three pages of the "gaming" tag and preview two results.
gaming_servers = scrape_disboard_category("gaming", pages=3)
preview = gaming_servers[:2]
print(json.dumps(preview, indent=2))
Disboard uses Cloudflare, so plain HTTP requests may get challenged on repeated hits. More on handling that below.
Discord Widget.json — The Official Backdoor
If a server has the widget enabled (many public servers do), Discord serves a JSON endpoint with no authentication needed:
import httpx
def get_server_widget(server_id: str) -> dict | None:
    """Fetch public widget data for a Discord server.

    Returns a trimmed dict of widget fields for servers with the widget
    enabled, or None when the widget is disabled (HTTP 403) or the request
    fails with any other status.
    """
    url = f"https://discord.com/api/guilds/{server_id}/widget.json"
    resp = httpx.get(url, timeout=10)
    if resp.status_code == 403:
        # Server owner has the widget turned off — not an error for us.
        return None
    if resp.status_code != 200:
        print(f"Error {resp.status_code} for {server_id}")
        return None
    payload = resp.json()
    channels = [
        {"id": ch["id"], "name": ch["name"], "position": ch["position"]}
        for ch in payload.get("channels", [])
    ]
    return {
        "id": payload.get("id"),
        "name": payload.get("name"),
        "instant_invite": payload.get("instant_invite"),
        "presence_count": payload.get("presence_count"),
        "channels": channels,
        "members_online": len(payload.get("members", [])),
    }
# Example — look up a large public server by its numeric guild ID.
widget = get_server_widget("1234567890")
if widget is not None:
    print(f"{widget['name']}: {widget['presence_count']} online")
This is rate-limited to about 5 requests per second per IP. Stay under that and you're fine.
Extracting Server Metadata from Invite Links
Every public invite link contains a server ID. Hit the invite API endpoint and you get server metadata without joining:
import httpx
def resolve_invite(invite_code: str, proxy: str | None = None) -> dict | None:
    """Resolve a Discord invite to get server metadata without joining.

    Args:
        invite_code: Invite code or vanity slug (the part after discord.gg/).
        proxy: Optional proxy URL (e.g. "http://user:pass@host:port"); when
            given, the request is routed through it. Added so batch callers
            that pass proxy= work (previously this raised TypeError).

    Returns:
        Dict of server metadata, or None for invalid/expired invites
        (the API returns 404 for those).
    """
    url = f"https://discord.com/api/v10/invites/{invite_code}"
    params = {"with_counts": "true", "with_expiration": "true"}
    request_kwargs = {"params": params, "timeout": 10}
    if proxy:
        # httpx routes every scheme through one proxy with the all:// mapping.
        request_kwargs["proxies"] = {"all://": proxy}
    resp = httpx.get(url, **request_kwargs)
    if resp.status_code != 200:
        return None
    data = resp.json()
    guild = data.get("guild", {})
    return {
        "server_id": guild.get("id"),
        "name": guild.get("name"),
        "description": guild.get("description"),
        "icon_url": f"https://cdn.discordapp.com/icons/{guild['id']}/{guild['icon']}.png" if guild.get("icon") else None,
        "splash_url": f"https://cdn.discordapp.com/splashes/{guild['id']}/{guild['splash']}.png" if guild.get("splash") else None,
        "member_count": data.get("approximate_member_count"),
        "online_count": data.get("approximate_presence_count"),
        "verification_level": guild.get("verification_level"),
        "features": guild.get("features", []),
        "nsfw": guild.get("nsfw", False),
        "vanity_url": guild.get("vanity_url_code"),
    }
# Example — resolve a community by its vanity slug instead of a random code.
info = resolve_invite("python")  # Python Discord's vanity URL
if info:
    print(f"{info['name']}: {info['member_count']} members, {info['online_count']} online")
Server Discovery with Playwright
For Disboard and similar directories that use Cloudflare protection, Playwright handles the JavaScript challenge automatically:
from playwright.sync_api import sync_playwright
from selectolax.parser import HTMLParser
import time
def discover_servers_playwright(category: str, max_pages: int = 3) -> list[dict]:
    """Use Playwright to scrape Disboard with full JS rendering.

    Launches headless Chromium (which clears Cloudflare's JS challenge),
    walks up to max_pages listing pages for the category, and collects the
    server names found on each page.
    """
    found: list[dict] = []
    with sync_playwright() as pw:
        browser = pw.chromium.launch(headless=True)
        context = browser.new_context(
            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
            viewport={"width": 1920, "height": 1080},
        )
        page = context.new_page()
        for page_number in range(1, max_pages + 1):
            page.goto(
                f"https://disboard.org/servers/tag/{category}/{page_number}",
                wait_until="networkidle",
            )
            # Server cards render client-side; block until they exist.
            page.wait_for_selector(".server-info", timeout=10000)
            tree = HTMLParser(page.content())
            for card in tree.css(".server-info"):
                name_el = card.css_first(".server-info-name")
                if name_el is None:
                    continue
                found.append({
                    "name": name_el.text(strip=True),
                    "page": page_number,
                })
            print(f"Page {page_number}: {len(found)} total servers")
            time.sleep(5)
        browser.close()
    return found
Handling Anti-Bot Measures
Disboard and top.gg both use Cloudflare. Discord's own API has rate limits. Here's what works:
Rate limits on Discord API: The invite and widget endpoints allow about 50 requests per second globally, but per-IP it's closer to 5/s. Space your requests 200-300ms apart and you'll never hit a 429.
Cloudflare on listing sites: Playwright handles most challenges, but if you're running at scale (thousands of pages), you need residential proxies. ThorData's residential proxy network works well here — their rotating residential IPs pass Cloudflare's checks consistently, and you can target specific regions if you need geo-specific server listings.
Fingerprinting: Disboard checks for automation signatures. Use playwright-stealth to patch common detection vectors:
# pip install playwright-stealth
from playwright_stealth import stealth_sync

# After creating the page and before the first navigation, patch common
# automation giveaways (navigator.webdriver, missing plugins, etc.):
stealth_sync(page)
Building a Server Database
Combine all three data sources into a single pipeline:
import sqlite3
import json
def init_db(path: str = "discord_servers.db"):
    """Open (or create) the SQLite server database and ensure the schema.

    Returns an open sqlite3 connection with the `servers` table in place.
    """
    connection = sqlite3.connect(path)
    schema = """
    CREATE TABLE IF NOT EXISTS servers (
        server_id TEXT PRIMARY KEY,
        name TEXT,
        description TEXT,
        member_count INTEGER,
        online_count INTEGER,
        verification_level INTEGER,
        features TEXT,
        tags TEXT,
        source TEXT,
        scraped_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
    )
    """
    connection.execute(schema)
    connection.commit()
    return connection
def upsert_server(conn, server: dict):
    """Insert or refresh a server row keyed by server_id.

    Args:
        conn: Open sqlite3 connection with the `servers` table.
        server: Dict of server fields; missing keys become NULL
            (source defaults to "unknown").

    Fix: the original ON CONFLICT clause refreshed only name and the two
    counts, so description/verification_level/features/tags/source went
    stale on re-scrape. All mutable columns are now updated, and
    scraped_at is bumped to the current timestamp.
    """
    conn.execute("""
        INSERT INTO servers (server_id, name, description, member_count, online_count,
                             verification_level, features, tags, source)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
        ON CONFLICT(server_id) DO UPDATE SET
            name=excluded.name,
            description=excluded.description,
            member_count=excluded.member_count,
            online_count=excluded.online_count,
            verification_level=excluded.verification_level,
            features=excluded.features,
            tags=excluded.tags,
            source=excluded.source,
            scraped_at=CURRENT_TIMESTAMP
    """, (
        server.get("server_id"),
        server.get("name"),
        server.get("description"),
        server.get("member_count"),
        server.get("online_count"),
        server.get("verification_level"),
        json.dumps(server.get("features", [])),   # stored as JSON text
        json.dumps(server.get("tags", [])),       # stored as JSON text
        server.get("source", "unknown"),
    ))
    conn.commit()
What to Watch Out For
- Discord ToS: Scraping Discord directly (via their API without a bot token, or automating the web client) violates their ToS. The methods here use public endpoints and third-party directories, which is a gray area. Don't automate the Discord web client.
- Disboard rate limiting: They'll soft-ban your IP after about 50 rapid requests. Keep it to one page every 3-5 seconds.
- Stale invite codes: Many invite links expire or get revoked. The invite API will return 404 — handle that gracefully.
- Widget adoption: Only about 30% of public servers have widgets enabled. Don't rely on widget.json as your only data source.
- Member count accuracy: Disboard member counts lag behind reality by hours or days. The invite API's approximate_member_count is more current but still approximate.
Wrapping Up
The combination of Disboard scraping, widget.json, and invite resolution gives you solid coverage of public Discord servers. Disboard gets you discovery and categorization, widget.json gives real-time presence data, and the invite API fills in server metadata.
Start with the invite API for targeted lookups, use Disboard for bulk discovery, and fall back to Playwright when Cloudflare blocks plain HTTP. Keep your request rates reasonable and you'll build a comprehensive server database without getting blocked.
Bulk Server ID Discovery
Finding server IDs at scale requires systematic approaches since Discord doesn't have a public server directory:
import httpx
import json
import time
import random
from selectolax.parser import HTMLParser
# Multiple listing sources — third-party directories used for bulk server
# discovery. Values are the base listing URLs; category/page path segments
# are appended per site by the scraper functions below.
LISTING_SOURCES = {
    "disboard": "https://disboard.org/servers",
    "top_gg": "https://top.gg/servers",
    "discord_me": "https://discord.me/servers",
    "discordservers": "https://discordservers.com",
}
def scrape_top_gg_servers(category: str = "gaming", pages: int = 5) -> list:
    """Scrape server listings from top.gg (requires Playwright for JS rendering).

    Walks the category listing pages in a headless Chromium session and
    extracts id/name/member-count/description from each server card.
    Stops at the first page that errors (timeout, missing selector, etc.).
    """
    from playwright.sync_api import sync_playwright

    collected = []
    with sync_playwright() as pw:
        browser = pw.chromium.launch(headless=True)
        context = browser.new_context(
            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
            viewport={"width": 1920, "height": 1080},
        )
        page = context.new_page()
        for page_no in range(1, pages + 1):
            try:
                page.goto(
                    f"https://top.gg/servers/{category}?page={page_no}",
                    wait_until="networkidle",
                    timeout=25000,
                )
                page.wait_for_selector("[class*='ServerCard'], [data-server-id]", timeout=10000)
                tree = HTMLParser(page.content())
                for card in tree.css("[class*='ServerCard'], [data-server-id]"):
                    name_el = card.css_first("[class*='name'], h3")
                    if name_el is None:
                        continue
                    member_el = card.css_first("[class*='members'], [class*='member-count']")
                    desc_el = card.css_first("[class*='description'], p")
                    collected.append({
                        "server_id": card.attributes.get("data-server-id"),
                        "name": name_el.text(strip=True),
                        "member_count": member_el.text(strip=True) if member_el else None,
                        "description": desc_el.text(strip=True)[:200] if desc_el else None,
                        "source": "top_gg",
                        "category": category,
                    })
                # Randomized pause keeps the request cadence human-ish.
                time.sleep(random.uniform(3, 6))
            except Exception as e:
                print(f" Page {page_no}: {e}")
                break
        browser.close()
    return collected
def scrape_discord_me(category: str = None, pages: int = 5) -> list:
    """Scrape discord.me for server listings.

    Args:
        category: Optional category slug inserted into the listing URL.
        pages: Number of listing pages to walk.

    Returns:
        List of dicts with name, description (truncated to 200 chars),
        invite_url, member_count_text, and source.

    Fix: the original opened a brand-new httpx.Client inside the page loop,
    throwing away the connection pool on every iteration; one client now
    serves all pages.
    """
    servers = []
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml",
        "Accept-Language": "en-US,en;q=0.9",
    }
    with httpx.Client(headers=headers, timeout=15, follow_redirects=True) as client:
        for pg in range(1, pages + 1):
            url = "https://discord.me/servers"
            if category:
                url += f"/{category}"
            url += f"/{pg}"
            resp = client.get(url)
            if resp.status_code != 200:
                break
            tree = HTMLParser(resp.text)
            for card in tree.css(".server-card, .server"):
                name_el = card.css_first(".server-name, h3")
                desc_el = card.css_first(".server-description, p")
                invite_el = card.css_first("a[href*='discord.gg']")
                member_el = card.css_first(".member-count, [class*='members']")
                if name_el:
                    servers.append({
                        "name": name_el.text(strip=True),
                        "description": desc_el.text(strip=True)[:200] if desc_el else None,
                        "invite_url": invite_el.attributes.get("href") if invite_el else None,
                        "member_count_text": member_el.text(strip=True) if member_el else None,
                        "source": "discord_me",
                    })
            time.sleep(random.uniform(2, 4))
    return servers
Tracking Server Growth Over Time
Store widget snapshots regularly to track member count trends:
import sqlite3
from datetime import datetime
def init_tracking_db(db_path: str = "discord_tracking.db") -> sqlite3.Connection:
    """Open (or create) the tracking database and ensure all tables exist.

    Tables:
      - servers: one row per server (static metadata, first/last seen).
      - member_snapshots: append-only member/online count time series.
      - widget_data: per-snapshot widget payload summary.
    """
    connection = sqlite3.connect(db_path)
    ddl = """
    CREATE TABLE IF NOT EXISTS servers (
        server_id TEXT PRIMARY KEY,
        name TEXT,
        description TEXT,
        icon_url TEXT,
        verification_level INTEGER,
        features TEXT,
        vanity_url TEXT,
        source TEXT,
        first_seen TEXT,
        last_seen TEXT
    );
    CREATE TABLE IF NOT EXISTS member_snapshots (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        server_id TEXT,
        member_count INTEGER,
        online_count INTEGER,
        snapshot_at TEXT,
        FOREIGN KEY (server_id) REFERENCES servers(server_id)
    );
    CREATE TABLE IF NOT EXISTS widget_data (
        server_id TEXT,
        snapshot_at TEXT,
        presence_count INTEGER,
        channel_count INTEGER,
        channels TEXT,
        PRIMARY KEY (server_id, snapshot_at)
    );
    CREATE INDEX IF NOT EXISTS idx_snapshots_server
        ON member_snapshots(server_id, snapshot_at);
    """
    connection.executescript(ddl)
    connection.commit()
    return connection
def record_server_snapshot(conn: sqlite3.Connection, server_data: dict):
    """Save a server data snapshot for trend tracking.

    Upserts static metadata into `servers` and, when counts are present,
    appends a row to `member_snapshots`.

    Fix: the previous ON CONFLICT clause set
    `member_count=excluded.member_count`, but the `servers` table has no
    member_count column, so every repeat sighting of a server raised
    sqlite3.OperationalError. The upsert now refreshes only columns that
    exist (and also keeps description/icon/features etc. current).
    """
    now = datetime.now().isoformat()
    server_id = server_data.get("server_id")
    if not server_id:
        return  # nothing to key the row on
    # Upsert server record: first_seen is written once; last_seen and the
    # mutable metadata columns are refreshed on every sighting.
    conn.execute(
        """INSERT INTO servers (server_id, name, description, icon_url,
            verification_level, features, vanity_url, source, first_seen, last_seen)
        VALUES (?,?,?,?,?,?,?,?,?,?)
        ON CONFLICT(server_id) DO UPDATE SET
            name=excluded.name,
            description=excluded.description,
            icon_url=excluded.icon_url,
            verification_level=excluded.verification_level,
            features=excluded.features,
            vanity_url=excluded.vanity_url,
            source=excluded.source,
            last_seen=excluded.last_seen""",
        (
            server_id,
            server_data.get("name"),
            server_data.get("description"),
            server_data.get("icon_url"),
            server_data.get("verification_level"),
            json.dumps(server_data.get("features", [])),
            server_data.get("vanity_url"),
            server_data.get("source", "widget"),
            now, now,
        ),
    )
    # Record member count snapshot — the time series behind growth analysis.
    member_count = server_data.get("member_count")
    online_count = server_data.get("online_count")
    if member_count is not None or online_count is not None:
        conn.execute(
            "INSERT INTO member_snapshots (server_id, member_count, online_count, snapshot_at) VALUES (?,?,?,?)",
            (server_id, member_count, online_count, now),
        )
    conn.commit()
def compute_growth_rates(conn: sqlite3.Connection, days_back: int = 30) -> list:
    """Compute member growth rates over the past N days.

    Args:
        conn: Open tracking-db connection (needs `servers` and
            `member_snapshots`; window functions require SQLite >= 3.25).
        days_back: Lookback window for the *first* snapshot. The latest
            snapshot is deliberately taken from all history so the "current"
            count is always the most recent one recorded.

    Returns:
        Up to 20 growing servers, sorted by growth_pct descending, each as
        {"name", "server_id", "start_count", "current_count", "growth_pct"}.
    """
    from datetime import timedelta  # fix: replaces the __import__("datetime").timedelta hack

    cutoff = (datetime.now() - timedelta(days=days_back)).isoformat()
    growth_data = conn.execute("""
        WITH first_snap AS (
            SELECT server_id, member_count,
                   ROW_NUMBER() OVER (PARTITION BY server_id ORDER BY snapshot_at ASC) rn
            FROM member_snapshots WHERE snapshot_at >= ?
        ),
        last_snap AS (
            SELECT server_id, member_count,
                   ROW_NUMBER() OVER (PARTITION BY server_id ORDER BY snapshot_at DESC) rn
            FROM member_snapshots
        ),
        combined AS (
            SELECT l.server_id,
                   f.member_count as first_count,
                   l.member_count as last_count
            FROM first_snap f
            JOIN last_snap l ON f.server_id = l.server_id
            WHERE f.rn = 1 AND l.rn = 1
        )
        SELECT s.name, c.server_id,
               c.first_count, c.last_count,
               CASE WHEN c.first_count > 0
                    THEN ROUND((c.last_count - c.first_count) * 100.0 / c.first_count, 1)
                    ELSE NULL END as growth_pct
        FROM combined c
        JOIN servers s ON s.server_id = c.server_id
        WHERE c.last_count > c.first_count
        ORDER BY growth_pct DESC
        LIMIT 20
    """, (cutoff,)).fetchall()
    return [
        {
            "name": row[0],
            "server_id": row[1],
            "start_count": row[2],
            "current_count": row[3],
            "growth_pct": row[4],
        }
        for row in growth_data
    ]
Rate Limits and Proxy Configuration
Discord's API endpoints have different rate limits depending on the endpoint:
| Endpoint | Rate Limit | Notes |
|---|---|---|
| widget.json | ~5/sec global | Shared across all bots/scrapers |
| Invite API | ~5/sec per IP | IP-based, resets per minute |
| Discord website | Strict | Cloudflare, needs Playwright |
| Disboard | ~1/3sec | Cloudflare protected |
| top.gg | ~1/5sec | Bot management |
For the invite and widget APIs, you can typically make 200-300 requests per minute total across all IPs before hitting global limits. For the web scraping of directory sites, ThorData's residential proxies handle Cloudflare checks that block datacenter IPs.
# Placeholder rotating-residential proxy endpoint; substitute real credentials.
PROXY = "http://USER:[email protected]:9000"
def batch_resolve_invites(invite_codes: list, proxy: str = None, conn=None) -> list:
    """Resolve multiple invite codes with rate limiting.

    Args:
        invite_codes: Invite codes/vanity slugs to resolve.
        proxy: Optional proxy URL, forwarded to resolve_invite only when set
            (fix: the original always passed proxy=, which raised TypeError
            against a resolve_invite that takes no such parameter).
        conn: Optional sqlite3 connection; when provided, each resolved
            server is recorded via record_server_snapshot (fix: the original
            referenced a global `conn` that was never defined).

    Returns:
        List of metadata dicts for the invites that resolved successfully.
    """
    results = []
    for code in invite_codes:
        data = resolve_invite(code, proxy=proxy) if proxy else resolve_invite(code)
        if data:
            results.append(data)
            if conn is not None:
                record_server_snapshot(conn, data)
        time.sleep(random.uniform(0.5, 1.5))  # Discord invite API: ~1/sec is safe
    return results
def batch_fetch_widgets(server_ids: list, proxy: str = None) -> list:
    """Fetch widget data for multiple servers.

    Returns one dict per server id: widget summary on success, a
    {"has_widget": False} marker on 403 (widget disabled), or an
    {"error": ...} entry when the request itself fails. A ~300ms pause
    between requests keeps us under the per-IP rate limit.
    """
    summaries = []
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}
    client_kwargs = {"headers": headers, "timeout": 10}
    if proxy:
        client_kwargs["proxies"] = {"all://": proxy}
    with httpx.Client(**client_kwargs) as client:
        for sid in server_ids:
            try:
                resp = client.get(f"https://discord.com/api/guilds/{sid}/widget.json")
                if resp.status_code == 200:
                    payload = resp.json()
                    summaries.append({
                        "server_id": sid,
                        "name": payload.get("name"),
                        "presence_count": payload.get("presence_count"),
                        "channels": len(payload.get("channels", [])),
                        "has_widget": True,
                    })
                elif resp.status_code == 403:
                    summaries.append({"server_id": sid, "has_widget": False})
            except Exception as e:
                summaries.append({"server_id": sid, "error": str(e)})
            time.sleep(0.3)  # Stay under ~3 req/sec per IP
    return summaries
Building a Server Intelligence Database
Combine all sources into a comprehensive server dataset:
async def run_discord_pipeline(
    categories: list = None,
    db_path: str = "discord_tracking.db",
):
    """
    Full pipeline combining Disboard, top.gg, and Discord APIs.
    Runs discovery, resolves invites, fetches widget data.
    """
    # Default category set for a broad first crawl.
    if categories is None:
        categories = ["gaming", "programming", "finance", "art", "music"]
    conn = init_tracking_db(db_path)

    # Phase 1 — bulk discovery from directory listings.
    print("Phase 1: Discovery from listing sites")
    all_servers = []
    for category in categories:
        print(f" Scraping Disboard: {category}")
        all_servers.extend(scrape_disboard_category(category, pages=3))
        time.sleep(random.uniform(5, 10))
    print(f" Found {len(all_servers)} servers from listing sites")

    # Phase 2 — turn the discovered invite links into server metadata.
    print("\nPhase 2: Resolve invite codes")
    invite_results = []
    for server in all_servers:
        invite_url = server.get("invite_url", "")
        if "discord.gg/" not in invite_url and "discord.com/invite/" not in invite_url:
            continue
        code = invite_url.split("/")[-1]
        if not code:
            continue
        data = resolve_invite(code)
        if data:
            invite_results.append(data)
            record_server_snapshot(conn, data)
        time.sleep(0.8)
    print(f" Resolved {len(invite_results)} invites")

    # Phase 3 — enrich with live widget data where available.
    print("\nPhase 3: Fetch widget data for resolved servers")
    server_ids = [s.get("server_id") for s in invite_results if s.get("server_id")]
    widget_results = batch_fetch_widgets(server_ids[:500])  # Widget API limit
    widget_count = sum(1 for w in widget_results if w.get("has_widget"))
    print(f" {widget_count}/{len(widget_results)} servers have widgets enabled")

    conn.close()
    print(f"\nPipeline complete. Data saved to {db_path}")
# Entry point: run the full discovery pipeline with the default categories.
import asyncio
asyncio.run(run_discord_pipeline())
Key Takeaways
- Discord's widget.json endpoint (`/api/guilds/{id}/widget.json`) is the cleanest data source — no authentication, returns member/online counts and channel list
- The invite API (`/api/v10/invites/{code}`) gives server name, icon, and approximate member counts for any public invite
- Disboard and top.gg are the main server directories; both use Cloudflare and need Playwright or residential proxies at scale
- ThorData's residential proxies handle Cloudflare on listing sites; the Discord API endpoints are accessible without proxies at low rates
- Store snapshots in SQLite and track growth rates over time -- the velocity data is more useful than point-in-time counts
- About 30% of public servers have widgets enabled; combine multiple data sources for better coverage