How to Scrape Metacritic Scores: Python Guide (2026)
Metacritic aggregates review scores from hundreds of publications into a single number. It's the de facto standard for game review scores and widely used for movies and TV shows. If you're building a game database, tracking review trends, or doing media analysis, you need Metacritic data.
There's no official public API. Metacritic partnered with Fandom in 2024 and rebuilt the site, which changed the scraping landscape. The good news: the new site uses JSON-LD structured data and a relatively clean HTML structure with data-testid attributes that are more stable than the old CSS class-based selectors.
What Data You Can Scrape
From Metacritic's public pages you can extract:
- Metascore — the critic aggregate score (0-100)
- User score — the community average (0-10)
- Critic review count — how many publications contributed
- User review count — community ratings
- Individual critic reviews — publication name, score, quoted excerpt
- Individual user reviews — score, text, helpful count
- Game/movie metadata — title, platform, genre, publisher, developer, release date
- Platform-specific scores — the same game can have different scores on different platforms
- Summary classification — "Universal Acclaim", "Generally Favorable", etc.
Dependencies and Setup
pip install httpx selectolax beautifulsoup4 requests
We use httpx for HTTP requests and selectolax for fast HTML parsing (BeautifulSoup works as a drop-in alternative parser if you prefer it). For pages that require Playwright (Cloudflare-challenged), we'll note where that applies.
import httpx
import json
import re
import time
import random
from selectolax.parser import HTMLParser
from bs4 import BeautifulSoup
# Desktop browser UA strings, rotated per-request to vary the fingerprint.
# NOTE: also referenced by scrape_with_playwright — keep the name stable.
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:126.0) Gecko/20100101 Firefox/126.0",
]


def get_headers() -> dict:
    """Build browser-like request headers with a randomly chosen User-Agent."""
    headers = {
        "Accept": "text/html,application/xhtml+xml",
        "Accept-Language": "en-US,en;q=0.9",
        "Accept-Encoding": "gzip, deflate, br",
        "Cache-Control": "max-age=0",
    }
    headers["User-Agent"] = random.choice(USER_AGENTS)
    return headers
JSON-LD Structured Data — The Easy Path
Every Metacritic product page embeds JSON-LD structured data in a <script> tag. This is the cleanest data source and doesn't require parsing HTML tables:
def get_metacritic_jsonld(url: str, proxy: str = None) -> dict | None:
    """Extract JSON-LD structured data from a Metacritic page.

    Args:
        url: Full Metacritic product URL.
        proxy: Optional proxy URL, applied to both http and https schemes.

    Returns:
        The first JSON-LD object carrying an aggregateRating or a media
        @type, or None if the page fails to load or no block matches.
    """
    proxies = {"http://": proxy, "https://": proxy} if proxy else None
    resp = httpx.get(
        url,
        headers=get_headers(),
        proxies=proxies,
        timeout=15,
        follow_redirects=True,
    )
    if resp.status_code != 200:
        return None
    tree = HTMLParser(resp.text)
    # Try to find the most informative JSON-LD block
    for script in tree.css('script[type="application/ld+json"]'):
        try:
            data = json.loads(script.text())
        except json.JSONDecodeError:
            continue
        # FIX: JSON-LD may be a single object OR a list of objects — the
        # old code called .get() directly and raised AttributeError on
        # lists, aborting the scan. Normalize to a list of candidates.
        candidates = data if isinstance(data, list) else [data]
        for item in candidates:
            if not isinstance(item, dict):
                continue
            # Metacritic uses the AggregateRating schema on product pages
            if "aggregateRating" in item or item.get("@type") in (
                "VideoGame", "Movie", "TVSeries", "MusicAlbum"
            ):
                return item
    return None
# Usage: pull the JSON-LD block for a single game page
data = get_metacritic_jsonld("https://www.metacritic.com/game/elden-ring/")
if data:
    rating = data.get("aggregateRating", {})
    print(f"Name: {data.get('name')}")
    print(f"Type: {data.get('@type')}")
    print(f"Score: {rating.get('ratingValue')}/100")
    print(f"Best: {rating.get('bestRating')}")
    print(f"Review count: {rating.get('reviewCount')}")
    # 'gamePlatform' is present for games; the publisher fallback covers
    # non-game pages — presumably intentional, verify against real pages
    print(f"Platform: {data.get('gamePlatform', data.get('publisher', 'N/A'))}")
    print(f"Release: {data.get('datePublished', 'N/A')}")
The JSON-LD includes the title, aggregate rating, genre, publisher, release date, and platform — often enough for a database without parsing any HTML.
Scraping Critic vs User Scores
Metacritic's value is the split between critic (Metascore) and user scores. The HTML structure separates these clearly:
def scrape_metacritic_scores(url: str, proxy: str = None) -> dict | None:
    """Scrape both critic and user scores from a Metacritic product page.

    Args:
        url: Full Metacritic product URL (game, movie, or TV page).
        proxy: Optional proxy URL, applied to both http and https traffic.

    Returns:
        Dict with url, title, metascore, user_score, critic/user counts,
        platform, release_date, score_classification, and any JSON-LD
        extras (genres, description, publisher). None on a non-200 reply.
    """
    proxies = {"http://": proxy, "https://": proxy} if proxy else None
    resp = httpx.get(url, headers=get_headers(), proxies=proxies, timeout=15, follow_redirects=True)
    if resp.status_code != 200:
        print(f"HTTP {resp.status_code} for {url}")
        return None
    tree = HTMLParser(resp.text)
    result = {"url": url}

    # Title
    title_el = tree.css_first("h1")
    result["title"] = title_el.text(strip=True) if title_el else None

    # Metascore (critic score) — data-testid first (most stable), then
    # legacy class-based selectors as fallbacks
    for selector in [
        '[data-testid="critic-score-value"]',
        '.c-siteReviewScore[title="Metascore"] span',
        '.c-productScoreInfo_scoreNumber span',
    ]:
        meta_el = tree.css_first(selector)
        if meta_el:
            score_text = meta_el.text(strip=True)
            # Unreleased titles show "tbd" — stored as None
            result["metascore"] = int(score_text) if score_text.isdigit() else None
            break

    # User score (0-10 float) — separate selectors from the Metascore
    for selector in [
        '[data-testid="user-score-value"]',
        '.c-siteReviewScore[title="User Score"] span',
        '[class*="userScore"] span',
    ]:
        user_el = tree.css_first(selector)
        if user_el:
            score_text = user_el.text(strip=True)
            try:
                result["user_score"] = float(score_text)
            except ValueError:
                result["user_score"] = None
            break

    # Critic review count
    for selector in ['[data-testid="critic-score-count"]', '.c-productScoreInfo_reviewsTotal a']:
        critic_count_el = tree.css_first(selector)
        if critic_count_el:
            count_text = re.search(r'\d+', critic_count_el.text())
            result["critic_count"] = int(count_text.group()) if count_text else None
            break

    # User review count
    for selector in ['[data-testid="user-score-count"]', '[class*="userReviews"] a']:
        user_count_el = tree.css_first(selector)
        if user_count_el:
            count_text = re.search(r'\d+', user_count_el.text())
            result["user_count"] = int(count_text.group()) if count_text else None
            break

    # Platform
    for selector in ['[data-testid="product-platform"]', '.c-ProductHero_platform span']:
        platform_el = tree.css_first(selector)
        if platform_el:
            result["platform"] = platform_el.text(strip=True)
            break

    # Release date
    for selector in ['[data-testid="product-release-date"]', '.c-ProductHero_metaItem time']:
        date_el = tree.css_first(selector)
        if date_el:
            result["release_date"] = date_el.text(strip=True)
            break

    # Score classification (Universal Acclaim, etc.).
    # FIX: '[class*="label"]' matches many unrelated elements, and the old
    # css_first() call only ever inspected the FIRST one — when that first
    # match lacked a label word the real classification was missed. Scan
    # every candidate for the selector instead.
    classification_words = ("Acclaim", "Favorable", "Mixed", "Unfavorable", "Disaster")
    for selector in ['[class*="label"]', '.c-productScoreInfo_scoreLabel']:
        found = False
        for label_el in tree.css(selector):
            if any(word in label_el.text() for word in classification_words):
                result["score_classification"] = label_el.text(strip=True)
                found = True
                break
        if found:
            break

    # Genres, description, publisher from JSON-LD (guard against
    # list-shaped JSON-LD payloads, which have no dict interface)
    for script in tree.css('script[type="application/ld+json"]'):
        try:
            ld = json.loads(script.text())
        except json.JSONDecodeError:
            continue
        if not isinstance(ld, dict):
            continue
        if "genre" in ld:
            result["genres"] = ld["genre"] if isinstance(ld["genre"], list) else [ld["genre"]]
        if "description" in ld:
            result["description"] = ld["description"][:500]
        if "publisher" in ld:
            result["publisher"] = ld["publisher"]
    return result
# Example: one-off score pull for a single game page
scores = scrape_metacritic_scores("https://www.metacritic.com/game/elden-ring/")
if scores:
    print(f"{scores['title']} ({scores.get('platform', 'N/A')})")
    print(f" Metascore: {scores.get('metascore', 'N/A')}/100 ({scores.get('critic_count', '?')} critics)")
    print(f" User score: {scores.get('user_score', 'N/A')}/10 ({scores.get('user_count', '?')} users)")
    print(f" Classification: {scores.get('score_classification', 'N/A')}")
Search and Discovery
Metacritic doesn't expose a public search API, but the search page returns HTML that's easy to parse:
def search_metacritic(query: str, category: str = "game") -> list:
    """
    Search Metacritic and return matching results.

    Args:
        query: Free-text search term; percent-encoded before the request.
        category: 'game', 'movie', 'tv', 'music', 'person', 'company'

    Returns:
        List of dicts with title, url, and (where present) metascore and
        platform. Empty list on a non-200 response.
    """
    from urllib.parse import quote
    # FIX: the raw query was interpolated into the URL path unescaped —
    # spaces and punctuation produced malformed request paths.
    url = f"https://www.metacritic.com/search/{quote(query)}/"
    params = {"category": category}
    resp = httpx.get(
        url,
        headers=get_headers(),
        params=params,
        timeout=15,
        follow_redirects=True,
    )
    if resp.status_code != 200:
        return []
    tree = HTMLParser(resp.text)
    results = []
    # Try data-testid based selectors first (newer Fandom-era site)
    for card in tree.css('[data-testid="search-result"], .c-pageSiteSearch-results-item'):
        title_el = card.css_first("h3, [class*='title']")
        link_el = card.css_first("a")
        score_el = card.css_first('[data-testid="critic-score-value"], [class*="metascore"] span')
        platform_el = card.css_first('[class*="platform"]')
        result = {
            "title": title_el.text(strip=True) if title_el else None,
            "url": None,
        }
        if link_el:
            href = link_el.attributes.get("href", "")
            # Site-relative links become absolute
            result["url"] = (
                f"https://www.metacritic.com{href}"
                if href.startswith("/")
                else href
            )
        if score_el:
            s = score_el.text(strip=True)
            result["metascore"] = int(s) if s.isdigit() else None
        if platform_el:
            result["platform"] = platform_el.text(strip=True)
        if result["title"]:
            results.append(result)
    return results
# Find games matching a query and preview the top five hits
results = search_metacritic("zelda", category="game")
for r in results[:5]:
    score = r.get("metascore", "N/A")
    platform = r.get("platform", "")
    print(f" [{score}] {r['title']} {f'({platform})' if platform else ''}")
Bulk Scraping: Game Lists by Platform
Build a comprehensive database starting from Metacritic's browse pages which list games by platform and score:
def scrape_game_list(platform: str = "pc", page: int = 1) -> list:
    """Scrape one page of game listings from Metacritic's browse section."""
    url = f"https://www.metacritic.com/browse/game/{platform}/all/date/desc/"
    resp = httpx.get(
        url,
        headers=get_headers(),
        params={"page": page},
        timeout=15,
        follow_redirects=True,
    )
    if resp.status_code != 200:
        return []
    games = []
    # Newer data-testid cards first; legacy finder cards as a fallback
    for card in HTMLParser(resp.text).css('[data-testid="product-card"], .c-finderProductCard'):
        name_node = card.css_first("h3, [class*='title']")
        entry = {
            "title": name_node.text(strip=True) if name_node else None,
            "platform": platform,
        }
        score_node = card.css_first('[data-testid="critic-score"], .c-siteReviewScore span')
        if score_node:
            raw = score_node.text(strip=True)
            entry["metascore"] = int(raw) if raw.isdigit() else None
        date_node = card.css_first('[data-testid="product-date"], [class*="releaseDate"]')
        if date_node:
            entry["release_date"] = date_node.text(strip=True)
        anchor = card.css_first("a")
        if anchor:
            href = anchor.attributes.get("href", "")
            # Normalize site-relative hrefs to absolute URLs
            entry["url"] = href if href.startswith("http") else f"https://www.metacritic.com{href}"
        if entry["title"]:
            games.append(entry)
    return games
def scrape_all_games(platform: str = "pc", max_pages: int = 5, delay: float = 2.0) -> list:
    """Scrape multiple pages of game listings for a platform.

    Stops early as soon as a page comes back empty; sleeps a jittered
    delay between pages to stay polite.
    """
    collected = []
    for page_num in range(1, max_pages + 1):
        batch = scrape_game_list(platform, page_num)
        if not batch:
            print(f"No results on page {page_num}, stopping")
            break
        collected.extend(batch)
        print(f"Page {page_num}: {len(batch)} games ({len(collected)} total)")
        time.sleep(delay + random.uniform(0, 1))
    return collected
# Available platforms — slugs Metacritic accepts in /browse/game/<slug>/ URLs
PLATFORMS = [
    "pc", "playstation-5", "xbox-series-x",
    "nintendo-switch", "playstation-4", "xbox-one",
    "ios", "android",
]
# Example bulk pulls; each call sleeps between pages (see scrape_all_games)
pc_games = scrape_all_games("pc", max_pages=3)
ps5_games = scrape_all_games("playstation-5", max_pages=3)
Review Aggregation: Collecting Individual Critic Reviews
Beyond the aggregate score, you can pull individual critic reviews from the review pages:
def scrape_critic_reviews(game_url: str, proxy: str = None) -> list:
    """Scrape individual critic reviews for a game.

    Args:
        game_url: The game's product-page URL (trailing slash optional).
        proxy: Optional proxy URL, applied to both http and https schemes.

    Returns:
        List of dicts (publication, score, quote, author, date,
        review_url). Empty list on a non-200 response.
    """
    reviews_url = f"{game_url.rstrip('/')}/critic-reviews/"
    proxies = {"http://": proxy, "https://": proxy} if proxy else None
    resp = httpx.get(reviews_url, headers=get_headers(), proxies=proxies, timeout=15, follow_redirects=True)
    if resp.status_code != 200:
        return []
    tree = HTMLParser(resp.text)
    reviews = []
    for card in tree.css('[data-testid="critic-review"], .c-siteReview'):
        publication_el = card.css_first(
            '[data-testid="review-publication"], .c-siteReview_publicationName'
        )
        score_el = card.css_first(
            '[data-testid="review-score"], .c-siteReviewScore span'
        )
        quote_el = card.css_first(
            '[data-testid="review-quote"], .c-siteReview_quote p'
        )
        author_el = card.css_first('[data-testid="review-author"], .c-siteReview_author')
        date_el = card.css_first('[data-testid="review-date"], time')
        link_el = card.css_first('a[href*="review"]')
        # FIX: review hrefs are typically site-relative; every other
        # function in this file normalizes them to absolute URLs, but
        # this one stored the raw href. Normalize for consistency.
        review_url = link_el.attributes.get("href") if link_el else None
        if review_url and review_url.startswith("/"):
            review_url = f"https://www.metacritic.com{review_url}"
        review = {
            "publication": publication_el.text(strip=True) if publication_el else None,
            "quote": quote_el.text(strip=True) if quote_el else None,
            "author": author_el.text(strip=True) if author_el else None,
            "date": date_el.text(strip=True) if date_el else None,
            "review_url": review_url,
        }
        if score_el:
            s = score_el.text(strip=True)
            review["score"] = int(s) if s.isdigit() else None
        if review["publication"]:
            reviews.append(review)
    return reviews
def scrape_user_reviews(game_url: str, page: int = 1, proxy: str = None) -> list:
    """Scrape one page of user reviews from Metacritic.

    Keeps a review only when it carries at least some text or a score.
    """
    endpoint = f"{game_url.rstrip('/')}/user-reviews/"
    proxy_map = {"http://": proxy, "https://": proxy} if proxy else None
    resp = httpx.get(
        endpoint, headers=get_headers(), params={"page": page},
        proxies=proxy_map, timeout=15, follow_redirects=True
    )
    if resp.status_code != 200:
        return []
    collected = []
    for card in HTMLParser(resp.text).css('[data-testid="user-review"], .c-siteReview_isUser'):
        body_node = card.css_first(".c-siteReview_quote p, [data-testid='review-quote']")
        author_node = card.css_first(".c-siteReview_author, [data-testid='review-author']")
        when_node = card.css_first("time")
        entry = {
            "text": body_node.text(strip=True) if body_node else None,
            "author": author_node.text(strip=True) if author_node else None,
            "date": when_node.text(strip=True) if when_node else None,
        }
        score_node = card.css_first(".c-siteReviewScore span")
        if score_node:
            raw = score_node.text(strip=True)
            try:
                entry["score"] = float(raw)
            except ValueError:
                entry["score"] = None
        votes_node = card.css_first("[class*='helpfulCount'], [data-testid='helpful-count']")
        if votes_node:
            match = re.search(r'\d+', votes_node.text())
            entry["helpful_count"] = int(match.group()) if match else 0
        if entry.get("text") or entry.get("score") is not None:
            collected.append(entry)
    return collected
# Get individual reviews for one game and preview the first five
reviews = scrape_critic_reviews("https://www.metacritic.com/game/elden-ring/")
for r in reviews[:5]:
    score = r.get("score", "?")
    pub = r.get("publication", "Unknown")
    quote = (r.get("quote") or "")[:80]  # trim long excerpts for display
    print(f" [{score}] {pub}: {quote}...")
Cross-Platform Score Comparison
For multi-platform games, Metacritic scores often differ across platforms. Here's how to collect and compare them:
# Display-name -> URL-slug mapping for the platforms to compare
PLATFORM_SLUGS = {
    "PC": "pc",
    "PlayStation 5": "playstation-5",
    "Xbox Series X": "xbox-series-x",
    "Nintendo Switch": "nintendo-switch",
    "PlayStation 4": "playstation-4",
}
def scrape_multiplatform_scores(game_slug: str, proxy: str = None) -> dict:
    """
    Collect Metacritic scores for a game across all available platforms.

    Args:
        game_slug: the URL slug (e.g., 'elden-ring' from the URL)
        proxy: optional proxy URL forwarded to scrape_metacritic_scores

    Returns:
        Mapping of platform display name -> score dict; platforms that
        return no Metascore are omitted.
    """
    results = {}
    for platform_name, platform_slug in PLATFORM_SLUGS.items():
        # Platform-specific scores are selected via the ?platform= query
        # parameter. FIX: the original built this URL but then fetched the
        # base URL, so every platform recorded the identical default score.
        platform_url = f"https://www.metacritic.com/game/{game_slug}/?platform={platform_slug}"
        try:
            scores = scrape_metacritic_scores(platform_url, proxy=proxy)
            if scores and scores.get("metascore"):
                results[platform_name] = scores
                print(f" {platform_name}: {scores.get('metascore')}/100 ({scores.get('critic_count', '?')} critics)")
        except Exception as e:
            print(f" {platform_name}: error — {e}")
        time.sleep(random.uniform(1.5, 3.0))
    return results
# Compare one game's scores across every platform in PLATFORM_SLUGS
comparison = scrape_multiplatform_scores("elden-ring")
Anti-Bot Measures and Proxies
Metacritic (now on Fandom's infrastructure) uses Cloudflare for bot protection. Here's what you'll run into:
Cloudflare challenges: Plain httpx or requests calls work for the first 20-30 requests, then you'll start getting 403s or challenge pages. The JSON-LD data is usually accessible longer than the full HTML parsing since it's served on the initial page load.
Rate limits: Metacritic throttles after about 60 requests per minute from a single IP. You'll get 429 responses or Cloudflare challenge pages.
What works at scale: For scraping hundreds of game pages, you need IP rotation. ThorData's residential proxies handle Cloudflare-protected sites reliably — the residential IPs avoid the reputation issues that datacenter IPs hit on Cloudflare. Rotate IPs every 10-15 requests to stay under per-IP rate limits.
# Using a rotating proxy with httpx
PROXY_URL = "http://user:[email protected]:9000"
# httpx expects a scheme -> proxy-URL mapping
proxies = {
    "http://": PROXY_URL,
    "https://": PROXY_URL,
}
# Session-based client — reuses connections across requests
# NOTE(review): newer httpx releases replaced the `proxies=` argument with
# `proxy=` — confirm against the installed httpx version.
client = httpx.Client(proxies=proxies, timeout=15, follow_redirects=True)
resp = client.get("https://www.metacritic.com/game/elden-ring/", headers=get_headers())
# Playwright fallback for Cloudflare challenges
from playwright.sync_api import sync_playwright
def scrape_with_playwright(url: str, proxy: str = None) -> str:
    """Fetch page HTML via Playwright when httpx gets blocked by Cloudflare."""
    options = {"headless": True, "args": ["--no-sandbox"]}
    if proxy:
        options["proxy"] = {"server": proxy}
    with sync_playwright() as pw:
        browser = pw.chromium.launch(**options)
        ctx = browser.new_context(
            user_agent=random.choice(USER_AGENTS),
            viewport={"width": 1280, "height": 900},
            locale="en-US",
        )
        tab = ctx.new_page()
        tab.goto(url, wait_until="domcontentloaded", timeout=30000)
        # brief pause so any challenge/late-loading JS can settle
        tab.wait_for_timeout(2000)
        html = tab.content()
        browser.close()
    return html
Building a Score Database
Tie it all together with SQLite storage:
import sqlite3
from datetime import datetime, timezone
def init_metacritic_db(db_path: str = "metacritic.db") -> sqlite3.Connection:
    """Create the Metacritic schema (if missing) and return a connection.

    Tables: games (one row per title, keyed on url), critic_reviews
    (per-publication reviews), score_history (daily score snapshots).
    """
    schema = """
    CREATE TABLE IF NOT EXISTS games (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        title TEXT,
        platform TEXT,
        metascore INTEGER,
        user_score REAL,
        critic_count INTEGER,
        user_count INTEGER,
        release_date TEXT,
        genres TEXT,
        publisher TEXT,
        description TEXT,
        score_classification TEXT,
        url TEXT UNIQUE,
        scraped_at TEXT
    );
    CREATE TABLE IF NOT EXISTS critic_reviews (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        game_url TEXT,
        publication TEXT,
        score INTEGER,
        quote TEXT,
        author TEXT,
        review_date TEXT,
        review_url TEXT,
        FOREIGN KEY (game_url) REFERENCES games(url)
    );
    CREATE TABLE IF NOT EXISTS score_history (
        game_url TEXT,
        snapshot_date TEXT,
        metascore INTEGER,
        user_score REAL,
        critic_count INTEGER,
        user_count INTEGER,
        PRIMARY KEY (game_url, snapshot_date)
    );
    CREATE INDEX IF NOT EXISTS idx_games_platform ON games(platform);
    CREATE INDEX IF NOT EXISTS idx_games_metascore ON games(metascore);
    CREATE INDEX IF NOT EXISTS idx_games_release ON games(release_date);
    """
    connection = sqlite3.connect(db_path)
    connection.executescript(schema)
    connection.commit()
    return connection
def save_game(conn: sqlite3.Connection, game: dict):
    """Save or update a game record and log a daily score snapshot.

    Args:
        conn: Open connection with the schema from init_metacritic_db.
        game: Scraped game dict; missing keys are stored as NULL.

    The game's url is the natural key (UNIQUE column), so re-scraping a
    page replaces the previous row. A (url, date)-keyed snapshot is also
    written to score_history for trend tracking.
    """
    # FIX: datetime.utcnow() is deprecated (and returns a naive datetime);
    # use an explicit timezone-aware UTC timestamp instead.
    now = datetime.now(timezone.utc).isoformat()
    conn.execute(
        """INSERT OR REPLACE INTO games
           (title, platform, metascore, user_score, critic_count, user_count,
            release_date, genres, publisher, description, score_classification, url, scraped_at)
           VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
        (
            game.get("title"), game.get("platform"), game.get("metascore"),
            game.get("user_score"), game.get("critic_count"), game.get("user_count"),
            game.get("release_date"),
            json.dumps(game.get("genres", [])),
            # publisher may arrive as a JSON-LD dict — serialize it then
            json.dumps(game.get("publisher")) if isinstance(game.get("publisher"), dict)
            else game.get("publisher"),
            game.get("description"),
            game.get("score_classification"),
            game.get("url"), now,
        )
    )
    # Also log a score snapshot for historical tracking (one per day:
    # now[:10] is the ISO date, and (game_url, snapshot_date) is the PK)
    conn.execute(
        """INSERT OR REPLACE INTO score_history
           (game_url, snapshot_date, metascore, user_score, critic_count, user_count)
           VALUES (?, ?, ?, ?, ?, ?)""",
        (game.get("url"), now[:10], game.get("metascore"),
         game.get("user_score"), game.get("critic_count"), game.get("user_count"))
    )
    conn.commit()
def build_metacritic_db(
    platforms: list,
    pages_per_platform: int = 5,
    proxy: str = None,
    db_path: str = "metacritic.db",
):
    """Build a Metacritic score database across platforms.

    For each platform: walk the browse listings, enrich every listed game
    with its detail-page scores, and persist the merged record.
    """
    conn = init_metacritic_db(db_path)
    for platform in platforms:
        print(f"\n--- Scraping {platform} ---")
        listings = scrape_all_games(platform, max_pages=pages_per_platform)
        for entry in listings:
            if not entry.get("url"):
                continue
            # Get detailed scores and merge them over the listing fields
            detail = scrape_metacritic_scores(entry["url"], proxy=proxy)
            if detail:
                entry.update(detail)
            time.sleep(random.uniform(2.0, 4.0))
            try:
                save_game(conn, entry)
            except Exception as e:
                print(f"Error saving {entry.get('title')}: {e}")
        print(f"Saved {len(listings)} {platform} games")
    conn.close()
# Build database for major platforms
# (one detail request per listed game, plus per-request sleeps — expect
# a full run to take a while)
build_metacritic_db(
    ["pc", "playstation-5", "xbox-series-x", "nintendo-switch"],
    pages_per_platform=3,
    proxy="http://user:[email protected]:9000",
)
Analysis: Finding Score Divergence
The most interesting analysis from this dataset is finding where critic and user scores diverge significantly:
def find_score_divergence(db_path: str = "metacritic.db", min_reviews: int = 20) -> list:
    """Find games where critic and user scores diverge most strongly.

    User scores are scaled x10 to the Metascore's 0-100 range before
    computing the gap; only games with at least min_reviews critic AND
    user reviews qualify. Returns the 20 widest gaps.
    """
    db = sqlite3.connect(db_path)
    try:
        rows = db.execute("""
            SELECT title, platform, metascore, user_score,
                   ROUND(metascore - (user_score * 10), 1) as critic_user_gap,
                   critic_count, user_count, release_date
            FROM games
            WHERE metascore IS NOT NULL
              AND user_score IS NOT NULL
              AND critic_count >= ?
              AND user_count >= ?
            ORDER BY ABS(metascore - (user_score * 10)) DESC
            LIMIT 20
        """, (min_reviews, min_reviews)).fetchall()
    finally:
        db.close()
    return rows
def find_hidden_gems(db_path: str = "metacritic.db") -> list:
    """Find games with high user scores but middling critic scores.

    'Hidden gem' here means Metascore 50-74, user score >= 7.5, and at
    least 50 user ratings; ranked by how far users out-rate critics.
    """
    db = sqlite3.connect(db_path)
    try:
        rows = db.execute("""
            SELECT title, platform, metascore, user_score,
                   ROUND(user_score * 10 - metascore, 1) as user_advantage,
                   release_date, url
            FROM games
            WHERE metascore BETWEEN 50 AND 74
              AND user_score >= 7.5
              AND user_count >= 50
            ORDER BY user_advantage DESC
            LIMIT 20
        """).fetchall()
    finally:
        db.close()
    return rows
# Run analysis
divergent = find_score_divergence()
print("Games with biggest critic/user score gap:")
for row in divergent[:5]:
    # Row layout mirrors the SELECT in find_score_divergence
    title, platform, mc, us, gap, cc, uc, rel = row
    direction = "Critics higher" if gap > 0 else "Users higher"
    print(f" {title} ({platform}): {mc}/100 critic, {us}/10 user — gap {abs(gap):.0f} pts ({direction})")
hidden_gems = find_hidden_gems()
print("\nHidden gems (users loved them, critics didn't):")
for row in hidden_gems[:5]:
    # Row layout mirrors the SELECT in find_hidden_gems
    title, platform, mc, us, advantage, rel, url = row
    print(f" {title} ({platform}): Metascore {mc}, User {us}/10")
Score Trend Tracking
Track score changes over time (useful because Metacritic updates scores as more reviews come in):
def track_score_changes(db_path: str = "metacritic.db", days: int = 30) -> list:
    """Find games whose Metascore moved by 3+ points in the past N days.

    Compares each game's most recent snapshot (h1) against any snapshot
    taken on or before the cutoff date (h2).
    """
    from datetime import timedelta
    cutoff = (datetime.utcnow() - timedelta(days=days)).strftime("%Y-%m-%d")
    db = sqlite3.connect(db_path)
    try:
        rows = db.execute("""
            SELECT
                h1.game_url,
                g.title, g.platform,
                h2.metascore as old_score,
                h1.metascore as new_score,
                h1.metascore - h2.metascore as score_delta,
                h2.snapshot_date as old_date,
                h1.snapshot_date as new_date
            FROM score_history h1
            JOIN score_history h2 ON h1.game_url = h2.game_url
            JOIN games g ON h1.game_url = g.url
            WHERE h1.snapshot_date = (
                SELECT MAX(snapshot_date) FROM score_history WHERE game_url = h1.game_url
            )
              AND h2.snapshot_date <= ?
              AND ABS(h1.metascore - h2.metascore) >= 3
            ORDER BY ABS(score_delta) DESC
        """, (cutoff,)).fetchall()
    finally:
        db.close()
    return rows
Wrapping Up
Metacritic scraping in 2026 is straightforward once you know the data sources. JSON-LD gives you the aggregate score without parsing HTML. The HTML structure with data-testid attributes makes detailed scraping reliable. The search page works for discovery, and browse pages let you build comprehensive databases by platform.
The main friction is Cloudflare. Start with plain HTTP requests for the JSON-LD path — it's the most resilient. Fall back to Playwright when you need the full HTML. Keep your request rate under 30/minute per IP and rotate proxies using ThorData's residential network for bulk operations.
Metacritic's data is public and embedded as structured data in every page. They want search engines to index it. You're reading the same data Google reads — just programmatically. The score divergence analysis and historical tracking are what make this dataset genuinely interesting rather than just a lookup table.