How to Scrape Steam Game Data in 2026: Prices, Reviews & Player Counts
Steam is the largest PC gaming marketplace -- over 70,000 games, 120 million monthly active users, and one of the most detailed public review systems anywhere. Whether you're building a game price tracker, analyzing player sentiment, or researching indie game market trends, Steam data is essential.
The good news: Steam actually has decent APIs. The bad news: those APIs are incomplete, rate-limited, and don't cover everything. For regional pricing, review text, and some store page details, you still need to scrape.
This guide covers both approaches -- the official API where it works, and scraping where it doesn't.
What Data Can You Extract?
Between the API and store pages, you can get:
- Game details -- name, description, genres, tags, developer, publisher, release date
- Pricing -- base price, current discount, regional pricing across 40+ countries
- Reviews -- full text, thumbs up/down, playtime at review, language, helpfulness
- Player counts -- current and peak concurrent players
- Achievements -- list of achievements with global unlock percentages
- Screenshots and videos -- media URLs from the store page
- System requirements -- minimum and recommended specs
- SteamSpy data -- estimated ownership numbers (via unofficial API)
- DLC listings -- downloadable content prices and descriptions
- News and announcements -- official developer posts
Steam's Anti-Bot Measures
Steam is relatively scraper-friendly compared to most sites, but they do have limits:
- API rate limiting -- The Steam Web API allows ~200 requests per 5 minutes per key. Exceeding this returns 429 responses.
- Store page rate limiting -- Store pages throttle around 200 requests per 5 minutes per IP. You get a 429 or redirect to a captcha.
- Region detection -- Steam uses IP geolocation for pricing. To get prices in different regions, you need IPs in those regions.
- Age gates -- Mature content pages require a birthtime cookie, normally set via the age check form.
- Country-specific blocks -- Some games are region-locked; the store page simply 404s from certain IPs.
- Cloudflare on some endpoints -- The community pages use Cloudflare protection.
Setting Up: Steam Web API
Get a free API key at steamcommunity.com/dev/apikey.
pip install requests beautifulsoup4
Fetching Game Details via API
import requests
import time
import random

# Steam Web API key -- get a free one at steamcommunity.com/dev/apikey.
# Required only for the authenticated endpoints (e.g. player counts).
STEAM_API_KEY = "YOUR_API_KEY"
def get_app_details(app_id: int, cc: str = "us") -> dict | None:
    """Fetch store metadata for a single app from the appdetails endpoint.

    Args:
        app_id: Steam application ID (e.g. 1245620 for Elden Ring).
        cc: Two-letter country code; controls the currency of the price data.

    Returns:
        A flattened dict of the most useful fields, or None when the app is
        unknown/delisted or the request was throttled.
    """
    url = "https://store.steampowered.com/api/appdetails"
    params = {"appids": app_id, "cc": cc, "l": "en"}
    resp = requests.get(url, params=params, timeout=15)
    # The endpoint throttles around 200 requests / 5 min; a 429 (or any other
    # non-200) body is not trustworthy JSON, so bail out early instead of
    # raising inside resp.json().
    if resp.status_code != 200:
        return None
    data = resp.json()
    app_data = data.get(str(app_id), {})
    if not app_data.get("success"):
        return None
    info = app_data["data"]
    release = info.get("release_date", {})
    return {
        "app_id": app_id,
        "name": info.get("name"),
        "type": info.get("type"),
        "description": info.get("short_description"),
        "developer": info.get("developers", []),
        "publisher": info.get("publishers", []),
        "genres": [g["description"] for g in info.get("genres", [])],
        "categories": [c["description"] for c in info.get("categories", [])],
        "release_date": release.get("date"),
        "coming_soon": release.get("coming_soon", False),
        "price": info.get("price_overview", {}),
        "is_free": info.get("is_free", False),
        "metacritic": info.get("metacritic", {}).get("score"),
        "platforms": info.get("platforms", {}),
        "header_image": info.get("header_image"),
        "dlc_count": len(info.get("dlc", [])),
        "supported_languages": info.get("supported_languages"),
    }
Fetching Player Counts
def get_player_count(app_id: int) -> int:
    """Return the current concurrent player count for an app (0 if absent)."""
    endpoint = "https://api.steampowered.com/ISteamUserStats/GetNumberOfCurrentPlayers/v1/"
    payload = requests.get(
        endpoint,
        params={"appid": app_id, "key": STEAM_API_KEY},
        timeout=10,
    ).json()
    return payload.get("response", {}).get("player_count", 0)
def get_global_achievement_percentages(app_id: int) -> list[dict]:
    """Return each achievement's global unlock percentage, highest first."""
    url = "https://api.steampowered.com/ISteamUserStats/GetGlobalAchievementPercentagesForApp/v2/"
    resp = requests.get(url, params={"gameid": app_id}, timeout=15)
    unlocks = resp.json().get("achievementpercentages", {}).get("achievements", [])
    rows = [{"name": entry["name"], "percent": entry["percent"]} for entry in unlocks]
    rows.sort(key=lambda row: row["percent"], reverse=True)
    return rows
Scraping Reviews
Steam's review API is public and well-structured:
def get_reviews(app_id: int, count: int = 100, language: str = "english") -> list:
    """Page through the public review API and return up to `count` reviews.

    Args:
        app_id: Steam application ID.
        count: Maximum number of reviews to collect.
        language: Steam language filter (e.g. "english", "all").

    Returns:
        A list of flattened review dicts in "recent" order.
    """
    url = f"https://store.steampowered.com/appreviews/{app_id}"
    params = {
        "json": 1,
        "language": language,
        "num_per_page": min(count, 100),
        "review_type": "all",
        "purchase_type": "all",
        "filter": "recent",
    }
    reviews = []
    cursor = "*"
    while len(reviews) < count:
        params["cursor"] = cursor
        resp = requests.get(url, params=params, timeout=15)
        # A non-200 response (typically 429 rate limiting) has no usable body.
        if resp.status_code != 200:
            break
        data = resp.json()
        batch = data.get("reviews", [])
        if not batch:
            break
        for r in batch:
            reviews.append({
                "author_id": r["author"]["steamid"],
                "recommended": r["voted_up"],
                "text": r["review"],
                "playtime_hours": round(r["author"]["playtime_forever"] / 60, 1),
                "playtime_at_review": round(r.get("author", {}).get("playtime_at_review", 0) / 60, 1),
                "posted_timestamp": r["timestamp_created"],
                "helpful": r["votes_up"],
                "funny": r["votes_funny"],
                "early_access": r.get("written_during_early_access", False),
                "language": r.get("language"),
            })
        next_cursor = data.get("cursor")
        # BUGFIX: Steam can return the same cursor on the final page; stop on
        # a missing or unchanged cursor to avoid an infinite fetch loop.
        if not next_cursor or next_cursor == cursor or len(reviews) >= count:
            break
        cursor = next_cursor
        time.sleep(1)
    return reviews[:count]
def analyze_review_sentiment(reviews: list) -> dict:
    """Summarize a review batch: thumbs-up split and playtime averages.

    Returns an empty dict when given no reviews.
    """
    if not reviews:
        return {}
    total = len(reviews)
    positives = sum(1 for review in reviews if review["recommended"])
    playtime_sum = sum(review["playtime_hours"] for review in reviews)
    at_review_sum = sum(review["playtime_at_review"] for review in reviews)
    return {
        "total_reviews": total,
        "recommended": positives,
        "not_recommended": total - positives,
        "positive_pct": round(positives / total * 100, 1),
        "avg_playtime_hours": round(playtime_sum / total, 1),
        "avg_playtime_at_review_hours": round(at_review_sum / total, 1),
        "early_access_reviews": sum(1 for review in reviews if review["early_access"]),
    }
Scraping the Full App List
Steam provides a full list of all app IDs -- useful for building complete game databases:
def get_all_app_ids() -> list[dict]:
    """Download Steam's full catalog of {appid, name} records."""
    response = requests.get(
        "https://api.steampowered.com/ISteamApps/GetAppList/v2/",
        timeout=30,
    )
    catalog = response.json().get("applist", {}).get("apps", [])
    print(f"Total apps in Steam catalog: {len(catalog)}")
    return catalog
Scraping Regional Pricing
This is where the API falls short. appdetails only returns the price for the region your IP is in. To compare pricing across countries, you need to either pass the cc parameter (which Steam sometimes ignores for certain IPs) or use proxies in different regions.
A residential proxy service like ThorData is ideal for regional pricing because their IPs span 195+ countries. You get actual residential IPs in Brazil, Turkey, Argentina -- the regions where Steam prices differ the most.
def get_regional_prices(app_id: int, regions: list, proxy_template: str = None) -> dict:
    """Query appdetails once per region and collect the local price.

    Args:
        app_id: Steam application ID.
        regions: Country codes to query (e.g. ["us", "br", "tr"]).
        proxy_template: Optional proxy URL; "&country=<cc>" is appended so a
            country-targeting proxy exits from the matching region.

    Returns:
        Mapping of country code -> price info dict.
    """
    url = "https://store.steampowered.com/api/appdetails"
    prices = {}
    for cc in regions:
        proxies_dict = None
        if proxy_template:
            # ThorData supports country-level targeting via URL param
            proxy = f"{proxy_template}&country={cc}"
            proxies_dict = {"http": proxy, "https": proxy}
        try:
            # BUGFIX: the request is made directly (not via get_app_details)
            # so the region-targeted proxy is actually applied; previously
            # proxies_dict was built but never used.
            resp = requests.get(
                url,
                params={"appids": app_id, "cc": cc, "l": "en"},
                proxies=proxies_dict,
                timeout=15,
            )
            app_data = resp.json().get(str(app_id), {})
            if app_data.get("success"):
                info = app_data["data"]
                p = info.get("price_overview")
                if p:
                    prices[cc] = {
                        "currency": p.get("currency"),
                        "final_formatted": p.get("final_formatted"),
                        "initial_formatted": p.get("initial_formatted"),
                        # NOTE: "final" is in minor units of the LOCAL
                        # currency, not USD; key name kept for compatibility.
                        "final_usd_equiv": p.get("final", 0) / 100,
                        "discount_pct": p.get("discount_percent", 0),
                    }
                elif info.get("is_free"):
                    prices[cc] = {"currency": "FREE", "final_usd_equiv": 0, "discount_pct": 0}
        except Exception as e:
            print(f"Failed for region {cc}: {e}")
        time.sleep(random.uniform(1, 3))
    return prices
# Compare Elden Ring pricing worldwide
# Example driver: query 12 storefront regions for app 1245620 (Elden Ring)
# through a country-targeted proxy, then print cheapest-first.
regions = ["us", "br", "tr", "ar", "ru", "gb", "de", "jp", "au", "in", "mx", "co"]
# NOTE(review): replace USER/PASS with real proxy credentials before running.
PROXY = "http://USER:[email protected]:9000"
prices = get_regional_prices(1245620, regions, proxy_template=PROXY)
# Entries without "final_usd_equiv" sort last via the 999 default.
sorted_prices = sorted(prices.items(), key=lambda x: x[1].get("final_usd_equiv", 999))
for cc, p in sorted_prices:
    disc = f" ({p['discount_pct']}% off)" if p['discount_pct'] else ""
    print(f"{cc.upper()}: {p.get('final_formatted', 'N/A')}{disc}")
Discovering Games: Steam Search Scraping
The API doesn't have a proper search endpoint. Scrape the store search page:
from bs4 import BeautifulSoup
import re
def search_steam(query: str, max_results: int = 25) -> list:
    """Scrape the store search page (the Web API has no search endpoint).

    Args:
        query: Free-text search term.
        max_results: Maximum number of result rows to parse.

    Returns:
        A list of dicts with app_id, name, pricing, review, and release info.
    """
    url = "https://store.steampowered.com/search/"
    params = {"term": query, "category1": 998}
    # Pre-set birthtime so age-gated titles appear in results.
    headers = {"Cookie": "birthtime=0; wants_mature_content=1"}
    resp = requests.get(url, params=params, headers=headers, timeout=15)
    soup = BeautifulSoup(resp.text, "html.parser")
    results = []
    for row in soup.select("#search_resultsRows a")[:max_results]:
        title_el = row.select_one(".title")
        price_el = row.select_one(".discount_final_price")
        original_price_el = row.select_one(".discount_original_price")
        discount_el = row.select_one(".discount_pct")
        review_el = row.select_one(".search_review_summary")
        release_el = row.select_one(".search_released")
        app_url = row.get("href", "")
        app_id = None
        if "/app/" in app_url:
            app_id = int(app_url.split("/app/")[1].split("/")[0])
        review_tooltip = review_el.get("data-tooltip-html", "") if review_el else ""
        review_count = None
        if "reviews" in review_tooltip:
            # BUGFIX: the pattern previously used "\\d" inside a raw string,
            # which matches a literal backslash followed by "d" -- it could
            # never match a digit, so review_count was always None.
            match = re.search(r"([\d,]+) user reviews", review_tooltip)
            if match:
                review_count = int(match.group(1).replace(",", ""))
        results.append({
            "app_id": app_id,
            "name": title_el.get_text(strip=True) if title_el else None,
            "price": price_el.get_text(strip=True) if price_el else "Free",
            "original_price": original_price_el.get_text(strip=True) if original_price_el else None,
            "discount_pct": int(discount_el.get_text(strip=True).replace("-", "").replace("%", "")) if discount_el else 0,
            "review_sentiment": review_tooltip if review_el else None,
            "review_count": review_count,
            "release_date": release_el.get_text(strip=True) if release_el else None,
            "url": app_url.split("?")[0],
        })
    return results
Storing Data in SQLite
For game data that you'll query often, SQLite beats CSV:
import sqlite3
from datetime import datetime
def init_db(db_path: str = "steam_data.db"):
    """Create the scraper's tables (if missing) and return an open connection.

    Tables: games, player_counts, reviews, regional_prices.

    Args:
        db_path: SQLite file path; ":memory:" works for testing.

    Returns:
        An open sqlite3.Connection with the schema committed.
    """
    # BUGFIX: the original listing used escaped quotes (\"\"\") around the SQL,
    # which is a Python syntax error; plain triple quotes are used here.
    conn = sqlite3.connect(db_path)
    conn.execute("""
        CREATE TABLE IF NOT EXISTS games (
            app_id INTEGER PRIMARY KEY,
            name TEXT, developer TEXT, publisher TEXT,
            genres TEXT, categories TEXT, release_date TEXT,
            metacritic INTEGER, price_usd REAL, is_free INTEGER,
            scraped_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
    """)
    conn.execute("""
        CREATE TABLE IF NOT EXISTS player_counts (
            app_id INTEGER,
            timestamp TEXT,
            player_count INTEGER,
            PRIMARY KEY (app_id, timestamp)
        )
    """)
    conn.execute("""
        CREATE TABLE IF NOT EXISTS reviews (
            app_id INTEGER,
            author_id TEXT,
            recommended INTEGER,
            text TEXT,
            playtime_hours REAL,
            playtime_at_review REAL,
            posted_timestamp INTEGER,
            helpful INTEGER,
            PRIMARY KEY (app_id, author_id)
        )
    """)
    conn.execute("""
        CREATE TABLE IF NOT EXISTS regional_prices (
            app_id INTEGER,
            region TEXT,
            currency TEXT,
            price_local REAL,
            discount_pct INTEGER,
            checked_at TEXT DEFAULT CURRENT_TIMESTAMP,
            PRIMARY KEY (app_id, region)
        )
    """)
    conn.commit()
    return conn
def save_game(conn, game: dict):
    """Insert or replace one game record produced by get_app_details()."""
    def _joined(key: str) -> str:
        # List-valued fields are flattened to a comma-separated string.
        return ", ".join(game.get(key, []))

    overview = game.get("price")
    # Steam reports prices in minor currency units (cents).
    price_usd = overview.get("final", 0) / 100 if overview else None
    row = (
        game["app_id"],
        game["name"],
        _joined("developer"),
        _joined("publisher"),
        _joined("genres"),
        _joined("categories"),
        game.get("release_date"),
        game.get("metacritic"),
        price_usd,
        1 if game.get("is_free") else 0,
    )
    conn.execute(
        "INSERT OR REPLACE INTO games VALUES (?,?,?,?,?,?,?,?,?,?,CURRENT_TIMESTAMP)",
        row,
    )
    conn.commit()
Building a Price Drop Alert System
Monitor games on a watchlist and alert when they hit a price threshold:
def check_price_drops(watchlist: list[int], threshold_pct: float = 50.0) -> list[dict]:
    """Return alert dicts for watchlist games discounted >= threshold_pct."""
    alerts = []
    for app_id in watchlist:
        details = get_app_details(app_id)
        if not details:
            continue
        overview = details.get("price", {})
        pct = overview.get("discount_percent", 0)
        if pct >= threshold_pct:
            alerts.append({
                "app_id": app_id,
                "name": details["name"],
                "original_price": overview.get("initial_formatted"),
                "sale_price": overview.get("final_formatted"),
                "discount_pct": pct,
                "url": f"https://store.steampowered.com/app/{app_id}",
            })
        time.sleep(1)
    return alerts
# Example: check for 75%+ discounts
# Runs the watchlist check and prints one SALE line per qualifying game.
watchlist = [730, 570, 1245620, 814380, 413150] # CS2, Dota, Elden Ring, Sekiro, Stardew
drops = check_price_drops(watchlist, threshold_pct=75.0)
for alert in drops:
    print(f"SALE: {alert['name']} -- {alert['original_price']} -> {alert['sale_price']} ({alert['discount_pct']}% off)")
    print(f" {alert['url']}")
SteamSpy Integration
SteamSpy provides estimated ownership data that Steam doesn't expose directly:
def get_steamspy_data(app_id: int) -> dict:
    """Pull SteamSpy's unofficial estimates (ownership, playtime, peak CCU)."""
    endpoint = f"https://steamspy.com/api.php?request=appdetails&appid={app_id}"
    payload = requests.get(endpoint, timeout=15).json()
    return {
        "app_id": app_id,
        "name": payload.get("name"),
        "owners_estimate": payload.get("owners"),
        "average_forever_minutes": payload.get("average_forever"),
        "average_2weeks_minutes": payload.get("average_2weeks"),
        "peak_ccu": payload.get("ccu"),
        "positive": payload.get("positive"),
        "negative": payload.get("negative"),
        "tags": payload.get("tags", {}),
    }
Tracking Player Count Trends
Player counts tell you a game's health over time. Schedule daily snapshots:
def track_player_counts(app_ids: list[int], db_path: str = "steam_data.db"):
    """Snapshot the current player count of each app into player_counts."""
    conn = init_db(db_path)
    snapshot_time = datetime.utcnow().isoformat()
    for app_id in app_ids:
        try:
            players = get_player_count(app_id)
        except Exception as exc:
            print(f"App {app_id} error: {exc}")
        else:
            conn.execute(
                "INSERT OR REPLACE INTO player_counts VALUES (?,?,?)",
                (app_id, snapshot_time, players),
            )
            print(f"App {app_id}: {players:,} players")
        time.sleep(0.5)
    conn.commit()
    conn.close()
# Run daily -- top 20 games by player count
# NOTE(review): the list below contains 10 app IDs, not 20 as the comment says.
top_games = [730, 570, 578080, 1172470, 1245620, 1085660, 292030, 359550, 1091500, 1817070]
track_player_counts(top_games)
Legal Considerations
Steam's Subscriber Agreement restricts automated access, but their public APIs are clearly intended for developer use. The review API and storefront API don't require authentication beyond a free API key. Keep your request volume reasonable, don't scrape user profile data at scale, and don't build a store clone. Price comparison and market analysis tools are generally accepted -- SteamDB and IsThereAnyDeal have operated for years.
Key Takeaways
- Use Steam's official APIs first -- the appdetails, reviews, player count, and achievement endpoints are free and reliable.
- Regional pricing requires IP-based geolocation. ThorData's country-targeted residential proxies let you query prices from 195+ countries without managing your own proxy list.
- Set the birthtime cookie to bypass age gates on mature content.
- Steam's rate limits are generous (~200 requests per 5 minutes) but will catch you if you hammer them. Add 1-3 second delays between requests.
- Store your data in SQLite rather than CSV -- game data has relationships worth querying.
- Combine Steam API data with SteamSpy's ownership estimates for a complete picture of a game's commercial performance.
- The full app list endpoint gives you all 70,000+ app IDs for bulk catalog analysis.
Analyzing Review Patterns Over Time
By fetching reviews with timestamps, you can detect "review bombing" events or launch-day sentiment spikes:
from datetime import datetime, timezone
def detect_review_anomalies(app_id: int, reviews: list) -> dict:
    """Detect unusual review patterns (review bombing, sudden spikes).

    Groups reviews by UTC day and flags days whose negative-review volume is
    more than two sample standard deviations above the daily mean.
    """
    from collections import defaultdict
    import statistics

    # Bucket reviews into per-day total/positive/negative counters.
    per_day = defaultdict(lambda: {"total": 0, "positive": 0, "negative": 0})
    for review in reviews:
        stamp = review.get("posted_timestamp")
        if not stamp:
            continue
        try:
            day = datetime.fromtimestamp(stamp, tz=timezone.utc).strftime("%Y-%m-%d")
        except (ValueError, TypeError, OSError):
            continue  # skip malformed timestamps
        bucket = per_day[day]
        bucket["total"] += 1
        bucket["positive" if review["recommended"] else "negative"] += 1

    if not per_day:
        return {}

    negatives = [bucket["negative"] for bucket in per_day.values()]
    # Too few days for a meaningful baseline.
    if len(negatives) < 3:
        return {}
    mean_neg = statistics.mean(negatives)
    stdev_neg = statistics.stdev(negatives) if len(negatives) > 1 else 0

    flagged = []
    if stdev_neg > 0:
        for day in sorted(per_day):
            bucket = per_day[day]
            z = (bucket["negative"] - mean_neg) / stdev_neg
            if z > 2.0:
                total = bucket["total"]
                flagged.append({
                    "date": day,
                    "total": total,
                    "negative": bucket["negative"],
                    "pos_pct": round(bucket["positive"] / total * 100, 1) if total else 0,
                    "z_score": round(z, 2),
                })
    return {
        "app_id": app_id,
        "total_reviews_analyzed": len(reviews),
        "dates_covered": len(per_day),
        "review_bomb_candidates": flagged,
    }
# Detect review bombing events
# Pull up to 1,000 recent reviews for app 1245620 and scan for days with
# anomalously many negative reviews.
reviews = get_reviews(1245620, count=1000)
analysis = detect_review_anomalies(1245620, reviews)
if analysis.get("review_bomb_candidates"):
    print("Potential review bomb dates:")
    for event in analysis["review_bomb_candidates"]:
        print(f" {event['date']}: {event['total']} reviews, {event['pos_pct']}% positive (z={event['z_score']})")
Tracking New Releases by Genre
Steam's genre filtering lets you monitor new releases in specific categories:
def get_new_releases_by_genre(genre: str, days_fresh: int = 30) -> list[dict]:
    """Get recently released games in a specific genre.

    Args:
        genre: Steam genre name (e.g. "RPG"); lowercased for the query.
        days_fresh: Only keep games released within this many days; entries
            whose release text cannot be parsed are kept rather than dropped.

    Returns:
        A list of {app_id, name, release_date} dicts.
    """
    url = "https://store.steampowered.com/search/"
    params = {
        "genre": genre.lower(),
        "sort_by": "_ASC",
        "os": "win",
        "filter": "topsellers",
        "ndl": 1,  # recent releases
    }
    headers = {"Cookie": "birthtime=0; wants_mature_content=1"}
    resp = requests.get(url, params=params, headers=headers, timeout=15)
    soup = BeautifulSoup(resp.text, "html.parser")
    from datetime import datetime, date, timedelta
    cutoff = date.today() - timedelta(days=days_fresh)
    results = []
    for row in soup.select("#search_resultsRows a")[:50]:
        release_el = row.select_one(".search_released")
        release_text = release_el.get_text(strip=True) if release_el else ""
        # BUGFIX: `cutoff` was previously computed but never applied, so
        # days_fresh had no effect. Steam renders most dates like
        # "12 Nov, 2025"; unparsable text ("Coming soon", "Q1 2026") is kept.
        try:
            released = datetime.strptime(release_text, "%d %b, %Y").date()
            if released < cutoff:
                continue
        except ValueError:
            pass
        app_url = row.get("href", "")
        app_id = None
        if "/app/" in app_url:
            app_id = int(app_url.split("/app/")[1].split("/")[0])
        title_el = row.select_one(".title")
        results.append({
            "app_id": app_id,
            "name": title_el.get_text(strip=True) if title_el else None,
            "release_date": release_text,
        })
    return results
# Get new RPG releases
# Prints a count and the first 10 results with their app IDs.
new_rpgs = get_new_releases_by_genre("RPG", days_fresh=60)
print(f"Found {len(new_rpgs)} recent RPG releases")
for game in new_rpgs[:10]:
    print(f" [{game['app_id']}] {game['name']} - {game['release_date']}")
Monitoring Developer Release History
Track when developers release games to understand release cadence and studio health:
def get_developer_games(developer_name: str) -> list[dict]:
    """Find all Steam games by a specific developer, newest release first.

    Searches the store for the developer name, then confirms attribution
    against each result's appdetails developer list.
    """
    candidates = search_steam(developer_name, max_results=50)
    matches = []
    needle = developer_name.lower()
    for candidate in candidates[:20]:
        if not candidate.get("app_id"):
            continue
        details = get_app_details(candidate["app_id"])
        attributed = details and needle in " ".join(details.get("developer", [])).lower()
        if attributed:
            matches.append({
                "app_id": details["app_id"],
                "name": details["name"],
                "release_date": details.get("release_date"),
                "genres": details.get("genres", []),
                "metacritic": details.get("metacritic"),
                "price": details.get("price", {}).get("final_formatted"),
            })
        time.sleep(0.5)
    return sorted(matches, key=lambda g: g.get("release_date") or "", reverse=True)
Batch Enriching a Game Catalog
When you have a list of app IDs, process them efficiently with checkpointing:
import sqlite3
from datetime import datetime
def enrich_game_catalog(app_ids: list[int], db_path: str = "steam_data.db", batch_size: int = 100):
    """Fetch and store details for a list of app IDs with progress saving.

    Maintains a scrape_queue table so an interrupted run resumes where it
    left off; each app ends in status done / not_found / error: <msg>.
    """
    conn = init_db(db_path)
    conn.execute("""
        CREATE TABLE IF NOT EXISTS scrape_queue (
            app_id INTEGER PRIMARY KEY,
            status TEXT DEFAULT 'pending',
            updated_at TEXT
        )
    """)
    # Seed the queue; INSERT OR IGNORE preserves statuses from earlier runs.
    for app_id in app_ids:
        conn.execute(
            "INSERT OR IGNORE INTO scrape_queue VALUES (?,?,?)",
            (app_id, "pending", datetime.utcnow().isoformat()),
        )
    conn.commit()

    # Drain the queue one batch at a time until nothing is pending.
    while True:
        batch = conn.execute(
            "SELECT app_id FROM scrape_queue WHERE status = 'pending' LIMIT ?",
            (batch_size,),
        ).fetchall()
        if not batch:
            break
        for (app_id,) in batch:
            try:
                details = get_app_details(app_id)
                if details:
                    save_game(conn, details)
                    outcome = "done"
                else:
                    outcome = "not_found"
            except Exception as exc:
                outcome = f"error: {str(exc)[:50]}"
            conn.execute(
                "UPDATE scrape_queue SET status = ?, updated_at = ? WHERE app_id = ?",
                (outcome, datetime.utcnow().isoformat(), app_id),
            )
            conn.commit()
            time.sleep(1)
        remaining = conn.execute(
            "SELECT COUNT(*) FROM scrape_queue WHERE status = ?", ("pending",)
        ).fetchone()[0]
        print(f"Batch done. Remaining: {remaining}")
    conn.close()
    print("Catalog enrichment complete.")