How to Scrape Steam Game Data in 2026: Prices, Reviews & Player Counts
Steam is the largest PC gaming marketplace -- over 70,000 games, 120 million monthly active users, and one of the most detailed public review systems anywhere. Whether you're building a game price tracker, analyzing player sentiment, or researching indie game market trends, Steam data is essential.
The good news: Steam actually has decent APIs. The bad news: those APIs are incomplete, rate-limited, and don't cover everything. For regional pricing, review text, and some store page details, you still need to scrape.
This guide covers both approaches -- the official API where it works, and scraping where it doesn't.
What Data Can You Extract?
Between the API and store pages, you can get:
- Game details -- name, description, genres, tags, developer, publisher, release date
- Pricing -- base price, current discount, regional pricing across 40+ countries
- Reviews -- full text, thumbs up/down, playtime at review, language, helpfulness
- Player counts -- current and peak concurrent players
- Achievements -- list of achievements with global unlock percentages
- Screenshots and videos -- media URLs from the store page
- System requirements -- minimum and recommended specs
- SteamSpy data -- estimated ownership numbers (via unofficial API)
- DLC listings -- downloadable content prices and descriptions
- News and announcements -- official developer posts
Steam's Anti-Bot Measures
Steam is relatively scraper-friendly compared to most sites, but they do have limits:
- API rate limiting -- The Steam Web API allows ~200 requests per 5 minutes per key. Exceeding this returns 429 responses.
- Store page rate limiting -- Store pages throttle around 200 requests per 5 minutes per IP. You get a 429 or redirect to a captcha.
- Region detection -- Steam uses IP geolocation for pricing. To get prices in different regions, you need IPs in those regions.
- Age gates -- Mature content pages require a birthtime cookie, normally set via the age check form.
- Country-specific blocks -- Some games are region-locked; the store page simply 404s from certain IPs.
- Cloudflare on some endpoints -- The community pages use Cloudflare protection.
Setting Up: Steam Web API
Get a free API key at steamcommunity.com/dev/apikey.
pip install requests beautifulsoup4
Fetching Game Details via API
import requests
import time
import random

# Steam Web API key -- get a free one at steamcommunity.com/dev/apikey.
# Required only for the authenticated endpoints (e.g. player counts).
STEAM_API_KEY = "YOUR_API_KEY"
def get_app_details(app_id: int, cc: str = "us") -> dict | None:
    """Fetch store metadata for a single app from the appdetails endpoint.

    Args:
        app_id: Steam application ID (e.g. 1245620 for Elden Ring).
        cc: Two-letter country code; controls the currency of the price data.

    Returns:
        A flattened dict of the most useful fields, or None when the app is
        unknown/delisted or the request was throttled.
    """
    url = "https://store.steampowered.com/api/appdetails"
    params = {"appids": app_id, "cc": cc, "l": "en"}
    resp = requests.get(url, params=params, timeout=15)
    # The endpoint throttles around 200 requests / 5 min; a 429 (or any other
    # non-200) body is not trustworthy JSON, so bail out early instead of
    # raising inside resp.json().
    if resp.status_code != 200:
        return None
    data = resp.json()
    app_data = data.get(str(app_id), {})
    if not app_data.get("success"):
        return None
    info = app_data["data"]
    release = info.get("release_date", {})
    return {
        "app_id": app_id,
        "name": info.get("name"),
        "type": info.get("type"),
        "description": info.get("short_description"),
        "developer": info.get("developers", []),
        "publisher": info.get("publishers", []),
        "genres": [g["description"] for g in info.get("genres", [])],
        "categories": [c["description"] for c in info.get("categories", [])],
        "release_date": release.get("date"),
        "coming_soon": release.get("coming_soon", False),
        "price": info.get("price_overview", {}),
        "is_free": info.get("is_free", False),
        "metacritic": info.get("metacritic", {}).get("score"),
        "platforms": info.get("platforms", {}),
        "header_image": info.get("header_image"),
        "dlc_count": len(info.get("dlc", [])),
        "supported_languages": info.get("supported_languages"),
    }
Fetching Player Counts
def get_player_count(app_id: int) -> int:
    """Return the current concurrent player count for an app (0 if absent)."""
    endpoint = "https://api.steampowered.com/ISteamUserStats/GetNumberOfCurrentPlayers/v1/"
    payload = requests.get(
        endpoint,
        params={"appid": app_id, "key": STEAM_API_KEY},
        timeout=10,
    ).json()
    return payload.get("response", {}).get("player_count", 0)
def get_global_achievement_percentages(app_id: int) -> list[dict]:
    """Return each achievement's global unlock percentage, highest first."""
    url = "https://api.steampowered.com/ISteamUserStats/GetGlobalAchievementPercentagesForApp/v2/"
    resp = requests.get(url, params={"gameid": app_id}, timeout=15)
    unlocks = resp.json().get("achievementpercentages", {}).get("achievements", [])
    rows = [{"name": entry["name"], "percent": entry["percent"]} for entry in unlocks]
    rows.sort(key=lambda row: row["percent"], reverse=True)
    return rows
Scraping Reviews
Steam's review API is public and well-structured:
def get_reviews(app_id: int, count: int = 100, language: str = "english") -> list:
    """Page through the public review API and return up to `count` reviews.

    Args:
        app_id: Steam application ID.
        count: Maximum number of reviews to collect.
        language: Steam language filter (e.g. "english", "all").

    Returns:
        A list of flattened review dicts in "recent" order.
    """
    url = f"https://store.steampowered.com/appreviews/{app_id}"
    params = {
        "json": 1,
        "language": language,
        "num_per_page": min(count, 100),
        "review_type": "all",
        "purchase_type": "all",
        "filter": "recent",
    }
    reviews = []
    cursor = "*"
    while len(reviews) < count:
        params["cursor"] = cursor
        resp = requests.get(url, params=params, timeout=15)
        # A non-200 response (typically 429 rate limiting) has no usable body.
        if resp.status_code != 200:
            break
        data = resp.json()
        batch = data.get("reviews", [])
        if not batch:
            break
        for r in batch:
            reviews.append({
                "author_id": r["author"]["steamid"],
                "recommended": r["voted_up"],
                "text": r["review"],
                "playtime_hours": round(r["author"]["playtime_forever"] / 60, 1),
                "playtime_at_review": round(r.get("author", {}).get("playtime_at_review", 0) / 60, 1),
                "posted_timestamp": r["timestamp_created"],
                "helpful": r["votes_up"],
                "funny": r["votes_funny"],
                "early_access": r.get("written_during_early_access", False),
                "language": r.get("language"),
            })
        next_cursor = data.get("cursor")
        # BUGFIX: Steam can return the same cursor on the final page; stop on
        # a missing or unchanged cursor to avoid an infinite fetch loop.
        if not next_cursor or next_cursor == cursor or len(reviews) >= count:
            break
        cursor = next_cursor
        time.sleep(1)
    return reviews[:count]
def analyze_review_sentiment(reviews: list) -> dict:
    """Summarize a review batch: thumbs-up split and playtime averages.

    Returns an empty dict when given no reviews.
    """
    if not reviews:
        return {}
    total = len(reviews)
    positives = sum(1 for review in reviews if review["recommended"])
    playtime_sum = sum(review["playtime_hours"] for review in reviews)
    at_review_sum = sum(review["playtime_at_review"] for review in reviews)
    return {
        "total_reviews": total,
        "recommended": positives,
        "not_recommended": total - positives,
        "positive_pct": round(positives / total * 100, 1),
        "avg_playtime_hours": round(playtime_sum / total, 1),
        "avg_playtime_at_review_hours": round(at_review_sum / total, 1),
        "early_access_reviews": sum(1 for review in reviews if review["early_access"]),
    }
Scraping the Full App List
Steam provides a full list of all app IDs -- useful for building complete game databases:
def get_all_app_ids() -> list[dict]:
    """Download Steam's full catalog of {appid, name} records."""
    response = requests.get(
        "https://api.steampowered.com/ISteamApps/GetAppList/v2/",
        timeout=30,
    )
    catalog = response.json().get("applist", {}).get("apps", [])
    print(f"Total apps in Steam catalog: {len(catalog)}")
    return catalog
Scraping Regional Pricing
This is where the API falls short. appdetails only returns the price for the region your IP is in. To compare pricing across countries, you need to either pass the cc parameter (which Steam sometimes ignores for certain IPs) or use proxies in different regions.
A residential proxy service like ThorData is ideal for regional pricing because their IPs span 195+ countries. You get actual residential IPs in Brazil, Turkey, Argentina -- the regions where Steam prices differ the most.
def get_regional_prices(app_id: int, regions: list, proxy_template: str = None) -> dict:
    """Query appdetails once per region and collect the local price.

    Args:
        app_id: Steam application ID.
        regions: Country codes to query (e.g. ["us", "br", "tr"]).
        proxy_template: Optional proxy URL; "&country=<cc>" is appended so a
            country-targeting proxy exits from the matching region.

    Returns:
        Mapping of country code -> price info dict.
    """
    url = "https://store.steampowered.com/api/appdetails"
    prices = {}
    for cc in regions:
        proxies_dict = None
        if proxy_template:
            # ThorData supports country-level targeting via URL param
            proxy = f"{proxy_template}&country={cc}"
            proxies_dict = {"http": proxy, "https": proxy}
        try:
            # BUGFIX: the request is made directly (not via get_app_details)
            # so the region-targeted proxy is actually applied; previously
            # proxies_dict was built but never used.
            resp = requests.get(
                url,
                params={"appids": app_id, "cc": cc, "l": "en"},
                proxies=proxies_dict,
                timeout=15,
            )
            app_data = resp.json().get(str(app_id), {})
            if app_data.get("success"):
                info = app_data["data"]
                p = info.get("price_overview")
                if p:
                    prices[cc] = {
                        "currency": p.get("currency"),
                        "final_formatted": p.get("final_formatted"),
                        "initial_formatted": p.get("initial_formatted"),
                        # NOTE: "final" is in minor units of the LOCAL
                        # currency, not USD; key name kept for compatibility.
                        "final_usd_equiv": p.get("final", 0) / 100,
                        "discount_pct": p.get("discount_percent", 0),
                    }
                elif info.get("is_free"):
                    prices[cc] = {"currency": "FREE", "final_usd_equiv": 0, "discount_pct": 0}
        except Exception as e:
            print(f"Failed for region {cc}: {e}")
        time.sleep(random.uniform(1, 3))
    return prices
# Compare Elden Ring pricing worldwide
# Example driver: query 12 storefront regions for app 1245620 (Elden Ring)
# through a country-targeted proxy, then print cheapest-first.
regions = ["us", "br", "tr", "ar", "ru", "gb", "de", "jp", "au", "in", "mx", "co"]
# NOTE(review): replace USER/PASS with real proxy credentials before running.
PROXY = "http://USER:[email protected]:9000"
prices = get_regional_prices(1245620, regions, proxy_template=PROXY)
# Entries without "final_usd_equiv" sort last via the 999 default.
sorted_prices = sorted(prices.items(), key=lambda x: x[1].get("final_usd_equiv", 999))
for cc, p in sorted_prices:
    disc = f" ({p['discount_pct']}% off)" if p['discount_pct'] else ""
    print(f"{cc.upper()}: {p.get('final_formatted', 'N/A')}{disc}")
Discovering Games: Steam Search Scraping
The API doesn't have a proper search endpoint. Scrape the store search page:
from bs4 import BeautifulSoup
import re
def search_steam(query: str, max_results: int = 25) -> list:
    """Scrape the store search page (the Web API has no search endpoint).

    Args:
        query: Free-text search term.
        max_results: Maximum number of result rows to parse.

    Returns:
        A list of dicts with app_id, name, pricing, review, and release info.
    """
    url = "https://store.steampowered.com/search/"
    params = {"term": query, "category1": 998}
    # Pre-set birthtime so age-gated titles appear in results.
    headers = {"Cookie": "birthtime=0; wants_mature_content=1"}
    resp = requests.get(url, params=params, headers=headers, timeout=15)
    soup = BeautifulSoup(resp.text, "html.parser")
    results = []
    for row in soup.select("#search_resultsRows a")[:max_results]:
        title_el = row.select_one(".title")
        price_el = row.select_one(".discount_final_price")
        original_price_el = row.select_one(".discount_original_price")
        discount_el = row.select_one(".discount_pct")
        review_el = row.select_one(".search_review_summary")
        release_el = row.select_one(".search_released")
        app_url = row.get("href", "")
        app_id = None
        if "/app/" in app_url:
            app_id = int(app_url.split("/app/")[1].split("/")[0])
        review_tooltip = review_el.get("data-tooltip-html", "") if review_el else ""
        review_count = None
        if "reviews" in review_tooltip:
            # BUGFIX: the pattern previously used "\\d" inside a raw string,
            # which matches a literal backslash followed by "d" -- it could
            # never match a digit, so review_count was always None.
            match = re.search(r"([\d,]+) user reviews", review_tooltip)
            if match:
                review_count = int(match.group(1).replace(",", ""))
        results.append({
            "app_id": app_id,
            "name": title_el.get_text(strip=True) if title_el else None,
            "price": price_el.get_text(strip=True) if price_el else "Free",
            "original_price": original_price_el.get_text(strip=True) if original_price_el else None,
            "discount_pct": int(discount_el.get_text(strip=True).replace("-", "").replace("%", "")) if discount_el else 0,
            "review_sentiment": review_tooltip if review_el else None,
            "review_count": review_count,
            "release_date": release_el.get_text(strip=True) if release_el else None,
            "url": app_url.split("?")[0],
        })
    return results
Storing Data in SQLite
For game data that you'll query often, SQLite beats CSV:
import sqlite3
from datetime import datetime
def init_db(db_path: str = "steam_data.db"):
    """Create the scraper's tables (if missing) and return an open connection.

    Tables: games, player_counts, reviews, regional_prices.

    Args:
        db_path: SQLite file path; ":memory:" works for testing.

    Returns:
        An open sqlite3.Connection with the schema committed.
    """
    # BUGFIX: the original listing used escaped quotes (\"\"\") around the SQL,
    # which is a Python syntax error; plain triple quotes are used here.
    conn = sqlite3.connect(db_path)
    conn.execute("""
        CREATE TABLE IF NOT EXISTS games (
            app_id INTEGER PRIMARY KEY,
            name TEXT, developer TEXT, publisher TEXT,
            genres TEXT, categories TEXT, release_date TEXT,
            metacritic INTEGER, price_usd REAL, is_free INTEGER,
            scraped_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
    """)
    conn.execute("""
        CREATE TABLE IF NOT EXISTS player_counts (
            app_id INTEGER,
            timestamp TEXT,
            player_count INTEGER,
            PRIMARY KEY (app_id, timestamp)
        )
    """)
    conn.execute("""
        CREATE TABLE IF NOT EXISTS reviews (
            app_id INTEGER,
            author_id TEXT,
            recommended INTEGER,
            text TEXT,
            playtime_hours REAL,
            playtime_at_review REAL,
            posted_timestamp INTEGER,
            helpful INTEGER,
            PRIMARY KEY (app_id, author_id)
        )
    """)
    conn.execute("""
        CREATE TABLE IF NOT EXISTS regional_prices (
            app_id INTEGER,
            region TEXT,
            currency TEXT,
            price_local REAL,
            discount_pct INTEGER,
            checked_at TEXT DEFAULT CURRENT_TIMESTAMP,
            PRIMARY KEY (app_id, region)
        )
    """)
    conn.commit()
    return conn
def save_game(conn, game: dict):
    """Insert or replace one game record produced by get_app_details()."""
    def _joined(key: str) -> str:
        # List-valued fields are flattened to a comma-separated string.
        return ", ".join(game.get(key, []))

    overview = game.get("price")
    # Steam reports prices in minor currency units (cents).
    price_usd = overview.get("final", 0) / 100 if overview else None
    row = (
        game["app_id"],
        game["name"],
        _joined("developer"),
        _joined("publisher"),
        _joined("genres"),
        _joined("categories"),
        game.get("release_date"),
        game.get("metacritic"),
        price_usd,
        1 if game.get("is_free") else 0,
    )
    conn.execute(
        "INSERT OR REPLACE INTO games VALUES (?,?,?,?,?,?,?,?,?,?,CURRENT_TIMESTAMP)",
        row,
    )
    conn.commit()
Building a Price Drop Alert System
Monitor games on a watchlist and alert when they hit a price threshold:
def check_price_drops(watchlist: list[int], threshold_pct: float = 50.0) -> list[dict]:
    """Return alert dicts for watchlist games discounted >= threshold_pct."""
    alerts = []
    for app_id in watchlist:
        details = get_app_details(app_id)
        if not details:
            continue
        overview = details.get("price", {})
        pct = overview.get("discount_percent", 0)
        if pct >= threshold_pct:
            alerts.append({
                "app_id": app_id,
                "name": details["name"],
                "original_price": overview.get("initial_formatted"),
                "sale_price": overview.get("final_formatted"),
                "discount_pct": pct,
                "url": f"https://store.steampowered.com/app/{app_id}",
            })
        time.sleep(1)
    return alerts
# Example: check for 75%+ discounts
# Runs the watchlist check and prints one SALE line per qualifying game.
watchlist = [730, 570, 1245620, 814380, 413150] # CS2, Dota, Elden Ring, Sekiro, Stardew
drops = check_price_drops(watchlist, threshold_pct=75.0)
for alert in drops:
    print(f"SALE: {alert['name']} -- {alert['original_price']} -> {alert['sale_price']} ({alert['discount_pct']}% off)")
    print(f" {alert['url']}")
SteamSpy Integration
SteamSpy provides estimated ownership data that Steam doesn't expose directly:
def get_steamspy_data(app_id: int) -> dict:
    """Pull SteamSpy's unofficial estimates (ownership, playtime, peak CCU)."""
    endpoint = f"https://steamspy.com/api.php?request=appdetails&appid={app_id}"
    payload = requests.get(endpoint, timeout=15).json()
    return {
        "app_id": app_id,
        "name": payload.get("name"),
        "owners_estimate": payload.get("owners"),
        "average_forever_minutes": payload.get("average_forever"),
        "average_2weeks_minutes": payload.get("average_2weeks"),
        "peak_ccu": payload.get("ccu"),
        "positive": payload.get("positive"),
        "negative": payload.get("negative"),
        "tags": payload.get("tags", {}),
    }
Tracking Player Count Trends
Player counts tell you a game's health over time. Schedule daily snapshots:
def track_player_counts(app_ids: list[int], db_path: str = "steam_data.db"):
    """Snapshot the current player count of each app into player_counts."""
    conn = init_db(db_path)
    snapshot_time = datetime.utcnow().isoformat()
    for app_id in app_ids:
        try:
            players = get_player_count(app_id)
        except Exception as exc:
            print(f"App {app_id} error: {exc}")
        else:
            conn.execute(
                "INSERT OR REPLACE INTO player_counts VALUES (?,?,?)",
                (app_id, snapshot_time, players),
            )
            print(f"App {app_id}: {players:,} players")
        time.sleep(0.5)
    conn.commit()
    conn.close()
# Run daily -- top 20 games by player count
# NOTE(review): the list below contains 10 app IDs, not 20 as the comment says.
top_games = [730, 570, 578080, 1172470, 1245620, 1085660, 292030, 359550, 1091500, 1817070]
track_player_counts(top_games)
Legal Considerations
Steam's Subscriber Agreement restricts automated access, but their public APIs are clearly intended for developer use. The review API and storefront API don't require authentication beyond a free API key. Keep your request volume reasonable, don't scrape user profile data at scale, and don't build a store clone. Price comparison and market analysis tools are generally accepted -- SteamDB and IsThereAnyDeal have operated for years.
Key Takeaways
- Use Steam's official APIs first -- the appdetails, reviews, player count, and achievement endpoints are free and reliable.
- Regional pricing requires IP-based geolocation. ThorData's country-targeted residential proxies let you query prices from 195+ countries without managing your own proxy list.
- Set the birthtime cookie to bypass age gates on mature content.
- Steam's rate limits are generous (~200 requests per 5 minutes) but will catch you if you hammer them. Add 1-3 second delays between requests.
- Store your data in SQLite rather than CSV -- game data has relationships worth querying.
- Combine Steam API data with SteamSpy's ownership estimates for a complete picture of a game's commercial performance.
- The full app list endpoint gives you all 70,000+ app IDs for bulk catalog analysis.
Analyzing Review Patterns Over Time
By fetching reviews with timestamps, you can detect "review bombing" events or launch-day sentiment spikes:
from datetime import datetime, timezone
def detect_review_anomalies(app_id: int, reviews: list) -> dict:
    """Detect unusual review patterns (review bombing, sudden spikes).

    Groups reviews by UTC day and flags days whose negative-review volume is
    more than two sample standard deviations above the daily mean.
    """
    from collections import defaultdict
    import statistics

    # Bucket reviews into per-day total/positive/negative counters.
    per_day = defaultdict(lambda: {"total": 0, "positive": 0, "negative": 0})
    for review in reviews:
        stamp = review.get("posted_timestamp")
        if not stamp:
            continue
        try:
            day = datetime.fromtimestamp(stamp, tz=timezone.utc).strftime("%Y-%m-%d")
        except (ValueError, TypeError, OSError):
            continue  # skip malformed timestamps
        bucket = per_day[day]
        bucket["total"] += 1
        bucket["positive" if review["recommended"] else "negative"] += 1

    if not per_day:
        return {}

    negatives = [bucket["negative"] for bucket in per_day.values()]
    # Too few days for a meaningful baseline.
    if len(negatives) < 3:
        return {}
    mean_neg = statistics.mean(negatives)
    stdev_neg = statistics.stdev(negatives) if len(negatives) > 1 else 0

    flagged = []
    if stdev_neg > 0:
        for day in sorted(per_day):
            bucket = per_day[day]
            z = (bucket["negative"] - mean_neg) / stdev_neg
            if z > 2.0:
                total = bucket["total"]
                flagged.append({
                    "date": day,
                    "total": total,
                    "negative": bucket["negative"],
                    "pos_pct": round(bucket["positive"] / total * 100, 1) if total else 0,
                    "z_score": round(z, 2),
                })
    return {
        "app_id": app_id,
        "total_reviews_analyzed": len(reviews),
        "dates_covered": len(per_day),
        "review_bomb_candidates": flagged,
    }
# Detect review bombing events
# Pull up to 1,000 recent reviews for app 1245620 and scan for days with
# anomalously many negative reviews.
reviews = get_reviews(1245620, count=1000)
analysis = detect_review_anomalies(1245620, reviews)
if analysis.get("review_bomb_candidates"):
    print("Potential review bomb dates:")
    for event in analysis["review_bomb_candidates"]:
        print(f" {event['date']}: {event['total']} reviews, {event['pos_pct']}% positive (z={event['z_score']})")
Tracking New Releases by Genre
Steam's genre filtering lets you monitor new releases in specific categories:
def get_new_releases_by_genre(genre: str, days_fresh: int = 30) -> list[dict]:
    """Get recently released games in a specific genre.

    Args:
        genre: Steam genre name (e.g. "RPG"); lowercased for the query.
        days_fresh: Only keep games released within this many days; entries
            whose release text cannot be parsed are kept rather than dropped.

    Returns:
        A list of {app_id, name, release_date} dicts.
    """
    url = "https://store.steampowered.com/search/"
    params = {
        "genre": genre.lower(),
        "sort_by": "_ASC",
        "os": "win",
        "filter": "topsellers",
        "ndl": 1,  # recent releases
    }
    headers = {"Cookie": "birthtime=0; wants_mature_content=1"}
    resp = requests.get(url, params=params, headers=headers, timeout=15)
    soup = BeautifulSoup(resp.text, "html.parser")
    from datetime import datetime, date, timedelta
    cutoff = date.today() - timedelta(days=days_fresh)
    results = []
    for row in soup.select("#search_resultsRows a")[:50]:
        release_el = row.select_one(".search_released")
        release_text = release_el.get_text(strip=True) if release_el else ""
        # BUGFIX: `cutoff` was previously computed but never applied, so
        # days_fresh had no effect. Steam renders most dates like
        # "12 Nov, 2025"; unparsable text ("Coming soon", "Q1 2026") is kept.
        try:
            released = datetime.strptime(release_text, "%d %b, %Y").date()
            if released < cutoff:
                continue
        except ValueError:
            pass
        app_url = row.get("href", "")
        app_id = None
        if "/app/" in app_url:
            app_id = int(app_url.split("/app/")[1].split("/")[0])
        title_el = row.select_one(".title")
        results.append({
            "app_id": app_id,
            "name": title_el.get_text(strip=True) if title_el else None,
            "release_date": release_text,
        })
    return results
# Get new RPG releases
# Prints a count and the first 10 results with their app IDs.
new_rpgs = get_new_releases_by_genre("RPG", days_fresh=60)
print(f"Found {len(new_rpgs)} recent RPG releases")
for game in new_rpgs[:10]:
    print(f" [{game['app_id']}] {game['name']} - {game['release_date']}")
Monitoring Developer Release History
Track when developers release games to understand release cadence and studio health:
def get_developer_games(developer_name: str) -> list[dict]:
    """Find all Steam games by a specific developer, newest release first.

    Searches the store for the developer name, then confirms attribution
    against each result's appdetails developer list.
    """
    candidates = search_steam(developer_name, max_results=50)
    matches = []
    needle = developer_name.lower()
    for candidate in candidates[:20]:
        if not candidate.get("app_id"):
            continue
        details = get_app_details(candidate["app_id"])
        attributed = details and needle in " ".join(details.get("developer", [])).lower()
        if attributed:
            matches.append({
                "app_id": details["app_id"],
                "name": details["name"],
                "release_date": details.get("release_date"),
                "genres": details.get("genres", []),
                "metacritic": details.get("metacritic"),
                "price": details.get("price", {}).get("final_formatted"),
            })
        time.sleep(0.5)
    return sorted(matches, key=lambda g: g.get("release_date") or "", reverse=True)
Batch Enriching a Game Catalog
When you have a list of app IDs, process them efficiently with checkpointing:
import sqlite3
from datetime import datetime
def enrich_game_catalog(app_ids: list[int], db_path: str = "steam_data.db", batch_size: int = 100):
    """Fetch and store details for a list of app IDs with progress saving.

    Maintains a scrape_queue table so an interrupted run resumes where it
    left off; each app ends in status done / not_found / error: <msg>.
    """
    conn = init_db(db_path)
    conn.execute("""
        CREATE TABLE IF NOT EXISTS scrape_queue (
            app_id INTEGER PRIMARY KEY,
            status TEXT DEFAULT 'pending',
            updated_at TEXT
        )
    """)
    # Seed the queue; INSERT OR IGNORE preserves statuses from earlier runs.
    for app_id in app_ids:
        conn.execute(
            "INSERT OR IGNORE INTO scrape_queue VALUES (?,?,?)",
            (app_id, "pending", datetime.utcnow().isoformat()),
        )
    conn.commit()

    # Drain the queue one batch at a time until nothing is pending.
    while True:
        batch = conn.execute(
            "SELECT app_id FROM scrape_queue WHERE status = 'pending' LIMIT ?",
            (batch_size,),
        ).fetchall()
        if not batch:
            break
        for (app_id,) in batch:
            try:
                details = get_app_details(app_id)
                if details:
                    save_game(conn, details)
                    outcome = "done"
                else:
                    outcome = "not_found"
            except Exception as exc:
                outcome = f"error: {str(exc)[:50]}"
            conn.execute(
                "UPDATE scrape_queue SET status = ?, updated_at = ? WHERE app_id = ?",
                (outcome, datetime.utcnow().isoformat(), app_id),
            )
            conn.commit()
            time.sleep(1)
        remaining = conn.execute(
            "SELECT COUNT(*) FROM scrape_queue WHERE status = ?", ("pending",)
        ).fetchone()[0]
        print(f"Batch done. Remaining: {remaining}")
    conn.close()
    print("Catalog enrichment complete.")