Scraping eBay Products and Prices (2026)
eBay is one of the most scraped e-commerce sites on the internet, and also one of the most frustrating. Their Finding API was deprecated in 2022. Their official Browse API works but has strict rate limits and requires OAuth. Direct HTML scraping is possible — with the right approach and the right proxy infrastructure.
This guide covers the complete picture for 2026: the Browse API for structured data, HTML scraping with BeautifulSoup for what the API misses, full pagination, error handling, seller rating extraction, bid price tracking, and scaling with residential proxies.
The Official Path: eBay Browse API
The current official API is the Browse API, part of eBay's REST API suite. It replaced the old Finding API and returns structured JSON for search results and individual item lookups.
Register an app at developer.ebay.com and generate a client credentials OAuth token:
import httpx
import base64
import time
import random
import logging
import sqlite3
import json
from typing import Optional
from bs4 import BeautifulSoup
# One-time root logging configuration; the module logger below inherits it.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s"
)
logger = logging.getLogger(__name__)

# Browse API credentials -- placeholders; register an app at
# developer.ebay.com to obtain real values.
CLIENT_ID = "your-client-id"
CLIENT_SECRET = "your-client-secret"
# Base URL for all Browse API endpoints and the default marketplace id
# sent in the X-EBAY-C-MARKETPLACE-ID header.
BROWSE_BASE = "https://api.ebay.com/buy/browse/v1"
MARKETPLACE_US = "EBAY_US"
def get_ebay_token() -> Optional[str]:
    """Get a client-credentials OAuth token for the Browse API.

    Returns:
        The access token string, or None on any failure (network error,
        rejected credentials, malformed response).
    """
    credentials = base64.b64encode(
        f"{CLIENT_ID}:{CLIENT_SECRET}".encode()
    ).decode()
    try:
        response = httpx.post(
            "https://api.ebay.com/identity/v1/oauth2/token",
            headers={
                "Authorization": f"Basic {credentials}",
                "Content-Type": "application/x-www-form-urlencoded",
            },
            data="grant_type=client_credentials&scope=https://api.ebay.com/oauth/api_scope",
            timeout=15,
        )
        response.raise_for_status()
        token_data = response.json()
        logger.info("eBay token acquired")
        return token_data.get("access_token")
    except httpx.HTTPStatusError as e:
        # raise_for_status() raises HTTPStatusError, which is NOT a
        # RequestError subclass -- without this clause a 4xx/5xx fell
        # into the generic handler with an unhelpful message.
        logger.error(f"Token request rejected: HTTP {e.response.status_code}")
        return None
    except httpx.RequestError as e:
        logger.error(f"Token request failed: {e}")
        return None
    except Exception as e:
        logger.error(f"Unexpected token error: {e}")
        return None
class EbayBrowseClient:
    """
    Client for the eBay Browse API with automatic token management.

    Handles token refresh on 401, rate limiting (429 + Retry-After),
    exponential backoff on transient failures, and pagination-friendly
    GET requests.
    """

    def __init__(self, client_id: str, client_secret: str, marketplace: str = "EBAY_US"):
        self.client_id = client_id
        self.client_secret = client_secret
        self.marketplace = marketplace
        # Set by _refresh_token(); stays None if the refresh fails.
        self.token = None
        self._refresh_token()

    def _refresh_token(self) -> None:
        """Fetch a fresh client-credentials token; sets token to None on failure."""
        credentials = base64.b64encode(
            f"{self.client_id}:{self.client_secret}".encode()
        ).decode()
        try:
            response = httpx.post(
                "https://api.ebay.com/identity/v1/oauth2/token",
                headers={
                    "Authorization": f"Basic {credentials}",
                    "Content-Type": "application/x-www-form-urlencoded",
                },
                data="grant_type=client_credentials&scope=https://api.ebay.com/oauth/api_scope",
                timeout=15,
            )
            response.raise_for_status()
            self.token = response.json().get("access_token")
        except Exception as e:
            logger.error(f"Token refresh failed: {e}")
            self.token = None

    def _headers(self) -> dict:
        """Standard request headers carrying the current bearer token."""
        return {
            "Authorization": f"Bearer {self.token}",
            "X-EBAY-C-MARKETPLACE-ID": self.marketplace,
            "Content-Type": "application/json",
        }

    def get(
        self,
        endpoint: str,
        params: Optional[dict] = None,
        max_retries: int = 3,
    ) -> Optional[dict]:
        """Make an authenticated GET request with retry logic.

        Args:
            endpoint: Path relative to the Browse API base URL.
            params: Optional query parameters.
            max_retries: Attempts before giving up.

        Returns:
            Parsed JSON dict on success, None on 404 or exhaustion.
        """
        url = f"{BROWSE_BASE}/{endpoint.lstrip('/')}"
        for attempt in range(max_retries):
            # If the last refresh failed, try again rather than sending a
            # request that is guaranteed to 401 with "Bearer None".
            if self.token is None:
                self._refresh_token()
            try:
                resp = httpx.get(url, headers=self._headers(), params=params, timeout=20)
                if resp.status_code == 200:
                    return resp.json()
                elif resp.status_code == 401:
                    # Token expired, refresh and retry
                    logger.info("Token expired, refreshing...")
                    self._refresh_token()
                    continue
                elif resp.status_code == 429:
                    # Honor the server-specified backoff; default 60s.
                    retry_after = int(resp.headers.get("retry-after", 60))
                    logger.warning(f"Rate limited, waiting {retry_after}s")
                    time.sleep(retry_after)
                    continue
                elif resp.status_code == 404:
                    # Item/endpoint genuinely absent -- not retryable.
                    return None
                else:
                    logger.warning(f"HTTP {resp.status_code}: {endpoint}")
                    time.sleep(2 ** attempt)
            except httpx.TimeoutException:
                logger.warning(f"Timeout on attempt {attempt+1}")
                time.sleep(2 ** attempt + 1)
            except httpx.NetworkError as e:
                logger.error(f"Network error: {e}")
                time.sleep(5)
        return None
Querying Item Summaries
def search_ebay(
    client: "EbayBrowseClient",
    query: str,
    limit: int = 200,
    offset: int = 0,
    filters: Optional[str] = None,
    sort: Optional[str] = None,
    category_ids: Optional[str] = None,
) -> list:
    """
    Search eBay listings via the Browse API.

    filters examples:
        "price:[10..50],priceCurrency:USD"
        "buyingOptions:{AUCTION|FIXED_PRICE}"
        "conditions:{NEW|USED}"
        "itemLocationCountry:US"
    sort options: price (asc), -price (desc), distance, -watchCount, newlyListed

    Args:
        client: Authenticated Browse API client.
        query: Search keywords.
        limit: Page size; the API caps this at 200.
        offset: Pagination offset.
        filters: Optional Browse API filter expression (see examples).
        sort: Optional sort key.
        category_ids: Optional comma-separated category ids.

    Returns:
        Parsed item dicts (see parse_item_summary); empty list on failure.
    """
    params = {
        "q": query,
        "limit": min(limit, 200),  # the API rejects page sizes above 200
        "offset": offset,
        "fieldgroups": "EXTENDED",
    }
    if filters:
        params["filter"] = filters
    if sort:
        params["sort"] = sort
    if category_ids:
        params["category_ids"] = category_ids
    data = client.get("item_summary/search", params=params)
    if not data:
        return []
    return [parse_item_summary(item) for item in data.get("itemSummaries", [])]
def parse_item_summary(item: dict) -> dict:
    """Flatten one itemSummary record from Browse API search results.

    Tolerates fields that are missing OR explicitly null -- eBay returns
    `"price": null` / `"categories": null` on some listing types, which
    made the old `item.get("price", {}).get(...)` chains raise
    AttributeError/TypeError.

    Args:
        item: Raw itemSummary dict from the API.

    Returns:
        Flat dict of listing fields; absent values come back as None.
    """
    shipping_options = item.get("shippingOptions") or []
    shipping = shipping_options[0] if shipping_options else {}
    seller = item.get("seller") or {}
    price = item.get("price") or {}
    image = item.get("image") or {}
    location = item.get("itemLocation") or {}
    leaf_ids = item.get("leafCategoryIds") or []
    return {
        "item_id": item.get("itemId"),
        "title": item.get("title"),
        "price": price.get("value"),
        "currency": price.get("currency"),
        "buying_options": item.get("buyingOptions", []),
        "condition": item.get("condition"),
        "condition_id": item.get("conditionId"),
        "seller_username": seller.get("username"),
        "seller_feedback_pct": seller.get("feedbackPercentage"),
        "seller_feedback_score": seller.get("feedbackScore"),
        "shipping_cost": (shipping.get("shippingCost") or {}).get("value"),
        "shipping_type": shipping.get("shippingServiceCode"),
        "item_url": item.get("itemWebUrl"),
        "image_url": image.get("imageUrl"),
        "location_country": location.get("country"),
        "categories": [c.get("categoryName") for c in item.get("categories") or []],
        "leaf_category_id": leaf_ids[0] if leaf_ids else None,
    }
Handling Pagination
The Browse API uses offset-based pagination with a max of 200 per request. eBay caps total results at 10,000 per query regardless of pagination:
def search_all_pages(
    client: "EbayBrowseClient",
    query: str,
    max_results: int = 1000,
    filters: Optional[str] = None,
    sort: Optional[str] = None,
    delay: float = 0.5,
) -> list:
    """
    Paginate through Browse API results for a query.

    eBay caps any single query at 10,000 reachable results regardless of
    pagination; break large queries by price range to get past the cap:
        filter="price:[0..50],priceCurrency:USD"   -> 0-50 USD
        filter="price:[50..150],priceCurrency:USD" -> 50-150 USD

    Args:
        client: Authenticated Browse API client.
        query: Search keywords.
        max_results: Stop after collecting this many items.
        filters: Optional Browse API filter expression.
        sort: Optional sort key (e.g. "price", "-price", "newlyListed").
        delay: Seconds to sleep between page requests.

    Returns:
        Parsed item dicts (see parse_item_summary), at most max_results.
    """
    all_items = []
    offset = 0
    limit = 200  # Browse API maximum page size
    while len(all_items) < max_results:
        params = {
            "q": query,
            "limit": limit,
            "offset": offset,
            "fieldgroups": "EXTENDED",
        }
        if filters:
            params["filter"] = filters
        if sort:
            params["sort"] = sort
        data = client.get("item_summary/search", params=params)
        if not data:
            break
        items = [parse_item_summary(i) for i in data.get("itemSummaries", [])]
        if not items:
            break
        all_items.extend(items)
        total = int(data.get("total", 0))
        offset += limit
        logger.info(f"Pagination: {len(all_items)}/{min(total, max_results)} items")
        # Stop at whichever comes first: the reported result total, eBay's
        # hard 10K pagination ceiling, or the caller's max_results budget.
        if offset >= min(total, 10000, max_results):
            break
        time.sleep(delay)
    return all_items[:max_results]
def search_by_price_ranges(
    client: EbayBrowseClient,
    query: str,
    ranges: list = None,
    max_per_range: int = 1000,
) -> list:
    """
    Break a query across price ranges to collect more than 10K results.

    ranges: list of (min, max) tuples, e.g., [(0,25), (25,75), (75,200)]
    """
    if ranges is None:
        ranges = [(0, 25), (25, 75), (75, 200), (200, 500), (500, 99999)]
    collected = []
    seen_ids = set()
    for low, high in ranges:
        price_filter = f"price:[{low}..{high}],priceCurrency:USD"
        logger.info(f"Collecting range ${low}-${high}")
        range_items = search_all_pages(
            client, query,
            max_results=max_per_range,
            filters=price_filter,
        )
        # Listings on a range boundary can appear in two adjacent ranges;
        # keep only the first occurrence of each item id.
        fresh = []
        for candidate in range_items:
            candidate_id = candidate.get("item_id")
            if candidate_id in seen_ids:
                continue
            seen_ids.add(candidate_id)
            fresh.append(candidate)
        collected.extend(fresh)
        logger.info(f"Range ${low}-${high}: {len(fresh)} new items")
        time.sleep(1.0)
    return collected
Bid Prices vs Buy It Now
Auction listings and fixed-price listings appear in the same search results. Differentiate and analyze them:
def classify_listing(item: dict) -> str:
    """Classify a listing by its buying options."""
    options = set(item.get("buying_options", []))
    has_auction = "AUCTION" in options
    has_bin = "FIXED_PRICE" in options
    if has_auction and has_bin:
        return "auction_with_bin"  # Auction with Buy It Now option
    if has_auction:
        return "auction"
    if has_bin:
        return "buy_it_now"
    if "BEST_OFFER" in options:
        return "best_offer"
    return "unknown"
def analyze_listings(items: list) -> dict:
    """Statistical summary of a listing set.

    Splits price stats into auction vs fixed-price populations and
    reports the ten most prolific sellers by listing count.
    """
    if not items:
        return {}

    def _opts(entry: dict) -> list:
        return entry.get("buying_options") or []

    def _prices(entries: list) -> list:
        return [float(e.get("price", 0)) for e in entries if e.get("price")]

    auctions = [e for e in items if "AUCTION" in _opts(e)]
    fixed = [
        e for e in items
        if "FIXED_PRICE" in _opts(e) and "AUCTION" not in _opts(e)
    ]

    def _stats(prices: list) -> dict:
        """count/avg/median/min/max rounded to cents, or {} when empty."""
        if not prices:
            return {}
        ordered = sorted(prices)
        count = len(ordered)
        middle = count // 2
        if count % 2 == 0:
            median = (ordered[middle - 1] + ordered[middle]) / 2
        else:
            median = ordered[middle]
        return {
            "count": count,
            "avg": round(sum(prices) / count, 2),
            "median": round(median, 2),
            "min": round(min(prices), 2),
            "max": round(max(prices), 2),
        }

    # Top sellers by listing count
    tally = {}
    for entry in items:
        name = entry.get("seller_username", "unknown")
        tally[name] = tally.get(name, 0) + 1
    top_sellers = sorted(tally.items(), key=lambda pair: -pair[1])[:10]

    return {
        "total_listings": len(items),
        "auction_stats": _stats(_prices(auctions)),
        "fixed_price_stats": _stats(_prices(fixed)),
        "all_prices_stats": _stats(_prices(items)),
        "top_sellers": top_sellers,
    }
def get_item_detail(
    client: "EbayBrowseClient",
    item_id: str,
) -> Optional[dict]:
    """Fetch detailed data for a single item, including bid information.

    Args:
        client: Authenticated Browse API client.
        item_id: Legacy numeric item id; expanded into the Browse API's
            "v1|<id>|0" REST item id format.

    Returns:
        Flattened detail dict, or None when the item is not found.
    """
    data = client.get(f"item/v1|{item_id}|0")
    if not data:
        return None
    # Item specifics (brand, model, etc.) arrive as name/value pairs.
    aspects = {}
    for aspect in data.get("localizedAspects") or []:
        aspects[aspect.get("name", "")] = aspect.get("value", "")
    # eBay returns some blocks as explicit null rather than omitting them
    # (e.g. currentBidPrice on fixed-price items); `or {}` keeps the
    # nested .get() chains from raising AttributeError.
    price = data.get("price") or {}
    bid = data.get("currentBidPrice") or {}
    seller = data.get("seller") or {}
    return {
        "item_id": data.get("itemId"),
        "title": data.get("title"),
        "price": price.get("value"),
        "current_bid_price": bid.get("value"),
        "bid_count": data.get("bidCount"),
        "end_date": data.get("itemEndDate"),
        "condition": data.get("condition"),
        "category": data.get("categoryPath"),
        "description": (data.get("description") or "")[:2000],  # cap huge HTML blobs
        "seller_username": seller.get("username"),
        "seller_feedback_pct": seller.get("feedbackPercentage"),
        "seller_feedback_score": seller.get("feedbackScore"),
        "buying_options": data.get("buyingOptions", []),
        "shipping_options": data.get("shippingOptions", []),
        "return_terms": data.get("returnTerms", {}),
        "aspects": aspects,
        "images": [img.get("imageUrl") for img in data.get("additionalImages") or []],
    }
Direct HTML Scraping with BeautifulSoup
The Browse API has a rate limit of 5,000 calls/day on the free tier. If you need more volume, or data the API does not expose (completed listings, sold prices, watch counts), scraping the HTML is the other option.
eBay's search results render server-side HTML, so httpx works fine without JavaScript rendering:
# Baseline browser-like headers for HTML scraping. The Accept value
# previously contained "application/xhtml;q=0.9" -- a typo for
# "application/xml;q=0.9" (the standard Chrome Accept header), which made
# the header fingerprint look non-browser-like.
SCRAPE_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/126.0.0.0 Safari/537.36"
    ),
    "Accept-Language": "en-US,en;q=0.9",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Encoding": "gzip, deflate, br",
    "DNT": "1",
}

# Alternative User-Agents to rotate per-client (see make_scrape_client).
USER_AGENTS = [
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
]
def make_scrape_client(proxy_url: Optional[str] = None) -> httpx.Client:
    """Create an httpx client configured for HTML scraping.

    Args:
        proxy_url: Optional proxy URL; all traffic is routed through it
            when provided.

    Returns:
        An httpx.Client with browser-like headers, a randomly chosen
        User-Agent, redirects enabled, and split connect/read timeouts.
    """
    # Rotate the UA per client so repeated sessions don't share one fingerprint.
    headers = {**SCRAPE_HEADERS, "User-Agent": random.choice(USER_AGENTS)}
    kwargs = {
        "headers": headers,
        "follow_redirects": True,
        "timeout": httpx.Timeout(25.0, connect=10.0),
    }
    if proxy_url:
        kwargs["proxy"] = proxy_url
    return httpx.Client(**kwargs)
def scrape_ebay_search(
    query: str,
    pages: int = 3,
    sold_only: bool = False,
    proxy_url: Optional[str] = None,
) -> list:
    """
    Scrape eBay search results via HTML.

    Works for active listings and, with sold_only=True, for sold/completed
    listings (adds LH_Complete=1&LH_Sold=1 to the query string).

    Args:
        query: Search keywords.
        pages: Maximum result pages to fetch (60 listings per page).
        sold_only: Restrict results to completed + sold listings.
        proxy_url: Optional proxy for the scraping client.

    Returns:
        List of parsed listing dicts (see parse_html_listing).
    """
    import urllib.parse  # local import, matching make_affiliate_url's style

    results = []
    client = make_scrape_client(proxy_url)
    # quote_plus encodes spaces as '+' like the old str.replace did, but
    # also handles '&', '#', '%', and non-ASCII instead of corrupting the URL.
    encoded = urllib.parse.quote_plus(query)
    for page in range(1, pages + 1):
        url = (
            f"https://www.ebay.com/sch/i.html"
            f"?_nkw={encoded}"
            f"&_pgn={page}&_ipg=60"
        )
        if sold_only:
            url += "&LH_Complete=1&LH_Sold=1"
        try:
            response = client.get(url)
            response.raise_for_status()
        except httpx.HTTPStatusError as e:
            logger.warning(f"HTTP {e.response.status_code} on page {page}")
            break
        except httpx.RequestError as e:
            logger.error(f"Request error on page {page}: {e}")
            break
        soup = BeautifulSoup(response.text, "html.parser")
        listings = soup.select("li.s-item")
        page_count = 0
        for listing in listings:
            # Skip eBay's injected ghost item and "Shop on eBay" placeholders
            title_el = listing.select_one(".s-item__title")
            if not title_el or "Shop on eBay" in title_el.text:
                continue
            item = parse_html_listing(listing)
            if item.get("title"):
                results.append(item)
                page_count += 1
        logger.info(f"Page {page}: {page_count} listings")
        # Stop when the results run out instead of requesting empty pages
        next_page = soup.select_one("a.pagination__next")
        if not next_page:
            break
        # Human-like delay between pages
        time.sleep(1.5 + random.uniform(0, 1.5))
    client.close()
    return results
def parse_html_listing(listing) -> dict:
    """Parse one `li.s-item` element from eBay search results.

    Args:
        listing: BeautifulSoup Tag for a single search-result listing.

    Returns:
        Dict with title, price text/range, URL, item id, shipping,
        condition, seller info, sold markers, image URL, and time
        remaining. Missing elements yield empty-string values.
    """
    import re  # hoisted out of the price branch; `re` is not imported at module level

    result = {}
    # Title
    title_el = listing.select_one(".s-item__title")
    result["title"] = title_el.text.strip() if title_el else ""
    # Price (can be a range like "US $10.00 to US $20.00" for lot items)
    price_el = listing.select_one(".s-item__price")
    result["price_text"] = price_el.text.strip() if price_el else ""
    if price_el:
        price_text = price_el.text.strip()
        prices = re.findall(r"\$?([\d,]+\.?\d*)", price_text)
        if prices:
            # Single price -> low == high; range -> first and last match.
            result["price_low"] = float(prices[0].replace(",", ""))
            result["price_high"] = float(prices[-1].replace(",", ""))
    # URL, with the tracking query string stripped
    link_el = listing.select_one("a.s-item__link")
    result["url"] = link_el["href"].split("?")[0] if link_el else ""
    # Item ID from the /itm/<id> URL path
    if result.get("url") and "/itm/" in result["url"]:
        result["item_id"] = result["url"].split("/itm/")[-1]
    # Shipping
    ship_el = listing.select_one(".s-item__shipping, .s-item__freeXDays")
    result["shipping"] = ship_el.text.strip() if ship_el else ""
    # Condition
    condition_el = listing.select_one(".SECONDARY_INFO")
    result["condition"] = condition_el.text.strip() if condition_el else ""
    # Seller info (limited in search results)
    seller_el = listing.select_one(".s-item__seller-info-text")
    result["seller_info"] = seller_el.text.strip() if seller_el else ""
    # Sold count (for Buy It Now items with multiple sold)
    sold_el = listing.select_one(".s-item__quantitySold")
    result["sold_count_text"] = sold_el.text.strip() if sold_el else ""
    # Completed listings render the sold price with the POSITIVE class
    sold_price_el = listing.select_one(".s-item__price.POSITIVE")
    if sold_price_el:
        result["is_sold"] = True
    # Image (lazy-loaded images keep the real URL in data-src)
    img_el = listing.select_one("img.s-item__image-img")
    if img_el:
        result["image_url"] = img_el.get("src") or img_el.get("data-src")
    # Time remaining (for auctions)
    time_el = listing.select_one(".s-item__time-left")
    result["time_left"] = time_el.text.strip() if time_el else ""
    return result
Extracting Seller Ratings from Listing Pages
Search results give you a seller feedback percentage but not the full details. Scraping individual listing pages gives you more:
def scrape_listing_detail(
    url: str,
    client: httpx.Client,
) -> dict:
    """
    Scrape full details from a single eBay listing page.

    Returns seller detail, item specifics, and bid information; returns
    an empty dict on any request or HTTP-status failure.
    """
    try:
        response = client.get(url)
        response.raise_for_status()
    except httpx.HTTPError as e:
        # httpx.HTTPError is the common base of RequestError (network,
        # timeout) AND HTTPStatusError (raised by raise_for_status).
        # The old clause only caught RequestError, so 4xx/5xx pages
        # escaped as exceptions instead of returning {} as documented.
        logger.error(f"Error scraping {url}: {e}")
        return {}
    soup = BeautifulSoup(response.text, "html.parser")
    result = {}
    # Seller store name and feedback score
    seller_el = soup.select_one("[data-testid='str-title'] a")
    if seller_el:
        result["seller_username"] = seller_el.text.strip()
    feedback_el = soup.select_one(".str-value")
    if feedback_el:
        result["feedback_score"] = feedback_el.text.strip()
    # Bid count and current price (auctions)
    bid_count_el = soup.select_one(".x-bid-count")
    if bid_count_el:
        result["bid_count"] = bid_count_el.text.strip()
    # Time left (auctions)
    time_el = soup.select_one(".x-time-left")
    if time_el:
        result["time_left"] = time_el.text.strip()
    # Item specifics table: each label cell's sibling holds the value
    specifics = {}
    for row in soup.select(".ux-labels-values__labels-content"):
        label_el = row.select_one(".ux-textspans--BOLD")
        if not label_el:
            label_el = row.select_one(".ux-textspans")
        value_container = row.find_next_sibling()
        if label_el and value_container:
            label = label_el.text.strip().rstrip(":")
            value = value_container.get_text(strip=True, separator=", ")
            if label:
                specifics[label] = value[:200]  # cap runaway values
    result["item_specifics"] = specifics
    # Returns policy
    returns_el = soup.select_one("[data-testid='ux-labels-values'] .ux-textspans")
    if returns_el:
        result["returns_info"] = returns_el.text.strip()
    # Watchers: no stable selector, so scan text spans for "N watchers"
    for el in soup.select(".ux-textspans"):
        text = el.text.strip()
        if "watcher" in text.lower() and any(c.isdigit() for c in text):
            result["watchers"] = text
            break
    return result
Data Storage
def init_database(db_path: str = "ebay_listings.db") -> sqlite3.Connection:
    """Open (or create) the SQLite database used to store eBay listings.

    Idempotent: the table and indexes use IF NOT EXISTS, so repeated
    calls against the same file are safe.

    Args:
        db_path: Path to the database file (":memory:" works for tests).

    Returns:
        An open sqlite3.Connection with the schema in place.
    """
    schema = """
    CREATE TABLE IF NOT EXISTS listings (
    item_id TEXT PRIMARY KEY,
    title TEXT,
    search_query TEXT,
    price REAL,
    price_low REAL,
    price_high REAL,
    currency TEXT,
    buying_options TEXT,
    listing_type TEXT,
    condition TEXT,
    condition_id TEXT,
    seller_username TEXT,
    seller_feedback_pct TEXT,
    seller_feedback_score INTEGER,
    shipping_cost TEXT,
    shipping_type TEXT,
    location_country TEXT,
    url TEXT,
    image_url TEXT,
    category_name TEXT,
    item_specifics TEXT,
    scraped_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
    );
    CREATE INDEX IF NOT EXISTS idx_query ON listings(search_query);
    CREATE INDEX IF NOT EXISTS idx_seller ON listings(seller_username);
    CREATE INDEX IF NOT EXISTS idx_price ON listings(price);
    """
    connection = sqlite3.connect(db_path)
    connection.executescript(schema)
    connection.commit()
    return connection
def save_listings(conn: sqlite3.Connection, items: list, query: str) -> int:
    """Persist listing records, returning the number saved.

    Uses INSERT OR REPLACE so re-scrapes refresh existing rows in place.

    Args:
        conn: Open connection with the `listings` table created.
        items: Parsed listing dicts (Browse API shape, or the mapped
            HTML shape produced in run_market_research).
        query: Search query tag stored alongside each row.

    Returns:
        Count of rows successfully written.
    """
    saved = 0
    for item in items:
        try:
            # `or []` guards against an explicit None value, which the old
            # get("buying_options", []) did not -- None raised TypeError on
            # the `in` test below and aborted the whole batch uncommitted.
            opts = item.get("buying_options") or []
            listing_type = "auction" if "AUCTION" in opts else "buy_it_now"
            conn.execute("""
                INSERT OR REPLACE INTO listings
                (item_id, title, search_query, price, currency, buying_options,
                 listing_type, condition, condition_id, seller_username,
                 seller_feedback_pct, seller_feedback_score, shipping_cost,
                 shipping_type, location_country, url, image_url, category_name)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """, (
                item.get("item_id"),
                item.get("title"),
                query,
                item.get("price"),
                item.get("currency", "USD"),
                json.dumps(opts),
                listing_type,
                item.get("condition"),
                item.get("condition_id"),
                item.get("seller_username"),
                item.get("seller_feedback_pct"),
                item.get("seller_feedback_score"),
                item.get("shipping_cost"),
                item.get("shipping_type"),
                item.get("location_country"),
                item.get("item_url") or item.get("url", ""),
                item.get("image_url"),
                (item.get("categories") or [None])[0],
            ))
            saved += 1
        except sqlite3.Error as e:
            logger.error(f"DB error: {e}")
    conn.commit()
    return saved
eBay's Affiliate Program (Partner Network)
If you're building a price comparison tool or product listing site, eBay's Partner Network (EPN) pays commissions on traffic you send to eBay. You can combine it with the Browse API: use the API to pull listings, then tag the item URLs with your affiliate tracking.
EPN uses a rover link format:
ROVER_CAMPAIGN_ID = "your-rover-campaign-id"
EPN_SITE_ID = "711-53200-19255-0"  # US site ID

def make_affiliate_url(item_url: str) -> str:
    """Wrap an eBay item URL with EPN affiliate tracking."""
    import urllib.parse
    # The destination URL must be fully percent-encoded for the mpre param.
    target = urllib.parse.quote(item_url, safe="")
    base = f"https://rover.ebay.com/rover/1/{EPN_SITE_ID}/{ROVER_CAMPAIGN_ID}"
    return f"{base}?mpre={target}&toolid=10001"
# Usage: wrap search results before displaying them.
# NOTE(review): illustrative snippet -- `search_results` is assumed to be a
# list of dicts produced by search_ebay(); it is not defined in this file,
# so this loop will NameError if executed as-is.
for item in search_results:
    item["affiliate_url"] = make_affiliate_url(item.get("item_url", ""))
Commission rates vary by category — typically 1-4% on completed sales. High-volume categories like electronics pay on the lower end; collectibles and fashion pay higher. EPN is separate from API access and has its own application process.
When the API Is Not Enough: Residential Proxies
The Browse API won't give you historical sold prices, completed auction data, or full seller feedback history. For those, you're scraping HTML.
From a datacenter IP, eBay will soft-block you after 100-200 requests with a CAPTCHA interstitial. Akamai's bot management is effective at identifying datacenter traffic.
Residential proxies from ThorData pass as real users because they are real ISP IPs. Configure them in httpx:
THORDATA_USER = "your_user"
THORDATA_PASS = "your_pass"

def get_proxy(country: str = "US") -> str:
    """Build a ThorData residential proxy URL targeting a country."""
    # Country targeting is encoded in the username per ThorData convention.
    auth = f"{THORDATA_USER}-country-{country}:{THORDATA_PASS}"
    endpoint = "proxy.thordata.com:9000"
    return f"http://{auth}@{endpoint}"
def scrape_with_proxy_rotation(
    queries: list,
    pages_per_query: int = 5,
    rotate_every: int = 50,
) -> dict:
    """
    Scrape multiple search queries with proxy rotation.

    Rotates to a fresh proxy every `rotate_every` pages so a single exit
    IP never accumulates many consecutive requests.

    Args:
        queries: Search keyword strings.
        pages_per_query: Result pages to fetch per query.
        rotate_every: Pages between proxy rotations.

    Returns:
        Mapping of query -> list of parsed listing dicts.
    """
    results = {}
    page_count = 0
    for query in queries:
        proxy_url = get_proxy(country="US")
        client = make_scrape_client(proxy_url)
        query_results = []
        for page in range(1, pages_per_query + 1):
            # Rotate proxy periodically
            if page_count > 0 and page_count % rotate_every == 0:
                client.close()
                proxy_url = get_proxy()
                client = make_scrape_client(proxy_url)
                logger.info(f"Rotated proxy at page {page_count}")
            url = f"https://www.ebay.com/sch/i.html?_nkw={query.replace(' ', '+')}&_pgn={page}&_ipg=60"
            try:
                resp = client.get(url)
                if resp.status_code == 200:
                    soup = BeautifulSoup(resp.text, "html.parser")
                    for listing in soup.select("li.s-item"):
                        item = parse_html_listing(listing)
                        title = item.get("title", "")
                        if title and "Shop on eBay" not in title:
                            query_results.append(item)
                else:
                    # Previously non-200 pages were dropped silently; a 503
                    # or CAPTCHA interstitial should at least show in logs.
                    logger.warning(f"HTTP {resp.status_code} for '{query}' page {page}")
            except Exception as e:
                logger.error(f"Error on page {page}: {e}")
            page_count += 1
            time.sleep(random.uniform(1.5, 3.5))
        results[query] = query_results
        client.close()
        logger.info(f"'{query}': {len(query_results)} listings")
    return results
Complete Research Pipeline
def run_market_research(
    queries: list,
    use_api: bool = True,
    use_scraping: bool = True,
    max_api_results: int = 500,
    scraping_pages: int = 5,
    db_path: str = "ebay_research.db",
    use_proxy: bool = True,
) -> None:
    """
    Combined API and scraping pipeline for eBay market research.

    Args:
        queries: Search keyword strings to research.
        use_api: Collect structured data via the Browse API.
        use_scraping: Also collect HTML search results.
        max_api_results: Per-query cap for API pagination.
        scraping_pages: HTML result pages per query.
        db_path: SQLite file for collected listings.
        use_proxy: Route HTML scraping through a residential proxy.
            Replaces the old dead-code toggle `get_proxy() if True else
            None`; the default preserves the previous behavior.
    """
    conn = init_database(db_path)
    if use_api:
        # Pre-flight credential check only; EbayBrowseClient fetches its
        # own token in __init__, so this token itself is not reused.
        token = get_ebay_token()
        if token:
            client = EbayBrowseClient(CLIENT_ID, CLIENT_SECRET)
            for query in queries:
                logger.info(f"API search: {query}")
                items = search_all_pages(client, query, max_results=max_api_results)
                save_listings(conn, items, query)
                stats = analyze_listings(items)
                print(f"\n{query} (Browse API):")
                print(f" Total listings: {stats.get('total_listings', 0)}")
                if stats.get("all_prices_stats"):
                    ps = stats["all_prices_stats"]
                    print(f" Avg price: ${ps['avg']:.2f}")
                    print(f" Price range: ${ps['min']:.2f} - ${ps['max']:.2f}")
                if stats.get("auction_stats", {}).get("count"):
                    a = stats["auction_stats"]
                    print(f" Auctions ({a['count']}): avg ${a['avg']:.2f}")
    if use_scraping:
        proxy_url = get_proxy() if use_proxy else None
        for query in queries:
            logger.info(f"HTML scrape: {query}")
            items = scrape_ebay_search(
                query, pages=scraping_pages,
                proxy_url=proxy_url,
            )
            # Map HTML items to the Browse-API-shaped record save_listings expects.
            db_items = []
            for item in items:
                db_items.append({
                    "item_id": item.get("item_id"),
                    "title": item.get("title"),
                    "price": item.get("price_low"),
                    "currency": "USD",
                    "buying_options": [],
                    "condition": item.get("condition"),
                    "item_url": item.get("url"),
                    "image_url": item.get("image_url"),
                })
            save_listings(conn, db_items, f"{query}_html")
            logger.info(f"Scraped {len(items)} listings for '{query}'")
    conn.close()
if __name__ == "__main__":
    # Demo run: three product categories, both collection paths enabled.
    research_queries = [
        "mechanical keyboard 65 percent",
        "vintage camera 35mm film",
        "lego architecture",
    ]
    run_market_research(
        queries=research_queries,
        use_api=True,
        use_scraping=True,
        max_api_results=500,
        scraping_pages=5,
    )
Key Takeaways
The Browse API is the right starting point for most projects — it is structured, reliable, and handles the heavy lifting. Key points:
-
Browse API requires OAuth but is straightforward to set up. Free tier gives 5,000 calls/day.
-
Pagination caps at 10,000 total results per query. Break large searches by price range using the `filter` parameter to get more comprehensive coverage.
-
Auction vs. Buy It Now prices tell different stories — track them separately. Use the `buyingOptions:{AUCTION}` filter to isolate auctions.
-
HTML scraping fills the gaps — sold prices, watch counts, detailed bid history. Keep selectors in a config so updating them doesn't require touching core logic.
-
Residential proxies for HTML scraping — ThorData is the practical solution for sustained volume. Datacenter IPs get CAPTCHA'd quickly.
-
Keep delays at 1-3 seconds per page even with residential proxies. eBay's bot detection also analyzes behavioral patterns, not just IP reputation.
-
Store everything in SQLite — once you have historical data, analytical value compounds. Price trends are only visible with history.