Scraping eBay Products and Prices (2026)
eBay is one of the most scraped e-commerce sites on the internet, and also one of the most frustrating. Their Finding API was deprecated in 2022. Their official Browse API works but has strict rate limits and requires OAuth. Direct HTML scraping is possible — with the right approach and the right proxy infrastructure.
This guide covers the complete picture for 2026: the Browse API for structured data, HTML scraping with BeautifulSoup for what the API misses, full pagination, error handling, seller rating extraction, bid price tracking, and scaling with residential proxies.
The Official Path: eBay Browse API
The current official API is the Browse API, part of eBay's REST API suite. It replaced the old Finding API and returns structured JSON for search results and individual item lookups.
Register an app at developer.ebay.com and generate a client credentials OAuth token:
import httpx
import base64
import time
import random
import logging
import sqlite3
import json
from typing import Optional
from bs4 import BeautifulSoup
# One-time root logging configuration; the module logger below inherits it.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s"
)
logger = logging.getLogger(__name__)

# Browse API credentials -- placeholders; register an app at
# developer.ebay.com to obtain real values.
CLIENT_ID = "your-client-id"
CLIENT_SECRET = "your-client-secret"
# Base URL for all Browse API endpoints and the default marketplace id
# sent in the X-EBAY-C-MARKETPLACE-ID header.
BROWSE_BASE = "https://api.ebay.com/buy/browse/v1"
MARKETPLACE_US = "EBAY_US"
def get_ebay_token() -> Optional[str]:
    """Get a client-credentials OAuth token for the Browse API.

    Returns:
        The access token string, or None on any failure (network error,
        rejected credentials, malformed response).
    """
    credentials = base64.b64encode(
        f"{CLIENT_ID}:{CLIENT_SECRET}".encode()
    ).decode()
    try:
        response = httpx.post(
            "https://api.ebay.com/identity/v1/oauth2/token",
            headers={
                "Authorization": f"Basic {credentials}",
                "Content-Type": "application/x-www-form-urlencoded",
            },
            data="grant_type=client_credentials&scope=https://api.ebay.com/oauth/api_scope",
            timeout=15,
        )
        response.raise_for_status()
        token_data = response.json()
        logger.info("eBay token acquired")
        return token_data.get("access_token")
    except httpx.HTTPStatusError as e:
        # raise_for_status() raises HTTPStatusError, which is NOT a
        # RequestError subclass -- without this clause a 4xx/5xx fell
        # into the generic handler with an unhelpful message.
        logger.error(f"Token request rejected: HTTP {e.response.status_code}")
        return None
    except httpx.RequestError as e:
        logger.error(f"Token request failed: {e}")
        return None
    except Exception as e:
        logger.error(f"Unexpected token error: {e}")
        return None
class EbayBrowseClient:
    """
    Client for the eBay Browse API with automatic token management.

    Handles token refresh on 401, rate limiting (429 + Retry-After),
    exponential backoff on transient failures, and pagination-friendly
    GET requests.
    """

    def __init__(self, client_id: str, client_secret: str, marketplace: str = "EBAY_US"):
        self.client_id = client_id
        self.client_secret = client_secret
        self.marketplace = marketplace
        # Set by _refresh_token(); stays None if the refresh fails.
        self.token = None
        self._refresh_token()

    def _refresh_token(self) -> None:
        """Fetch a fresh client-credentials token; sets token to None on failure."""
        credentials = base64.b64encode(
            f"{self.client_id}:{self.client_secret}".encode()
        ).decode()
        try:
            response = httpx.post(
                "https://api.ebay.com/identity/v1/oauth2/token",
                headers={
                    "Authorization": f"Basic {credentials}",
                    "Content-Type": "application/x-www-form-urlencoded",
                },
                data="grant_type=client_credentials&scope=https://api.ebay.com/oauth/api_scope",
                timeout=15,
            )
            response.raise_for_status()
            self.token = response.json().get("access_token")
        except Exception as e:
            logger.error(f"Token refresh failed: {e}")
            self.token = None

    def _headers(self) -> dict:
        """Standard request headers carrying the current bearer token."""
        return {
            "Authorization": f"Bearer {self.token}",
            "X-EBAY-C-MARKETPLACE-ID": self.marketplace,
            "Content-Type": "application/json",
        }

    def get(
        self,
        endpoint: str,
        params: Optional[dict] = None,
        max_retries: int = 3,
    ) -> Optional[dict]:
        """Make an authenticated GET request with retry logic.

        Args:
            endpoint: Path relative to the Browse API base URL.
            params: Optional query parameters.
            max_retries: Attempts before giving up.

        Returns:
            Parsed JSON dict on success, None on 404 or exhaustion.
        """
        url = f"{BROWSE_BASE}/{endpoint.lstrip('/')}"
        for attempt in range(max_retries):
            # If the last refresh failed, try again rather than sending a
            # request that is guaranteed to 401 with "Bearer None".
            if self.token is None:
                self._refresh_token()
            try:
                resp = httpx.get(url, headers=self._headers(), params=params, timeout=20)
                if resp.status_code == 200:
                    return resp.json()
                elif resp.status_code == 401:
                    # Token expired, refresh and retry
                    logger.info("Token expired, refreshing...")
                    self._refresh_token()
                    continue
                elif resp.status_code == 429:
                    # Honor the server-specified backoff; default 60s.
                    retry_after = int(resp.headers.get("retry-after", 60))
                    logger.warning(f"Rate limited, waiting {retry_after}s")
                    time.sleep(retry_after)
                    continue
                elif resp.status_code == 404:
                    # Item/endpoint genuinely absent -- not retryable.
                    return None
                else:
                    logger.warning(f"HTTP {resp.status_code}: {endpoint}")
                    time.sleep(2 ** attempt)
            except httpx.TimeoutException:
                logger.warning(f"Timeout on attempt {attempt+1}")
                time.sleep(2 ** attempt + 1)
            except httpx.NetworkError as e:
                logger.error(f"Network error: {e}")
                time.sleep(5)
        return None
Querying Item Summaries
def search_ebay(
    client: "EbayBrowseClient",
    query: str,
    limit: int = 200,
    offset: int = 0,
    filters: Optional[str] = None,
    sort: Optional[str] = None,
    category_ids: Optional[str] = None,
) -> list:
    """
    Search eBay listings via the Browse API.

    filters examples:
        "price:[10..50],priceCurrency:USD"
        "buyingOptions:{AUCTION|FIXED_PRICE}"
        "conditions:{NEW|USED}"
        "itemLocationCountry:US"
    sort options: price (asc), -price (desc), distance, -watchCount, newlyListed

    Args:
        client: Authenticated Browse API client.
        query: Search keywords.
        limit: Page size; the API caps this at 200.
        offset: Pagination offset.
        filters: Optional Browse API filter expression (see examples).
        sort: Optional sort key.
        category_ids: Optional comma-separated category ids.

    Returns:
        Parsed item dicts (see parse_item_summary); empty list on failure.
    """
    params = {
        "q": query,
        "limit": min(limit, 200),  # the API rejects page sizes above 200
        "offset": offset,
        "fieldgroups": "EXTENDED",
    }
    if filters:
        params["filter"] = filters
    if sort:
        params["sort"] = sort
    if category_ids:
        params["category_ids"] = category_ids
    data = client.get("item_summary/search", params=params)
    if not data:
        return []
    return [parse_item_summary(item) for item in data.get("itemSummaries", [])]
def parse_item_summary(item: dict) -> dict:
    """Flatten one itemSummary record from Browse API search results.

    Tolerates fields that are missing OR explicitly null -- eBay returns
    `"price": null` / `"categories": null` on some listing types, which
    made the old `item.get("price", {}).get(...)` chains raise
    AttributeError/TypeError.

    Args:
        item: Raw itemSummary dict from the API.

    Returns:
        Flat dict of listing fields; absent values come back as None.
    """
    shipping_options = item.get("shippingOptions") or []
    shipping = shipping_options[0] if shipping_options else {}
    seller = item.get("seller") or {}
    price = item.get("price") or {}
    image = item.get("image") or {}
    location = item.get("itemLocation") or {}
    leaf_ids = item.get("leafCategoryIds") or []
    return {
        "item_id": item.get("itemId"),
        "title": item.get("title"),
        "price": price.get("value"),
        "currency": price.get("currency"),
        "buying_options": item.get("buyingOptions", []),
        "condition": item.get("condition"),
        "condition_id": item.get("conditionId"),
        "seller_username": seller.get("username"),
        "seller_feedback_pct": seller.get("feedbackPercentage"),
        "seller_feedback_score": seller.get("feedbackScore"),
        "shipping_cost": (shipping.get("shippingCost") or {}).get("value"),
        "shipping_type": shipping.get("shippingServiceCode"),
        "item_url": item.get("itemWebUrl"),
        "image_url": image.get("imageUrl"),
        "location_country": location.get("country"),
        "categories": [c.get("categoryName") for c in item.get("categories") or []],
        "leaf_category_id": leaf_ids[0] if leaf_ids else None,
    }
Handling Pagination
The Browse API uses offset-based pagination with a max of 200 per request. eBay caps total results at 10,000 per query regardless of pagination:
def search_all_pages(
    client: "EbayBrowseClient",
    query: str,
    max_results: int = 1000,
    filters: Optional[str] = None,
    sort: Optional[str] = None,
    delay: float = 0.5,
) -> list:
    """
    Paginate through Browse API results for a query.

    eBay caps any single query at 10,000 reachable results regardless of
    pagination; break large queries by price range to get past the cap:
        filter="price:[0..50],priceCurrency:USD"   -> 0-50 USD
        filter="price:[50..150],priceCurrency:USD" -> 50-150 USD

    Args:
        client: Authenticated Browse API client.
        query: Search keywords.
        max_results: Stop after collecting this many items.
        filters: Optional Browse API filter expression.
        sort: Optional sort key (e.g. "price", "-price", "newlyListed").
        delay: Seconds to sleep between page requests.

    Returns:
        Parsed item dicts (see parse_item_summary), at most max_results.
    """
    all_items = []
    offset = 0
    limit = 200  # Browse API maximum page size
    while len(all_items) < max_results:
        params = {
            "q": query,
            "limit": limit,
            "offset": offset,
            "fieldgroups": "EXTENDED",
        }
        if filters:
            params["filter"] = filters
        if sort:
            params["sort"] = sort
        data = client.get("item_summary/search", params=params)
        if not data:
            break
        items = [parse_item_summary(i) for i in data.get("itemSummaries", [])]
        if not items:
            break
        all_items.extend(items)
        total = int(data.get("total", 0))
        offset += limit
        logger.info(f"Pagination: {len(all_items)}/{min(total, max_results)} items")
        # Stop at whichever comes first: the reported result total, eBay's
        # hard 10K pagination ceiling, or the caller's max_results budget.
        if offset >= min(total, 10000, max_results):
            break
        time.sleep(delay)
    return all_items[:max_results]
def search_by_price_ranges(
    client: EbayBrowseClient,
    query: str,
    ranges: list = None,
    max_per_range: int = 1000,
) -> list:
    """
    Break a query across price ranges to collect more than 10K results.

    ranges: list of (min, max) tuples, e.g., [(0,25), (25,75), (75,200)]
    """
    if ranges is None:
        ranges = [(0, 25), (25, 75), (75, 200), (200, 500), (500, 99999)]
    collected = []
    seen_ids = set()
    for low, high in ranges:
        price_filter = f"price:[{low}..{high}],priceCurrency:USD"
        logger.info(f"Collecting range ${low}-${high}")
        range_items = search_all_pages(
            client, query,
            max_results=max_per_range,
            filters=price_filter,
        )
        # Listings on a range boundary can appear in two adjacent ranges;
        # keep only the first occurrence of each item id.
        fresh = []
        for candidate in range_items:
            candidate_id = candidate.get("item_id")
            if candidate_id in seen_ids:
                continue
            seen_ids.add(candidate_id)
            fresh.append(candidate)
        collected.extend(fresh)
        logger.info(f"Range ${low}-${high}: {len(fresh)} new items")
        time.sleep(1.0)
    return collected
Bid Prices vs Buy It Now
Auction listings and fixed-price listings appear in the same search results. Differentiate and analyze them:
def classify_listing(item: dict) -> str:
    """Classify a listing by its buying options."""
    options = set(item.get("buying_options", []))
    has_auction = "AUCTION" in options
    has_bin = "FIXED_PRICE" in options
    if has_auction and has_bin:
        return "auction_with_bin"  # Auction with Buy It Now option
    if has_auction:
        return "auction"
    if has_bin:
        return "buy_it_now"
    if "BEST_OFFER" in options:
        return "best_offer"
    return "unknown"
def analyze_listings(items: list) -> dict:
    """Statistical summary of a listing set.

    Splits price stats into auction vs fixed-price populations and
    reports the ten most prolific sellers by listing count.
    """
    if not items:
        return {}

    def _opts(entry: dict) -> list:
        return entry.get("buying_options") or []

    def _prices(entries: list) -> list:
        return [float(e.get("price", 0)) for e in entries if e.get("price")]

    auctions = [e for e in items if "AUCTION" in _opts(e)]
    fixed = [
        e for e in items
        if "FIXED_PRICE" in _opts(e) and "AUCTION" not in _opts(e)
    ]

    def _stats(prices: list) -> dict:
        """count/avg/median/min/max rounded to cents, or {} when empty."""
        if not prices:
            return {}
        ordered = sorted(prices)
        count = len(ordered)
        middle = count // 2
        if count % 2 == 0:
            median = (ordered[middle - 1] + ordered[middle]) / 2
        else:
            median = ordered[middle]
        return {
            "count": count,
            "avg": round(sum(prices) / count, 2),
            "median": round(median, 2),
            "min": round(min(prices), 2),
            "max": round(max(prices), 2),
        }

    # Top sellers by listing count
    tally = {}
    for entry in items:
        name = entry.get("seller_username", "unknown")
        tally[name] = tally.get(name, 0) + 1
    top_sellers = sorted(tally.items(), key=lambda pair: -pair[1])[:10]

    return {
        "total_listings": len(items),
        "auction_stats": _stats(_prices(auctions)),
        "fixed_price_stats": _stats(_prices(fixed)),
        "all_prices_stats": _stats(_prices(items)),
        "top_sellers": top_sellers,
    }
def get_item_detail(
    client: "EbayBrowseClient",
    item_id: str,
) -> Optional[dict]:
    """Fetch detailed data for a single item, including bid information.

    Args:
        client: Authenticated Browse API client.
        item_id: Legacy numeric item id; expanded into the Browse API's
            "v1|<id>|0" REST item id format.

    Returns:
        Flattened detail dict, or None when the item is not found.
    """
    data = client.get(f"item/v1|{item_id}|0")
    if not data:
        return None
    # Item specifics (brand, model, etc.) arrive as name/value pairs.
    aspects = {}
    for aspect in data.get("localizedAspects") or []:
        aspects[aspect.get("name", "")] = aspect.get("value", "")
    # eBay returns some blocks as explicit null rather than omitting them
    # (e.g. currentBidPrice on fixed-price items); `or {}` keeps the
    # nested .get() chains from raising AttributeError.
    price = data.get("price") or {}
    bid = data.get("currentBidPrice") or {}
    seller = data.get("seller") or {}
    return {
        "item_id": data.get("itemId"),
        "title": data.get("title"),
        "price": price.get("value"),
        "current_bid_price": bid.get("value"),
        "bid_count": data.get("bidCount"),
        "end_date": data.get("itemEndDate"),
        "condition": data.get("condition"),
        "category": data.get("categoryPath"),
        "description": (data.get("description") or "")[:2000],  # cap huge HTML blobs
        "seller_username": seller.get("username"),
        "seller_feedback_pct": seller.get("feedbackPercentage"),
        "seller_feedback_score": seller.get("feedbackScore"),
        "buying_options": data.get("buyingOptions", []),
        "shipping_options": data.get("shippingOptions", []),
        "return_terms": data.get("returnTerms", {}),
        "aspects": aspects,
        "images": [img.get("imageUrl") for img in data.get("additionalImages") or []],
    }
Direct HTML Scraping with BeautifulSoup
The Browse API has a rate limit of 5,000 calls/day on the free tier. If you need more volume, or data the API does not expose (completed listings, sold prices, watch counts), scraping the HTML is the other option.
eBay's search results render server-side HTML, so httpx works fine without JavaScript rendering:
# Baseline browser-like headers for HTML scraping. The Accept value
# previously contained "application/xhtml;q=0.9" -- a typo for
# "application/xml;q=0.9" (the standard Chrome Accept header), which made
# the header fingerprint look non-browser-like.
SCRAPE_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/126.0.0.0 Safari/537.36"
    ),
    "Accept-Language": "en-US,en;q=0.9",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Encoding": "gzip, deflate, br",
    "DNT": "1",
}

# Alternative User-Agents to rotate per-client (see make_scrape_client).
USER_AGENTS = [
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
]
def make_scrape_client(proxy_url: Optional[str] = None) -> httpx.Client:
    """Create an httpx client configured for HTML scraping.

    Args:
        proxy_url: Optional proxy URL; all traffic is routed through it
            when provided.

    Returns:
        An httpx.Client with browser-like headers, a randomly chosen
        User-Agent, redirects enabled, and split connect/read timeouts.
    """
    # Rotate the UA per client so repeated sessions don't share one fingerprint.
    headers = {**SCRAPE_HEADERS, "User-Agent": random.choice(USER_AGENTS)}
    kwargs = {
        "headers": headers,
        "follow_redirects": True,
        "timeout": httpx.Timeout(25.0, connect=10.0),
    }
    if proxy_url:
        kwargs["proxy"] = proxy_url
    return httpx.Client(**kwargs)
def scrape_ebay_search(
    query: str,
    pages: int = 3,
    sold_only: bool = False,
    proxy_url: Optional[str] = None,
) -> list:
    """
    Scrape eBay search results via HTML.

    Works for active listings and, with sold_only=True, for sold/completed
    listings (adds LH_Complete=1&LH_Sold=1 to the query string).

    Args:
        query: Search keywords.
        pages: Maximum result pages to fetch (60 listings per page).
        sold_only: Restrict results to completed + sold listings.
        proxy_url: Optional proxy for the scraping client.

    Returns:
        List of parsed listing dicts (see parse_html_listing).
    """
    import urllib.parse  # local import, matching make_affiliate_url's style

    results = []
    client = make_scrape_client(proxy_url)
    # quote_plus encodes spaces as '+' like the old str.replace did, but
    # also handles '&', '#', '%', and non-ASCII instead of corrupting the URL.
    encoded = urllib.parse.quote_plus(query)
    for page in range(1, pages + 1):
        url = (
            f"https://www.ebay.com/sch/i.html"
            f"?_nkw={encoded}"
            f"&_pgn={page}&_ipg=60"
        )
        if sold_only:
            url += "&LH_Complete=1&LH_Sold=1"
        try:
            response = client.get(url)
            response.raise_for_status()
        except httpx.HTTPStatusError as e:
            logger.warning(f"HTTP {e.response.status_code} on page {page}")
            break
        except httpx.RequestError as e:
            logger.error(f"Request error on page {page}: {e}")
            break
        soup = BeautifulSoup(response.text, "html.parser")
        listings = soup.select("li.s-item")
        page_count = 0
        for listing in listings:
            # Skip eBay's injected ghost item and "Shop on eBay" placeholders
            title_el = listing.select_one(".s-item__title")
            if not title_el or "Shop on eBay" in title_el.text:
                continue
            item = parse_html_listing(listing)
            if item.get("title"):
                results.append(item)
                page_count += 1
        logger.info(f"Page {page}: {page_count} listings")
        # Stop when the results run out instead of requesting empty pages
        next_page = soup.select_one("a.pagination__next")
        if not next_page:
            break
        # Human-like delay between pages
        time.sleep(1.5 + random.uniform(0, 1.5))
    client.close()
    return results
def parse_html_listing(listing) -> dict:
    """Parse one `li.s-item` element from eBay search results.

    Args:
        listing: BeautifulSoup Tag for a single search-result listing.

    Returns:
        Dict with title, price text/range, URL, item id, shipping,
        condition, seller info, sold markers, image URL, and time
        remaining. Missing elements yield empty-string values.
    """
    import re  # hoisted out of the price branch; `re` is not imported at module level

    result = {}
    # Title
    title_el = listing.select_one(".s-item__title")
    result["title"] = title_el.text.strip() if title_el else ""
    # Price (can be a range like "US $10.00 to US $20.00" for lot items)
    price_el = listing.select_one(".s-item__price")
    result["price_text"] = price_el.text.strip() if price_el else ""
    if price_el:
        price_text = price_el.text.strip()
        prices = re.findall(r"\$?([\d,]+\.?\d*)", price_text)
        if prices:
            # Single price -> low == high; range -> first and last match.
            result["price_low"] = float(prices[0].replace(",", ""))
            result["price_high"] = float(prices[-1].replace(",", ""))
    # URL, with the tracking query string stripped
    link_el = listing.select_one("a.s-item__link")
    result["url"] = link_el["href"].split("?")[0] if link_el else ""
    # Item ID from the /itm/<id> URL path
    if result.get("url") and "/itm/" in result["url"]:
        result["item_id"] = result["url"].split("/itm/")[-1]
    # Shipping
    ship_el = listing.select_one(".s-item__shipping, .s-item__freeXDays")
    result["shipping"] = ship_el.text.strip() if ship_el else ""
    # Condition
    condition_el = listing.select_one(".SECONDARY_INFO")
    result["condition"] = condition_el.text.strip() if condition_el else ""
    # Seller info (limited in search results)
    seller_el = listing.select_one(".s-item__seller-info-text")
    result["seller_info"] = seller_el.text.strip() if seller_el else ""
    # Sold count (for Buy It Now items with multiple sold)
    sold_el = listing.select_one(".s-item__quantitySold")
    result["sold_count_text"] = sold_el.text.strip() if sold_el else ""
    # Completed listings render the sold price with the POSITIVE class
    sold_price_el = listing.select_one(".s-item__price.POSITIVE")
    if sold_price_el:
        result["is_sold"] = True
    # Image (lazy-loaded images keep the real URL in data-src)
    img_el = listing.select_one("img.s-item__image-img")
    if img_el:
        result["image_url"] = img_el.get("src") or img_el.get("data-src")
    # Time remaining (for auctions)
    time_el = listing.select_one(".s-item__time-left")
    result["time_left"] = time_el.text.strip() if time_el else ""
    return result
Extracting Seller Ratings from Listing Pages
Search results give you a seller feedback percentage but not the full details. Scraping individual listing pages gives you more:
def scrape_listing_detail(
    url: str,
    client: httpx.Client,
) -> dict:
    """
    Scrape full details from a single eBay listing page.

    Returns seller detail, item specifics, and bid information; returns
    an empty dict on any request or HTTP-status failure.
    """
    try:
        response = client.get(url)
        response.raise_for_status()
    except httpx.HTTPError as e:
        # httpx.HTTPError is the common base of RequestError (network,
        # timeout) AND HTTPStatusError (raised by raise_for_status).
        # The old clause only caught RequestError, so 4xx/5xx pages
        # escaped as exceptions instead of returning {} as documented.
        logger.error(f"Error scraping {url}: {e}")
        return {}
    soup = BeautifulSoup(response.text, "html.parser")
    result = {}
    # Seller store name and feedback score
    seller_el = soup.select_one("[data-testid='str-title'] a")
    if seller_el:
        result["seller_username"] = seller_el.text.strip()
    feedback_el = soup.select_one(".str-value")
    if feedback_el:
        result["feedback_score"] = feedback_el.text.strip()
    # Bid count and current price (auctions)
    bid_count_el = soup.select_one(".x-bid-count")
    if bid_count_el:
        result["bid_count"] = bid_count_el.text.strip()
    # Time left (auctions)
    time_el = soup.select_one(".x-time-left")
    if time_el:
        result["time_left"] = time_el.text.strip()
    # Item specifics table: each label cell's sibling holds the value
    specifics = {}
    for row in soup.select(".ux-labels-values__labels-content"):
        label_el = row.select_one(".ux-textspans--BOLD")
        if not label_el:
            label_el = row.select_one(".ux-textspans")
        value_container = row.find_next_sibling()
        if label_el and value_container:
            label = label_el.text.strip().rstrip(":")
            value = value_container.get_text(strip=True, separator=", ")
            if label:
                specifics[label] = value[:200]  # cap runaway values
    result["item_specifics"] = specifics
    # Returns policy
    returns_el = soup.select_one("[data-testid='ux-labels-values'] .ux-textspans")
    if returns_el:
        result["returns_info"] = returns_el.text.strip()
    # Watchers: no stable selector, so scan text spans for "N watchers"
    for el in soup.select(".ux-textspans"):
        text = el.text.strip()
        if "watcher" in text.lower() and any(c.isdigit() for c in text):
            result["watchers"] = text
            break
    return result
Data Storage
def init_database(db_path: str = "ebay_listings.db") -> sqlite3.Connection:
    """Open (or create) the SQLite database used to store eBay listings.

    Idempotent: the table and indexes use IF NOT EXISTS, so repeated
    calls against the same file are safe.

    Args:
        db_path: Path to the database file (":memory:" works for tests).

    Returns:
        An open sqlite3.Connection with the schema in place.
    """
    schema = """
    CREATE TABLE IF NOT EXISTS listings (
    item_id TEXT PRIMARY KEY,
    title TEXT,
    search_query TEXT,
    price REAL,
    price_low REAL,
    price_high REAL,
    currency TEXT,
    buying_options TEXT,
    listing_type TEXT,
    condition TEXT,
    condition_id TEXT,
    seller_username TEXT,
    seller_feedback_pct TEXT,
    seller_feedback_score INTEGER,
    shipping_cost TEXT,
    shipping_type TEXT,
    location_country TEXT,
    url TEXT,
    image_url TEXT,
    category_name TEXT,
    item_specifics TEXT,
    scraped_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
    );
    CREATE INDEX IF NOT EXISTS idx_query ON listings(search_query);
    CREATE INDEX IF NOT EXISTS idx_seller ON listings(seller_username);
    CREATE INDEX IF NOT EXISTS idx_price ON listings(price);
    """
    connection = sqlite3.connect(db_path)
    connection.executescript(schema)
    connection.commit()
    return connection
def save_listings(conn: sqlite3.Connection, items: list, query: str) -> int:
    """Persist listing records, returning the number saved.

    Uses INSERT OR REPLACE so re-scrapes refresh existing rows in place.

    Args:
        conn: Open connection with the `listings` table created.
        items: Parsed listing dicts (Browse API shape, or the mapped
            HTML shape produced in run_market_research).
        query: Search query tag stored alongside each row.

    Returns:
        Count of rows successfully written.
    """
    saved = 0
    for item in items:
        try:
            # `or []` guards against an explicit None value, which the old
            # get("buying_options", []) did not -- None raised TypeError on
            # the `in` test below and aborted the whole batch uncommitted.
            opts = item.get("buying_options") or []
            listing_type = "auction" if "AUCTION" in opts else "buy_it_now"
            conn.execute("""
                INSERT OR REPLACE INTO listings
                (item_id, title, search_query, price, currency, buying_options,
                 listing_type, condition, condition_id, seller_username,
                 seller_feedback_pct, seller_feedback_score, shipping_cost,
                 shipping_type, location_country, url, image_url, category_name)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """, (
                item.get("item_id"),
                item.get("title"),
                query,
                item.get("price"),
                item.get("currency", "USD"),
                json.dumps(opts),
                listing_type,
                item.get("condition"),
                item.get("condition_id"),
                item.get("seller_username"),
                item.get("seller_feedback_pct"),
                item.get("seller_feedback_score"),
                item.get("shipping_cost"),
                item.get("shipping_type"),
                item.get("location_country"),
                item.get("item_url") or item.get("url", ""),
                item.get("image_url"),
                (item.get("categories") or [None])[0],
            ))
            saved += 1
        except sqlite3.Error as e:
            logger.error(f"DB error: {e}")
    conn.commit()
    return saved
eBay's Affiliate Program (Partner Network)
If you're building a price comparison tool or product listing site, eBay's Partner Network (EPN) pays commissions on traffic you send to eBay. You can combine it with the Browse API: use the API to pull listings, then tag the item URLs with your affiliate tracking.
EPN uses a rover link format:
ROVER_CAMPAIGN_ID = "your-rover-campaign-id"
EPN_SITE_ID = "711-53200-19255-0"  # US site ID

def make_affiliate_url(item_url: str) -> str:
    """Wrap an eBay item URL with EPN affiliate tracking."""
    import urllib.parse
    # The destination URL must be fully percent-encoded for the mpre param.
    target = urllib.parse.quote(item_url, safe="")
    base = f"https://rover.ebay.com/rover/1/{EPN_SITE_ID}/{ROVER_CAMPAIGN_ID}"
    return f"{base}?mpre={target}&toolid=10001"
# Usage: wrap search results before displaying them.
# NOTE(review): illustrative snippet -- `search_results` is assumed to be a
# list of dicts produced by search_ebay(); it is not defined in this file,
# so this loop will NameError if executed as-is.
for item in search_results:
    item["affiliate_url"] = make_affiliate_url(item.get("item_url", ""))
Commission rates vary by category — typically 1-4% on completed sales. High-volume categories like electronics pay on the lower end; collectibles and fashion pay higher. EPN is separate from API access and has its own application process.
When the API Is Not Enough: Residential Proxies
The Browse API won't give you historical sold prices, completed auction data, or full seller feedback history. For those, you're scraping HTML.
From a datacenter IP, eBay will soft-block you after 100-200 requests with a CAPTCHA interstitial. Akamai's bot management is effective at identifying datacenter traffic.
Residential proxies from ThorData pass as real users because they are real ISP IPs. Configure them in httpx:
THORDATA_USER = "your_user"
THORDATA_PASS = "your_pass"

def get_proxy(country: str = "US") -> str:
    """Build a ThorData residential proxy URL targeting a country."""
    # Country targeting is encoded in the username per ThorData convention.
    auth = f"{THORDATA_USER}-country-{country}:{THORDATA_PASS}"
    endpoint = "proxy.thordata.com:9000"
    return f"http://{auth}@{endpoint}"
def scrape_with_proxy_rotation(
    queries: list,
    pages_per_query: int = 5,
    rotate_every: int = 50,
) -> dict:
    """
    Scrape multiple search queries with proxy rotation.

    Rotates to a fresh proxy every `rotate_every` pages so a single exit
    IP never accumulates many consecutive requests.

    Args:
        queries: Search keyword strings.
        pages_per_query: Result pages to fetch per query.
        rotate_every: Pages between proxy rotations.

    Returns:
        Mapping of query -> list of parsed listing dicts.
    """
    results = {}
    page_count = 0
    for query in queries:
        proxy_url = get_proxy(country="US")
        client = make_scrape_client(proxy_url)
        query_results = []
        for page in range(1, pages_per_query + 1):
            # Rotate proxy periodically
            if page_count > 0 and page_count % rotate_every == 0:
                client.close()
                proxy_url = get_proxy()
                client = make_scrape_client(proxy_url)
                logger.info(f"Rotated proxy at page {page_count}")
            url = f"https://www.ebay.com/sch/i.html?_nkw={query.replace(' ', '+')}&_pgn={page}&_ipg=60"
            try:
                resp = client.get(url)
                if resp.status_code == 200:
                    soup = BeautifulSoup(resp.text, "html.parser")
                    for listing in soup.select("li.s-item"):
                        item = parse_html_listing(listing)
                        title = item.get("title", "")
                        if title and "Shop on eBay" not in title:
                            query_results.append(item)
                else:
                    # Previously non-200 pages were dropped silently; a 503
                    # or CAPTCHA interstitial should at least show in logs.
                    logger.warning(f"HTTP {resp.status_code} for '{query}' page {page}")
            except Exception as e:
                logger.error(f"Error on page {page}: {e}")
            page_count += 1
            time.sleep(random.uniform(1.5, 3.5))
        results[query] = query_results
        client.close()
        logger.info(f"'{query}': {len(query_results)} listings")
    return results
Complete Research Pipeline
def run_market_research(
    queries: list,
    use_api: bool = True,
    use_scraping: bool = True,
    max_api_results: int = 500,
    scraping_pages: int = 5,
    db_path: str = "ebay_research.db",
    use_proxy: bool = True,
) -> None:
    """
    Combined API and scraping pipeline for eBay market research.

    Args:
        queries: Search keyword strings to research.
        use_api: Collect structured data via the Browse API.
        use_scraping: Also collect HTML search results.
        max_api_results: Per-query cap for API pagination.
        scraping_pages: HTML result pages per query.
        db_path: SQLite file for collected listings.
        use_proxy: Route HTML scraping through a residential proxy.
            Replaces the old dead-code toggle `get_proxy() if True else
            None`; the default preserves the previous behavior.
    """
    conn = init_database(db_path)
    if use_api:
        # Pre-flight credential check only; EbayBrowseClient fetches its
        # own token in __init__, so this token itself is not reused.
        token = get_ebay_token()
        if token:
            client = EbayBrowseClient(CLIENT_ID, CLIENT_SECRET)
            for query in queries:
                logger.info(f"API search: {query}")
                items = search_all_pages(client, query, max_results=max_api_results)
                save_listings(conn, items, query)
                stats = analyze_listings(items)
                print(f"\n{query} (Browse API):")
                print(f" Total listings: {stats.get('total_listings', 0)}")
                if stats.get("all_prices_stats"):
                    ps = stats["all_prices_stats"]
                    print(f" Avg price: ${ps['avg']:.2f}")
                    print(f" Price range: ${ps['min']:.2f} - ${ps['max']:.2f}")
                if stats.get("auction_stats", {}).get("count"):
                    a = stats["auction_stats"]
                    print(f" Auctions ({a['count']}): avg ${a['avg']:.2f}")
    if use_scraping:
        proxy_url = get_proxy() if use_proxy else None
        for query in queries:
            logger.info(f"HTML scrape: {query}")
            items = scrape_ebay_search(
                query, pages=scraping_pages,
                proxy_url=proxy_url,
            )
            # Map HTML items to the Browse-API-shaped record save_listings expects.
            db_items = []
            for item in items:
                db_items.append({
                    "item_id": item.get("item_id"),
                    "title": item.get("title"),
                    "price": item.get("price_low"),
                    "currency": "USD",
                    "buying_options": [],
                    "condition": item.get("condition"),
                    "item_url": item.get("url"),
                    "image_url": item.get("image_url"),
                })
            save_listings(conn, db_items, f"{query}_html")
            logger.info(f"Scraped {len(items)} listings for '{query}'")
    conn.close()
if __name__ == "__main__":
    # Demo run: three product categories, both collection paths enabled.
    research_queries = [
        "mechanical keyboard 65 percent",
        "vintage camera 35mm film",
        "lego architecture",
    ]
    run_market_research(
        queries=research_queries,
        use_api=True,
        use_scraping=True,
        max_api_results=500,
        scraping_pages=5,
    )
Key Takeaways
The Browse API is the right starting point for most projects — it is structured, reliable, and handles the heavy lifting. Key points:
-
Browse API requires OAuth but is straightforward to set up. Free tier gives 5,000 calls/day.
-
Pagination caps at 10,000 total results per query. Break large searches by price range using the `filter` parameter to get more comprehensive coverage.
-
Auction vs. Buy It Now prices tell different stories — track them separately. Use the `buyingOptions:{AUCTION}` filter to isolate auctions.
-
HTML scraping fills the gaps — sold prices, watch counts, detailed bid history. Keep selectors in a config so updating them doesn't require touching core logic.
-
Residential proxies for HTML scraping — ThorData is the practical solution for sustained volume. Datacenter IPs get CAPTCHA'd quickly.
-
Keep delays at 1-3 seconds per page even with residential proxies. eBay's bot detection also analyzes behavioral patterns, not just IP reputation.
-
Store everything in SQLite — once you have historical data, analytical value compounds. Price trends are only visible with history.