How to Scrape Bing Search Results with Python in 2026
Bing processes over 1.2 billion queries per month and powers several downstream search engines including DuckDuckGo and Yahoo in some markets. For SEO monitoring, market research, and competitive intelligence, Bing's search results are a valuable — and often overlooked — data source.
This guide covers three approaches to extracting Bing SERP data: direct HTML scraping, the Bing Web Search API, and browser automation with Playwright. We also cover storing results, handling rate limits, and using ThorData residential proxies for sustained collection.
Why Scrape Bing?
Bing's SERP data is distinct from Google's in important ways:
- Different ranking signals: Bing weights social signals and domain age differently. Sites that rank on Bing but not Google (or vice versa) reveal differences in ranking algorithms worth studying
- Lower competition for SEO monitoring: Most SEO tools focus on Google. Bing data is underutilized, giving you an edge
- Downstream engine coverage: DuckDuckGo and some Yahoo search results are partially powered by Bing — scraping Bing effectively gives you coverage for those too
- Rich SERP features: Bing has distinctive features — shopping panels, local results, knowledge cards, "People Also Ask" boxes — that differ from Google's equivalent features
- News and image search: Bing News and Bing Images have their own endpoints worth monitoring
- International markets: Bing has stronger market share in some regions and enterprise environments (Windows defaults)
Understanding Bing's SERP Structure
Bing's result pages follow a predictable HTML structure. Each organic result sits inside an li element with class b_algo. Key selectors:
- Title: `li.b_algo h2 a` — the clickable result title
- URL: `li.b_algo h2 a[href]` — the destination link
- Snippet: `li.b_algo .b_caption p` — the description text
- Display URL: `li.b_algo .b_attribution cite` — the visible URL
- Date: `li.b_algo .b_caption .news_dt` — date shown for news results
- Pagination: `a.sb_pagN` — next page link
Pagination uses a first parameter. Page 1 has no first param, page 2 uses first=11, page 3 uses first=21, and so on in increments of 10.
Additional SERP features:
- People Also Ask: .b_rs .b_suggestionList li a
- Related searches: #relatedsearches ul li a
- Knowledge panel: .b_entityTP
- Shopping results: .b_focusTextSmall, .p_smartf_head
- News results: .news-card, #news-results
Approach 1: Direct HTTP Scraping
The simplest method uses requests and BeautifulSoup:
import requests
from bs4 import BeautifulSoup
import sqlite3
import hashlib
import time
import random
import re
from typing import Optional, Dict, List, Any
from datetime import datetime
# Desktop browser User-Agent pool (Chrome/Firefox on Windows, macOS, Linux).
# One entry is drawn at random per request so consecutive requests do not
# share an identical fingerprint.
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:127.0) Gecko/20100101 Firefox/127.0",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 14.5; rv:127.0) Gecko/20100101 Firefox/127.0",
]


def make_headers() -> Dict[str, str]:
    """Build a plausible desktop-browser header set for a Bing request.

    Static headers mimic a normal navigation request (Sec-Fetch-* values,
    Accept chain); only the User-Agent varies between calls.
    """
    static_headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.9",
        "Accept-Encoding": "gzip, deflate, br",
        "DNT": "1",
        "Upgrade-Insecure-Requests": "1",
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-Site": "none",
        "Sec-Fetch-User": "?1",
    }
    static_headers["User-Agent"] = random.choice(USER_AGENTS)
    return static_headers
def scrape_bing_page(
    query: str,
    page: int = 1,
    proxy: Optional[str] = None,
    market: str = "en-US",
) -> Optional[Dict]:
    """Scrape a single Bing results page.

    Args:
        query: Search query string.
        page: 1-based SERP page number.
        proxy: Optional proxy URL, applied to both http and https traffic.
        market: Market/locale code (e.g. "en-US"); sent as both ``mkt``
            and ``setlang`` so result market and UI language agree.

    Returns:
        Dict with organic results, SERP features, and metadata, or None on
        HTTP/network errors or when a CAPTCHA interstitial is served.
    """
    from datetime import timezone  # aware "now" without deprecated utcnow()

    # FIX: also send `mkt` — `setlang` alone only controls UI language,
    # not which market's results Bing returns.
    params = {"q": query, "count": 10, "mkt": market, "setlang": market}
    # Bing pagination: page 1 has no `first`; page N starts at (N-1)*10 + 1.
    if page > 1:
        params["first"] = (page - 1) * 10 + 1
    headers = make_headers()
    proxies = {"http": proxy, "https": proxy} if proxy else None
    try:
        resp = requests.get(
            "https://www.bing.com/search",
            params=params,
            headers=headers,
            proxies=proxies,
            timeout=20,
        )
        resp.raise_for_status()
    except requests.HTTPError as e:
        # e.response can be None for synthesized HTTPErrors — guard it.
        status = e.response.status_code if e.response is not None else None
        if status == 429:
            print(f"[429] Rate limited on page {page}")
        else:
            print(f"[ERROR] HTTP {status}")
        return None
    except requests.RequestException as e:
        print(f"[ERROR] {e}")
        return None

    soup = BeautifulSoup(resp.text, "html.parser")

    # CAPTCHA interstitials either redirect (URL contains "captcha") or
    # embed a challenge form; either way there are no parseable results.
    if "captcha" in resp.url.lower() or soup.find("form", {"id": "captcha"}):
        print("[CAPTCHA] Bing served a CAPTCHA challenge — rotate proxy")
        return None

    # Organic results: one li.b_algo per result. Positions continue across
    # pages (page 2 starts at 11, ...); cards without a title link (ads,
    # malformed markup) still consume a position, matching on-page order.
    results = []
    for position, item in enumerate(soup.select("li.b_algo"), start=(page - 1) * 10 + 1):
        title_el = item.select_one("h2 a")
        if not title_el:
            continue
        snippet_el = item.select_one(".b_caption p")
        display_url_el = item.select_one(".b_attribution cite")
        date_el = item.select_one(".b_caption .news_dt")
        results.append({
            "position": position,
            "title": title_el.get_text(strip=True),
            "url": title_el.get("href", ""),
            "snippet": snippet_el.get_text(" ", strip=True) if snippet_el else "",
            "display_url": display_url_el.get_text(strip=True) if display_url_el else "",
            "date": date_el.get_text(strip=True) if date_el else None,
            "page": page,
            "query": query,
        })

    # SERP features. NOTE(review): these selectors track Bing's current
    # markup and will need maintenance as the markup changes.
    paa_items = [a.get_text(strip=True) for a in soup.select(".b_rs .b_suggestionList li a")]
    related = [a.get_text(strip=True) for a in soup.select("#relatedSearchesLinks a, .b_rs a")]

    # Knowledge panel / entity card.
    knowledge_panel = None
    kb = soup.select_one(".b_entityTP")
    if kb:
        kp_title = kb.select_one(".b_entityTitle, h2")
        kp_desc = kb.select_one(".b_entitySubTitle, .b_entityDescription")
        knowledge_panel = {
            "title": kp_title.get_text(strip=True) if kp_title else "",
            "description": kp_desc.get_text(" ", strip=True) if kp_desc else "",
        }

    # Shopping results (skip cards with no product name).
    shopping = []
    for item in soup.select(".p_smartf_head, .ShopNow"):
        name_el = item.select_one(".p_txtBig, .title")
        price_el = item.select_one(".p_price")
        if name_el:
            shopping.append({
                "name": name_el.get_text(strip=True),
                "price": price_el.get_text(strip=True) if price_el else "",
            })

    # Estimated total results, e.g. "1,230,000 results" -> 1230000.
    results_count_el = soup.select_one(".sb_count")
    total_count = None
    if results_count_el:
        match = re.search(r"([\d,]+)", results_count_el.get_text(strip=True))
        if match:
            total_count = int(match.group(1).replace(",", ""))

    return {
        "query": query,
        "page": page,
        "organic_results": results,
        "people_also_ask": paa_items,
        "related_searches": related,
        "knowledge_panel": knowledge_panel,
        "shopping_results": shopping,
        "estimated_total": total_count,
        # Naive-UTC ISO string: same format utcnow() produced, so new rows
        # sort/compare correctly against historical ones.
        "scraped_at": datetime.now(timezone.utc).replace(tzinfo=None).isoformat(),
    }
def scrape_bing(
    query: str,
    pages: int = 3,
    proxy: Optional[str] = None,
    delay_range: tuple = (2.0, 5.0),
) -> List[Dict]:
    """Collect organic results across several Bing SERP pages for one query.

    Stops early as soon as a page yields no data (error, CAPTCHA, or end of
    results); sleeps a random delay from *delay_range* between pages.
    """
    collected: List[Dict] = []
    for current in range(1, pages + 1):
        print(f" Page {current}/{pages}...")
        page_data = scrape_bing_page(query, page=current, proxy=proxy)
        if not page_data:
            print(f" Stopping at page {current} — no data returned")
            break
        collected.extend(page_data.get("organic_results", []))
        # No pause after the final page.
        if current < pages:
            time.sleep(random.uniform(*delay_range))
    return collected
Approach 2: Bing Web Search API
Microsoft offers a legitimate API through Azure Cognitive Services. The free tier allows 1,000 calls per month — enough for monitoring use cases without any bot detection concerns.
class BingSearchAPI:
    """Microsoft Bing Web Search API client (Azure Cognitive Services).

    Wraps the web, news, and image search endpoints with a shared session
    that carries the subscription key header.
    """

    ENDPOINT = "https://api.bing.microsoft.com/v7.0/search"
    NEWS_ENDPOINT = "https://api.bing.microsoft.com/v7.0/news/search"
    IMAGE_ENDPOINT = "https://api.bing.microsoft.com/v7.0/images/search"

    def __init__(self, api_key: str):
        """Store the key and prepare an authenticated HTTP session."""
        self.api_key = api_key
        self.session = requests.Session()
        self.session.headers["Ocp-Apim-Subscription-Key"] = api_key

    def web_search(
        self,
        query: str,
        count: int = 50,
        offset: int = 0,
        market: str = "en-US",
        freshness: Optional[str] = None,
        site_filter: Optional[str] = None,
    ) -> Optional[Dict]:
        """Search the web via the Bing API.

        Args:
            query: Search query.
            count: Results per call (API caps at 50).
            offset: Zero-based result offset for pagination.
            market: Market code passed as ``mkt``.
            freshness: 'Day', 'Week', 'Month', or 'YYYY-MM-DD..YYYY-MM-DD'.
            site_filter: Domain to restrict results to (e.g. 'python.org');
                implemented by prefixing ``site:`` to the query.

        Returns:
            Raw JSON response dict, or None on any request failure.
        """
        q = f"site:{site_filter} {query}" if site_filter else query
        params = {
            "q": q,
            "count": min(count, 50),
            "offset": offset,
            "mkt": market,
            "responseFilter": "Webpages,RelatedSearches,SpellSuggestions",
        }
        if freshness:
            params["freshness"] = freshness
        try:
            resp = self.session.get(self.ENDPOINT, params=params, timeout=15)
            resp.raise_for_status()
            return resp.json()
        except requests.RequestException as e:
            print(f"[ERROR] API request failed: {e}")
            return None

    def parse_web_results(self, data: Dict) -> List[Dict]:
        """Flatten an API response into a clean, 1-indexed result list."""
        results = []
        for position, item in enumerate(data.get("webPages", {}).get("value", []), start=1):
            results.append({
                "position": position,
                "title": item["name"],
                "url": item["url"],
                "snippet": item["snippet"],
                "display_url": item.get("displayUrl", ""),
                "date_crawled": item.get("dateLastCrawled", ""),
                "language": item.get("language", ""),
                "is_family_friendly": item.get("isFamilyFriendly", True),
            })
        return results

    def search_paginated(
        self,
        query: str,
        total_results: int = 200,
        market: str = "en-US",
    ) -> List[Dict]:
        """Collect up to *total_results* results via offset pagination.

        Stops early on request failure, an empty page, or a short page
        (fewer than 50 results means the API has run out).
        """
        all_results = []
        offset = 0
        per_page = 50
        while offset < total_results:
            data = self.web_search(query, count=per_page, offset=offset, market=market)
            if not data:
                break
            results = self.parse_web_results(data)
            if not results:
                break
            all_results.extend(results)
            offset += len(results)
            total_estimated = data.get("webPages", {}).get("totalEstimatedMatches", 0)
            print(f" Offset {offset}/{min(total_results, total_estimated)}")
            if len(results) < per_page:
                break
            time.sleep(0.5)  # stay under the API's per-second rate limit
        return all_results

    def news_search(
        self,
        query: str,
        count: int = 100,
        freshness: str = "Week",
        market: str = "en-US",
    ) -> List[Dict]:
        """Search Bing News, newest first. Returns [] on request failure."""
        params = {
            "q": query,
            "count": min(count, 100),
            "mkt": market,
            "freshness": freshness,
            "sortBy": "Date",
        }
        try:
            resp = self.session.get(self.NEWS_ENDPOINT, params=params, timeout=15)
            resp.raise_for_status()
            data = resp.json()
        except requests.RequestException as e:
            print(f"[ERROR] {e}")
            return []
        articles = []
        for item in data.get("value", []):
            # BUG FIX: `item.get("provider", [{}])[0]` raised IndexError when
            # the key was present but the list empty; `or [{}]` covers both.
            providers = item.get("provider") or [{}]
            articles.append({
                "title": item["name"],
                "url": item["url"],
                "description": item.get("description", ""),
                "published": item.get("datePublished", ""),
                "provider": providers[0].get("name", ""),
                "category": item.get("category", ""),
                "image": item.get("image", {}).get("thumbnail", {}).get("contentUrl", ""),
            })
        return articles
Approach 3: Playwright for JavaScript-Heavy SERPs
Some Bing features — local packs, knowledge panels, shopping results — render via JavaScript. For these, use Playwright:
import asyncio
from playwright.async_api import async_playwright, Page, BrowserContext
from typing import Optional
# JavaScript injected before any page script runs (via add_init_script):
# masks the common headless-automation tells — navigator.webdriver, an
# empty plugin list, missing window.chrome — that bot checks probe for.
STEALTH_JS = """
Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3, 4, 5] });
Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] });
window.chrome = { runtime: {} };
"""
async def scrape_bing_playwright(
    query: str,
    pages: int = 2,
    proxy_server: Optional[str] = None,
) -> List[Dict]:
    """Scrape Bing using Playwright for JS-rendered features.

    Args:
        query: Search query (URL-encoded before being placed in the URL).
        pages: Number of SERP pages to visit.
        proxy_server: Optional proxy server URL passed to Chromium.

    Returns:
        List of organic result dicts across all visited pages.
    """
    from urllib.parse import quote_plus

    all_results: List[Dict] = []
    async with async_playwright() as p:
        launch_opts = {
            "headless": True,
            "args": ["--no-sandbox", "--disable-blink-features=AutomationControlled"],
        }
        if proxy_server:
            launch_opts["proxy"] = {"server": proxy_server}
        browser = await p.chromium.launch(**launch_opts)
        try:
            context = await browser.new_context(
                user_agent=random.choice(USER_AGENTS),
                viewport={"width": 1366, "height": 768},
                locale="en-US",
                timezone_id="America/New_York",
            )
            await context.add_init_script(STEALTH_JS)
            page = await context.new_page()
            # BUG FIX: the raw query used to be interpolated unencoded, which
            # produced broken URLs for queries containing '&', '#', '+', etc.
            encoded_query = quote_plus(query)
            for pg in range(1, pages + 1):
                url = f"https://www.bing.com/search?q={encoded_query}"
                if pg > 1:
                    url += f"&first={(pg - 1) * 10 + 1}"
                try:
                    await page.goto(url, wait_until="networkidle", timeout=30000)
                    await page.wait_for_selector("li.b_algo", timeout=15000)
                except Exception as e:
                    print(f"[ERROR] Page {pg}: {e}")
                    break
                # Half-page scroll triggers lazily loaded SERP modules.
                await page.evaluate("window.scrollTo(0, document.body.scrollHeight / 2)")
                await page.wait_for_timeout(1000)
                items = await page.query_selector_all("li.b_algo")
                pos_start = (pg - 1) * 10 + 1
                for i, item in enumerate(items):
                    title_el = await item.query_selector("h2 a")
                    snippet_el = await item.query_selector(".b_caption p")
                    display_el = await item.query_selector(".b_attribution cite")
                    if not title_el:
                        continue
                    all_results.append({
                        "position": pos_start + i,
                        "title": await title_el.inner_text(),
                        "url": await title_el.get_attribute("href"),
                        "snippet": await snippet_el.inner_text() if snippet_el else "",
                        "display_url": await display_el.inner_text() if display_el else "",
                        "page": pg,
                        "query": query,
                    })
                # People-Also-Ask style suggestion links.
                paa_items = []
                for paa in await page.query_selector_all(".b_rs li a"):
                    text = await paa.inner_text()
                    if text.strip():
                        paa_items.append(text.strip())
                print(f" Page {pg}: {len(items)} results, {len(paa_items)} PAA items")
                if pg < pages:
                    await asyncio.sleep(random.uniform(2.5, 5.0))
        finally:
            # FIX: close the browser even when an unexpected exception escapes
            # the loop; previously it leaked until the playwright context died.
            await browser.close()
    return all_results
ThorData Proxy Integration
Bing's defenses are moderate compared to Google but still significant at scale. IP-based throttling kicks in after 50-100 rapid requests from one IP, and you'll see CAPTCHAs or 429 responses.
ThorData offers residential proxy pools that distribute your requests across thousands of IPs, making your traffic pattern indistinguishable from real users. Their geo-targeting is useful for Bing because the search results vary significantly by country.
class ThorDataProxyPool:
    """Builds ThorData residential proxy URLs with country/session targeting.

    Targeting options are encoded into the proxy username: a rotating
    endpoint yields a new exit IP per request, while a session ID pins
    requests to one IP (sticky session).
    """

    def __init__(self, username: str, password: str):
        self.username = username
        self.password = password
        self.host = "gate.thordata.com"
        self.port = 9000

    def get_proxy(
        self,
        country: str = "US",
        session_id: Optional[str] = None,
    ) -> str:
        """Return a proxy URL targeting *country*, optionally sticky."""
        credential_parts = [self.username, "country", country]
        if session_id:
            credential_parts += ["session", session_id]
        credential = "-".join(credential_parts)
        return f"http://{credential}:{self.password}@{self.host}:{self.port}"

    def get_rotating(self, country: str = "US") -> str:
        """New IP for each request."""
        return self.get_proxy(country)

    def get_sticky(self, session_id: str, country: str = "US") -> str:
        """Same IP for a browsing session (sticky by session ID)."""
        return self.get_proxy(country, session_id=session_id)
def scrape_with_proxy_rotation(
    queries: List[str],
    proxy_pool: ThorDataProxyPool,
    pages_per_query: int = 3,
    country: str = "US",
) -> Dict[str, List[Dict]]:
    """Run scrape_bing() over many queries, drawing a fresh proxy per query.

    Returns a mapping of query -> list of organic result dicts.
    """
    collected: Dict[str, List[Dict]] = {}
    total = len(queries)
    for idx, query in enumerate(queries, start=1):
        print(f"\n[QUERY {idx}/{total}] {query}")
        # A rotating endpoint means a new exit IP for each query.
        current_proxy = proxy_pool.get_rotating(country=country)
        hits = scrape_bing(query, pages=pages_per_query, proxy=current_proxy)
        collected[query] = hits
        print(f" Got {len(hits)} results")
        # Jittered pause between queries; skipped after the last one.
        if idx < total:
            time.sleep(random.uniform(5.0, 12.0))
    return collected
Extracting Rich SERP Features
Beyond organic results, Bing surfaces structured data worth capturing:
def extract_all_serp_features(html: str, query: str) -> Dict:
    """Extract all SERP features from a Bing results page.

    Args:
        html: Raw HTML of a Bing results page.
        query: Query the page was fetched for (echoed into the output).

    Returns:
        Dict with People-Also-Ask questions (deduped, page order), the
        knowledge panel (or None), shopping/news/video listings, and the
        estimated total result count (or None).
    """
    soup = BeautifulSoup(html, "html.parser")

    # People Also Ask / related suggestion links.
    paa = []
    for item in soup.select(".b_rs .b_suggestionList li a, #relatedSearchesLinks a"):
        text = item.get_text(strip=True)
        if text:
            paa.append(text)

    # Knowledge panel / entity card. (Rewrote the previous
    # select_one/hasattr/object() construct: same fields, readable form.)
    entity = None
    kb = soup.select_one(".b_entityTP")
    if kb:
        title_el = kb.select_one(".b_entityTitle") or kb.select_one("h2")
        desc_el = kb.select_one(".b_entitySubTitle")
        entity = {
            "title": title_el.get_text(strip=True) if title_el else "",
            "description": desc_el.get_text(" ", strip=True) if desc_el else "",
            "attributes": {},
        }
        for row in kb.select("tr.b_factrow"):
            key_el = row.select_one("td:first-child")
            val_el = row.select_one("td:last-child")
            if key_el and val_el:
                entity["attributes"][key_el.get_text(strip=True)] = val_el.get_text(strip=True)

    # Shopping ads / product listings. FIX: skip cards with no recognizable
    # name instead of appending all-empty records.
    products = []
    for card in soup.select(".p_smartf_head, .ShopNow, [class*='ShoppingItem']"):
        name = card.select_one(".p_txtBig, .title, h3")
        if not name:
            continue
        price = card.select_one(".p_price, .price")
        store = card.select_one(".p_shop, .store")
        products.append({
            "name": name.get_text(strip=True),
            "price": price.get_text(strip=True) if price else "",
            "store": store.get_text(strip=True) if store else "",
        })

    # News carousel (same fix: title-less cards are noise, skip them).
    news_items = []
    for card in soup.select(".news-card, [class*='NewsCard'], #news-results article"):
        title = card.select_one("h2, .title, a")
        if not title:
            continue
        source = card.select_one(".source, .provider, [class*='Source']")
        news_items.append({
            "title": title.get_text(strip=True),
            "source": source.get_text(strip=True) if source else "",
        })

    # Estimated total, e.g. "1,230,000 results" -> 1230000.
    count_el = soup.select_one(".sb_count")
    total = None
    if count_el:
        match = re.search(r"([\d,]+)", count_el.get_text())
        if match:
            total = int(match.group(1).replace(",", ""))

    # Videos (same title filter).
    videos = []
    for vid in soup.select(".mc_vtvc_meta, [class*='VideoCard']"):
        title = vid.select_one("h3, .title, [class*='Title']")
        if not title:
            continue
        duration = vid.select_one(".mc_vtvc_con_rc, .duration, [class*='Duration']")
        videos.append({
            "title": title.get_text(strip=True),
            "duration": duration.get_text(strip=True) if duration else "",
        })

    return {
        "query": query,
        # FIX: dict.fromkeys dedupes while preserving page order;
        # list(set(...)) returned a nondeterministic ordering.
        "people_also_ask": list(dict.fromkeys(paa)),
        "knowledge_panel": entity,
        "shopping_results": products,
        "news_results": news_items,
        "video_results": videos,
        "estimated_total_results": total,
    }
Data Storage with Deduplication
For ongoing SERP monitoring, store results in SQLite with change tracking:
def init_database(db_path: str = "bing_serps.db") -> sqlite3.Connection:
    """Open (creating if necessary) the SERP tracking database.

    Tables: serp_results (one row per query/url/day), serp_features
    (JSON-ish feature payloads), rank_tracking (per-domain positions over
    time). Returns the open connection.
    """
    db = sqlite3.connect(db_path)
    # WAL mode lets readers coexist with the single writer.
    db.execute("PRAGMA journal_mode=WAL")
    schema = """
    CREATE TABLE IF NOT EXISTS serp_results (
        id TEXT PRIMARY KEY,
        query TEXT NOT NULL,
        position INTEGER,
        title TEXT,
        url TEXT,
        snippet TEXT,
        display_url TEXT,
        date_shown TEXT,
        page INTEGER,
        scraped_at TEXT
    );
    CREATE TABLE IF NOT EXISTS serp_features (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        query TEXT,
        feature_type TEXT,
        content TEXT,
        scraped_at TEXT
    );
    CREATE TABLE IF NOT EXISTS rank_tracking (
        query TEXT,
        domain TEXT,
        position INTEGER,
        tracked_at TEXT,
        PRIMARY KEY (query, domain, tracked_at)
    );
    CREATE INDEX IF NOT EXISTS idx_results_query ON serp_results(query, scraped_at DESC);
    CREATE INDEX IF NOT EXISTS idx_rank_query ON rank_tracking(query, tracked_at DESC);
    """
    db.executescript(schema)
    db.commit()
    return db
def save_results(
    conn: sqlite3.Connection,
    query: str,
    results: List[Dict],
    features: Optional[Dict] = None,
):
    """Save SERP results with deduplication and per-domain rank tracking.

    Args:
        conn: Open SQLite connection with the schema from init_database().
        query: Query string the results belong to.
        results: Organic result dicts (position/title/url/snippet/...).
        features: Optional SERP-feature dict; non-string values are stored
            as JSON. "query" and "scraped_at" keys are skipped.

    Rows are keyed on md5(query:url:date), so re-running the same query on
    the same day overwrites in place, while cross-day position changes
    accumulate in rank_tracking.
    """
    # FIX: hoisted out of the per-result loop, where it was re-executed on
    # every iteration; json is imported locally because the module-level
    # import appears much later in this file.
    from urllib.parse import urlparse
    import json
    from datetime import timezone

    # Naive-UTC ISO string: same format the deprecated utcnow() produced,
    # so comparisons against previously stored timestamps still hold.
    now = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
    for r in results:
        # Dedup key: query + URL + scrape date (position changes tracked separately).
        rid = hashlib.md5(f"{query}:{r['url']}:{now[:10]}".encode()).hexdigest()
        conn.execute(
            """INSERT OR REPLACE INTO serp_results
            (id, query, position, title, url, snippet, display_url, date_shown, page, scraped_at)
            VALUES (?,?,?,?,?,?,?,?,?,?)""",
            (rid, query, r.get("position"), r.get("title"), r.get("url"),
             r.get("snippet"), r.get("display_url"), r.get("date"), r.get("page"), now)
        )
        if r.get("url"):
            try:
                domain = urlparse(r["url"]).netloc
                conn.execute(
                    "INSERT OR REPLACE INTO rank_tracking (query, domain, position, tracked_at) VALUES (?,?,?,?)",
                    (query, domain, r.get("position"), now)
                )
            except Exception:
                # Best-effort: a malformed URL must not abort the whole batch.
                pass
    if features:
        for feature_type, content in features.items():
            if content and feature_type not in ("query", "scraped_at"):
                conn.execute(
                    "INSERT INTO serp_features (query, feature_type, content, scraped_at) VALUES (?,?,?,?)",
                    (query, feature_type, content if isinstance(content, str) else json.dumps(content), now)
                )
    conn.commit()
def get_rank_history(conn: sqlite3.Connection, query: str, domain: str, days: int = 30) -> List[Dict]:
    """Return the (position, date) history for *domain* under *query*.

    A falsy *days* disables the recency cutoff (everything since 2000-01-01
    is returned); otherwise only the last *days* days are included, oldest
    first.
    """
    if days:
        cutoff = (datetime.utcnow() - timedelta(days=days)).isoformat()
    else:
        cutoff = "2000-01-01"
    history = conn.execute(
        """SELECT position, tracked_at FROM rank_tracking
        WHERE query = ? AND domain = ? AND tracked_at >= ?
        ORDER BY tracked_at ASC""",
        (query, domain, cutoff)
    ).fetchall()
    return [{"position": pos, "date": when} for pos, when in history]
def get_position_changes(conn: sqlite3.Connection, query: str) -> List[Dict]:
    """Return the most recent ranking snapshot for *query*.

    Despite the name, this selects only the rows from the latest scrape
    (tracked_at == MAX for the query), ordered by position; comparing two
    snapshots to detect movement is left to the caller.
    """
    snapshot = conn.execute(
        """SELECT domain, position, tracked_at
        FROM rank_tracking
        WHERE query = ?
        AND tracked_at >= (SELECT MAX(tracked_at) FROM rank_tracking WHERE query = ?)
        ORDER BY position ASC""",
        (query, query)
    ).fetchall()
    return [dict(zip(("domain", "position", "date"), row)) for row in snapshot]
Complete Production Pipeline
import json
from datetime import timedelta
def run_serp_monitor(
    queries: List[str],
    db_path: str = "bing_serps.db",
    pages_per_query: int = 3,
    api_key: Optional[str] = None,
    proxy_pool: Optional[ThorDataProxyPool] = None,
    use_api: bool = False,
) -> Dict:
    """Complete SERP monitoring pipeline.

    use_api=True (with api_key) routes through the Bing API; otherwise
    results are scraped directly, drawing a fresh proxy per query when a
    pool is supplied. Returns run statistics.
    """
    conn = init_database(db_path)
    stats = {
        "queries_processed": 0,
        "results_saved": 0,
        "errors": 0,
    }
    api_client = BingSearchAPI(api_key) if (api_key and use_api) else None
    for idx, query in enumerate(queries, start=1):
        print(f"\n[{idx}/{len(queries)}] {query}")
        if api_client is not None:
            # Official API path. Note: an empty-but-successful response
            # still counts as saved (with zero rows), not as an error.
            payload = api_client.web_search(query, count=50)
            if payload:
                parsed = api_client.parse_web_results(payload)
                save_results(conn, query, parsed)
                stats["results_saved"] += len(parsed)
                print(f" API: {len(parsed)} results")
            else:
                stats["errors"] += 1
        else:
            # Direct scraping path; zero results counts as an error here.
            proxy = proxy_pool.get_rotating() if proxy_pool else None
            scraped = scrape_bing(query, pages=pages_per_query, proxy=proxy)
            if scraped:
                save_results(conn, query, scraped)
                stats["results_saved"] += len(scraped)
                print(f" Scraped: {len(scraped)} results")
            else:
                stats["errors"] += 1
        stats["queries_processed"] += 1
        time.sleep(random.uniform(3.0, 8.0))
    conn.close()
    print(f"\nDone: {stats}")
    return stats
# Example usage
if __name__ == "__main__":
import json
QUERIES = [
"best python web scraping libraries 2026",
"residential proxy services comparison",
"SERP tracking tools",
]
# With ThorData proxy
# pool = ThorDataProxyPool("YOUR_USER", "YOUR_PASS")
# run_serp_monitor(QUERIES, proxy_pool=pool)
# With Bing API (cleaner but limited)
# run_serp_monitor(QUERIES, api_key="YOUR_AZURE_KEY", use_api=True)
# Basic scraping (no proxy)
run_serp_monitor(QUERIES, pages_per_query=2)
When to Use Each Approach
| Method | Best For | Limitations |
|---|---|---|
| Direct HTTP scraping | High volume, flexible queries | CAPTCHA after ~50-100 rapid requests from one IP |
| Bing Web Search API | Clean reliable data, production use | 1K free/month, then $3/1K calls |
| Playwright | JS-rendered SERP features, screenshots | Slowest (~3-5s per page), high resource usage |
| API + Proxy scraping | Best of both: use API for clean data, scrape for rich features | Complexity, proxy cost |
For production SEO monitoring workloads, the hybrid approach works best: use the Bing API for bulk rank tracking (fast, reliable, no CAPTCHAs) and fall back to direct scraping with ThorData residential proxies for rich SERP features the API doesn't return.
Keep your scraping request rate under 10-15 per minute per IP, rotate through a large enough proxy pool, and add realistic timing jitter to stay under Bing's radar indefinitely.