Scraping Fandango Movie Showtimes and Ticket Prices with Python (2026)
Fandango dominates the US movie ticket market. They own Rotten Tomatoes and Vudu, and aggregate showtimes from AMC, Regal, Cinemark, and thousands of independent theaters. If you're building a movie data product, tracking ticket pricing trends, analyzing theatrical release patterns, or just want programmatic access to what's playing near you, Fandango is the primary source.
There's no public API. But their website loads showtime data in structured formats we can extract. Here's how to do it comprehensively with Python in 2026.
How Fandango Serves Data
Fandango's showtime pages use a mix of server-rendered HTML and client-side API calls. The most useful pattern: when you load a theater or movie page, the initial HTML contains JSON-LD structured data with showtimes, and the page also fires XHR requests to internal APIs for additional details.
The JSON-LD is the easiest target — it's embedded in <script type="application/ld+json"> tags and follows Schema.org formatting. This is the SEO layer that search engines index, so it's maintained as a first-class data source and rarely breaks.
Setup
pip install httpx beautifulsoup4 lxml playwright
playwright install chromium
For the basic showtime scraping we'll use httpx. For checkout-flow price extraction, we'll fall back to Playwright since that path has stronger bot protections.
Understanding Fandango's URL Structure
Fandango uses predictable URL patterns:
# Theater pages (shows today's showtimes by default)
https://www.fandango.com/{theater-slug}/theater-page
# Theater page with specific date
https://www.fandango.com/{theater-slug}/theater-page?date=YYYY-MM-DD
# Movie pages
https://www.fandango.com/{movie-slug}/movie-overview
# Movie showtimes near ZIP code
https://www.fandango.com/movies-in-theaters/showtimes/{movie-slug}?zip={zipcode}
# Theater search by ZIP
https://www.fandango.com/theater-listings?zipCode={zip}&radius={miles}
# Ticket purchase (requires show selection)
https://www.fandango.com/purchase/tickets?showtime={showtime-id}
Theater slugs look like amc-empire-25-aabtj — a human-readable name plus a short alphanumeric identifier. Movie slugs look like inside-out-2-248143 — title plus Fandango's internal movie ID.
Scraping Showtimes by Theater
import json
import random
import re
import sqlite3
import time
from datetime import date, datetime, timedelta
from typing import Optional

import httpx
from bs4 import BeautifulSoup
# Browser-like default headers sent with every Fandango request. The
# Sec-Fetch-* trio mimics a top-level navigation, and the Accept values
# match desktop Chrome — per the anti-bot notes below, Akamai fingerprints
# header shape, so these should stay consistent with the User-Agent.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                  "AppleWebKit/537.36 Chrome/126.0.0.0 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.9",
    "Accept-Encoding": "gzip, deflate, br",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "same-origin",
}
def make_client(proxy_url: Optional[str] = None) -> httpx.Client:
    """Create an httpx client preconfigured for Fandango scraping.

    Args:
        proxy_url: Optional proxy URL (``http://user:pass@host:port``);
            ``None`` connects directly. (Annotation fixed: the old
            ``str = None`` hint was incorrect.)

    Returns:
        An ``httpx.Client`` with browser-like headers, redirect following,
        a 20s timeout, and HTTP/2 enabled — HTTP/2 is deliberate, since the
        article notes Akamai fingerprints the HTTP/2 handshake.
    """
    return httpx.Client(
        headers=HEADERS,
        proxy=proxy_url,
        timeout=20,
        follow_redirects=True,
        http2=True,
    )
def scrape_theater_showtimes(theater_slug: str,
                             show_date: Optional[str] = None,
                             client: Optional[httpx.Client] = None) -> dict:
    """
    Scrape showtimes for a specific theater.

    theater_slug: e.g., 'amc-empire-25-aabtj'
    show_date: format 'YYYY-MM-DD', defaults to today
    client: optional shared httpx.Client (a fresh one is created if omitted)

    Returns a dict of theater metadata plus a deduplicated ``movies`` list.
    On any HTTP or network failure, returns ``{"error": ..., "theater_slug": ...}``
    instead of raising.
    """
    if not show_date:
        show_date = date.today().isoformat()
    use_client = client or make_client()
    url = f"https://www.fandango.com/{theater_slug}/theater-page"
    params = {"date": show_date}
    try:
        response = use_client.get(url, params=params)
        response.raise_for_status()
    except httpx.HTTPStatusError as e:
        return {"error": f"HTTP {e.response.status_code}", "theater_slug": theater_slug}
    except httpx.RequestError as e:
        # Fix: network-level failures (DNS, timeout, connection reset) used
        # to propagate while HTTP errors returned an error dict — report
        # both failure modes the same way.
        return {"error": f"request failed: {e}", "theater_slug": theater_slug}
    soup = BeautifulSoup(response.text, "lxml")
    theater_data = {
        "theater_slug": theater_slug,
        "date": show_date,
        "name": "",
        "address": {},
        "phone": "",
        "amenities": [],
        "movies": [],
    }
    # Extract JSON-LD structured data. A <script type="application/ld+json">
    # tag may hold a single object or a list; normalize to a list so both
    # shapes work for both entry types (the old code only recognized
    # MovieTheater as a bare dict and ScreeningEvent inside a list).
    for script in soup.find_all("script", type="application/ld+json"):
        if not script.string:
            continue
        try:
            ld_data = json.loads(script.string)
        except json.JSONDecodeError:
            continue
        entries = ld_data if isinstance(ld_data, list) else [ld_data]
        for entry in entries:
            if not isinstance(entry, dict):
                # Guard: JSON-LD lists can contain non-object items.
                continue
            entry_type = entry.get("@type")
            if entry_type == "MovieTheater":
                theater_data["name"] = entry.get("name", "")
                theater_data["address"] = entry.get("address", {})
                theater_data["phone"] = entry.get("telephone", "")
                theater_data["geo"] = entry.get("geo", {})
            elif entry_type == "ScreeningEvent":
                movie_info = entry.get("workPresented", {})
                theater_data["movies"].append({
                    "title": movie_info.get("name", ""),
                    "movie_id": movie_info.get("@id", ""),
                    "rating": movie_info.get("contentRating", ""),
                    "duration": movie_info.get("duration", ""),
                    "genre": movie_info.get("genre", ""),
                    "start_time": entry.get("startDate", ""),
                    "end_time": entry.get("endDate", ""),
                    "url": entry.get("url", ""),
                    "format": entry.get("name", ""),  # IMAX, Dolby, etc.
                    "language": entry.get("inLanguage", "en"),
                    "subtitles": entry.get("subtitleLanguage", ""),
                })
    # Fallback: parse HTML showtime cards when JSON-LD yielded nothing.
    if not theater_data["movies"]:
        theater_data["movies"] = _parse_html_showtimes(soup)
    # Extract theater amenities, deduplicated with order preserved — the
    # loose class-substring selectors can match the same text repeatedly.
    seen_amenities = set()
    for amenity in soup.select("[class*='amenity'], [class*='feature']"):
        text = amenity.get_text(strip=True)
        if text and len(text) < 50 and text not in seen_amenities:
            seen_amenities.add(text)
            theater_data["amenities"].append(text)
    # Deduplicate movies by title + start time.
    seen = set()
    unique_movies = []
    for movie in theater_data["movies"]:
        key = (movie.get("title"), movie.get("start_time"))
        if key not in seen:
            seen.add(key)
            unique_movies.append(movie)
    theater_data["movies"] = unique_movies
    return theater_data
def _parse_html_showtimes(soup: BeautifulSoup) -> list[dict]:
    """Fallback HTML parser for showtime data when JSON-LD is incomplete.

    Returns one flat record per showtime button. Selectors are
    class-substring based because Fandango's class names shift between
    deploys; treat all of them as best-effort.
    """
    def _record(title: str, rating: str, runtime: str,
                fmt_name: str, btn) -> dict:
        # Single definition of the output record shape (previously this
        # dict literal was duplicated across both branches below).
        return {
            "title": title,
            "rating": rating,
            "runtime": runtime,
            "format": fmt_name,
            "start_time": btn.get_text(strip=True),
            "ticket_url": btn.get("href", ""),
        }

    movies = []
    movie_sections = soup.select(
        "[class*='showtime-movie'], [data-movie-id], "
        "[class*='theater-movie'], article[class*='movie']"
    )
    for section in movie_sections:
        title_el = section.select_one(
            "h3, h2, [class*='movie-title'], [class*='movieTitle'], "
            "[itemprop='name']"
        )
        if not title_el:
            # A section without a recognizable title is layout noise.
            continue
        title = title_el.get_text(strip=True)
        rating_el = section.select_one("[class*='rating'], [class*='mpaa']")
        rating = rating_el.get_text(strip=True) if rating_el else ""
        runtime_el = section.select_one("[class*='runtime'], [class*='duration']")
        runtime = runtime_el.get_text(strip=True) if runtime_el else ""
        # Group showtimes by format (Standard, IMAX, Dolby, etc.)
        format_groups = section.select("[class*='format-group'], [class*='showtime-type']")
        if format_groups:
            for fmt_group in format_groups:
                fmt_name_el = fmt_group.select_one("[class*='format-name'], strong, h4")
                fmt_name = fmt_name_el.get_text(strip=True) if fmt_name_el else "Standard"
                for btn in fmt_group.select(
                    "a[class*='showtime'], button[class*='showtime'], [data-showtime]"
                ):
                    movies.append(_record(title, rating, runtime, fmt_name, btn))
        else:
            # No format grouping present: every showtime link in the section
            # is labeled Standard.
            for btn in section.select(
                "a[class*='showtime'], button[class*='showtime'], "
                "[data-showtime], a[href*='purchase']"
            ):
                movies.append(_record(title, rating, runtime, "Standard", btn))
    return movies
Finding Theaters by Location
Fandango's theater search accepts ZIP codes or city names:
def find_theaters(zip_code: str, radius_miles: int = 15,
                  client: httpx.Client = None) -> list[dict]:
    """Find theaters within radius of a ZIP code."""
    http = client or make_client()
    resp = http.get(
        "https://www.fandango.com/theater-listings",
        params={"zipCode": zip_code, "radius": radius_miles},
    )
    resp.raise_for_status()
    page = BeautifulSoup(resp.text, "lxml")

    results: list[dict] = []
    known_slugs: set[str] = set()
    # Several selector patterns are tried because Fandango's markup varies.
    candidate_selectors = (
        "[class*='theater-list'] a[href*='/theater-page']",
        "a[class*='theater-name']",
        "[class*='theater-item'] a[href*='theater-page']",
        "h3[class*='theater'] a",
    )
    for css in candidate_selectors:
        for anchor in page.select(css):
            link = anchor.get("href", "")
            label = anchor.get_text(strip=True)
            # Pull the theater slug out of the URL.
            slug_match = re.search(r'/([^/]+)/theater-page', link)
            if slug_match:
                slug = slug_match.group(1)
            else:
                slug = link.split("/")[-2] if "/" in link else ""
            if not slug or not label or slug in known_slugs:
                continue
            known_slugs.add(slug)
            # Best-effort distance lookup from an enclosing theater card.
            card = anchor.find_parent(class_=re.compile("theater"))
            dist_node = card.select_one("[class*='distance']") if card else None
            results.append({
                "name": label,
                "slug": slug,
                "url": link if link.startswith("http") else f"https://www.fandango.com{link}",
                "distance": dist_node.get_text(strip=True) if dist_node else "",
                "zip_code": zip_code,
            })
    return results
def find_theaters_multi_zip(zip_codes: list[str], radius_miles: int = 10,
                            client: httpx.Client = None) -> list[dict]:
    """Find unique theaters across multiple ZIP codes."""
    http = client or make_client()
    unique_theaters: list[dict] = []
    slugs_seen: set[str] = set()
    for zc in zip_codes:
        nearby = find_theaters(zc, radius_miles, http)
        for theater in nearby:
            slug = theater["slug"]
            if slug in slugs_seen:
                continue
            slugs_seen.add(slug)
            unique_theaters.append(theater)
        print(f"ZIP {zc}: {len(nearby)} theaters ({len(unique_theaters)} unique total)")
        # Polite pacing between ZIP lookups.
        time.sleep(random.uniform(2, 4))
    return unique_theaters
Extracting Ticket Prices
Ticket prices appear during the checkout flow. For straightforward cases, they're embedded in JavaScript data objects:
def extract_prices_from_html(html: str) -> list[dict]:
    """Extract ticket pricing from Fandango page HTML.

    First tries JSON embedded in page scripts; if nothing matches, falls
    back to scanning rendered price elements with BeautifulSoup.

    Returns a list of ``{"type": ..., "price": float}`` dicts from the JSON
    path, or ``{"type": ..., "price_text": ...}`` dicts from the HTML path.
    """
    prices = []
    # Pair "ticketType" with the "price" of the SAME JSON object: [^{}]*?
    # cannot cross a brace, so a type in one object can no longer pick up
    # the price of the NEXT object (the old `.*?` with DOTALL could).
    price_pattern = re.compile(
        r'"ticketType"\s*:\s*"([^"]+)"[^{}]*?"price"\s*:\s*([\d.]+)'
    )
    for match in price_pattern.finditer(html):
        prices.append({
            "type": match.group(1),
            "price": float(match.group(2)),
        })
    if prices:
        return prices
    # Fallback: look for price display elements.
    soup = BeautifulSoup(html, "lxml")
    for price_el in soup.select("[class*='ticket-price'], [data-price], [class*='price-amount']"):
        price_text = price_el.get_text(strip=True)
        if "$" in price_text:
            # Try to find an associated ticket-type label near the price.
            parent = price_el.find_parent()
            label_el = parent.select_one("[class*='ticket-type'], [class*='label']") if parent else None
            prices.append({
                "type": label_el.get_text(strip=True) if label_el else "unknown",
                "price_text": price_text,
            })
    return prices
async def get_ticket_prices_playwright(ticket_url: str,
                                       proxy_url: str = None) -> list[dict]:
    """Use Playwright to navigate the checkout flow and extract ticket prices.

    Args:
        ticket_url: Direct URL to a Fandango ticket-purchase page.
        proxy_url: Optional proxy. NOTE(review): the regex below only
            matches the exact ``http://user:pass@host:port`` shape —
            unauthenticated or https proxies are silently ignored; confirm
            that is acceptable for the proxy pool in use.

    Returns:
        List of dicts with ``type`` plus ``price_text`` (DOM path) or
        ``price`` (data-attribute path) keys; empty list on any failure.
    """
    from playwright.async_api import async_playwright
    prices = []
    async with async_playwright() as p:
        launch_args = {
            "headless": True,
            # Hides the automation signal Chromium otherwise exposes.
            "args": ["--disable-blink-features=AutomationControlled"],
        }
        if proxy_url:
            # Parse proxy URL for Playwright format: server and credentials
            # are passed as separate fields.
            proxy_match = re.match(r'http://([^:]+):([^@]+)@([^:]+):(\d+)', proxy_url)
            if proxy_match:
                launch_args["proxy"] = {
                    "server": f"http://{proxy_match.group(3)}:{proxy_match.group(4)}",
                    "username": proxy_match.group(1),
                    "password": proxy_match.group(2),
                }
        browser = await p.chromium.launch(**launch_args)
        context = await browser.new_context(
            viewport={"width": 1440, "height": 900},
            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
        )
        page = await context.new_page()
        try:
            await page.goto(ticket_url, wait_until="networkidle", timeout=30000)
            # Extra settle time: pricing widgets may render after network idle.
            await page.wait_for_timeout(2000)
            # Look for ticket type rows with prices
            ticket_rows = await page.query_selector_all(
                "[class*='ticket-row'], [class*='ticket-type'], "
                "tr[class*='ticket'], [data-ticket-type]"
            )
            for row in ticket_rows:
                type_el = await row.query_selector("[class*='type'], td:first-child, [class*='label']")
                price_el = await row.query_selector("[class*='price'], td[class*='price'], [class*='amount']")
                if type_el and price_el:
                    type_text = (await type_el.inner_text()).strip()
                    price_text = (await price_el.inner_text()).strip()
                    # Require a "$" so non-price cells are skipped.
                    if "$" in price_text:
                        prices.append({"type": type_text, "price_text": price_text})
            # Also check for price listed in page title or header
            if not prices:
                page_data = await page.evaluate("""
                    () => {
                        const data = [];
                        document.querySelectorAll('[data-price]').forEach(el => {
                            data.push({type: el.dataset.ticketType || 'unknown', price: el.dataset.price});
                        });
                        return data;
                    }
                """)
                prices.extend(page_data)
        except Exception as e:
            # Best-effort: any navigation/selector failure yields [].
            print(f"Playwright error: {e}")
        finally:
            await browser.close()
    return prices
Anti-Bot Protections
Fandango uses a layered defense system:
Akamai Bot Manager is the primary challenge. Akamai does behavioral fingerprinting that tracks: - Mouse movements and scroll patterns (not applicable for headless, but timing matters) - JavaScript execution environment fingerprints - TLS handshake characteristics (JA3/JA3S hashes) - HTTP/2 fingerprint (header ordering, ALPN) - Cookie handling and session patterns
For the theater listing pages (the SEO-friendly ones), plain httpx with proper headers works reliably. These pages are designed to be crawlable for search indexing.
For checkout and pricing pages, you need a real browser. Playwright-driven Chromium can pass Akamai's browser environment checks — headed mode is the most reliable, though the headless configuration shown above (with the AutomationControlled blink feature disabled) works against lighter checks.
For sustained scraping beyond a few dozen pages, residential proxies are necessary. Akamai's fingerprinting correlates IPs with behavior patterns, and datacenter IPs get flagged within minutes on their reputation database.
ThorData provides residential IPs with strong US geographic coverage — useful since Fandango showtimes are location-dependent and you may want to scrape from different metro areas to get regional pricing data:
import random
# ThorData residential-proxy credentials (placeholders — replace with yours).
THORDATA_USER = "YOUR_USERNAME"
THORDATA_PASS = "YOUR_PASSWORD"
THORDATA_HOST = "proxy.thordata.com"
THORDATA_PORT = "9000"

def get_proxy(country: str = "US", state: Optional[str] = None,
              city: Optional[str] = None) -> str:
    """Build ThorData proxy URL with optional geo-targeting.

    Geo-targeting is encoded into the proxy *username* as ``-country-XX``,
    ``-state-XX`` and ``-city-Name`` suffixes; spaces in city names become
    underscores. (Annotations fixed: ``state``/``city`` were hinted as
    plain ``str`` with a ``None`` default.)

    Args:
        country: Country code; falsy values skip the segment entirely.
        state: Optional state/region code.
        city: Optional city name; spaces are replaced with underscores.

    Returns:
        A full ``http://user:pass@host:port`` proxy URL.
    """
    username = THORDATA_USER
    if country:
        username += f"-country-{country}"
    if state:
        username += f"-state-{state}"
    if city:
        username += f"-city-{city.replace(' ', '_')}"
    return f"http://{username}:{THORDATA_PASS}@{THORDATA_HOST}:{THORDATA_PORT}"
# Scrape NYC showtimes through NY-based proxy for geo-accurate results
# NOTE: module-level example — importing/running this file as-is fires a
# live network request; guard with `if __name__ == "__main__":` in real use.
nyc_proxy = get_proxy(country="US", state="NY")
client = make_client(nyc_proxy)
showtimes = scrape_theater_showtimes("amc-empire-25-aabtj", client=client)
Bulk Showtime Collection
A complete pipeline for collecting showtimes across multiple regions:
def collect_showtimes_by_region(
    zip_codes: list[str],
    target_date: str,
    proxy_url: str = None,
    max_theaters_per_zip: int = 10,
) -> list[dict]:
    """Collect showtimes for theaters near multiple ZIP codes."""
    collected: list[dict] = []
    http = make_client(proxy_url)
    visited_slugs: set[str] = set()
    for zc in zip_codes:
        print(f"\nZIP: {zc}")
        nearby = find_theaters(zc, client=http)
        print(f" Found {len(nearby)} theaters")
        for theater in nearby[:max_theaters_per_zip]:
            slug = theater["slug"]
            if slug in visited_slugs:
                continue  # already scraped via an overlapping ZIP radius
            visited_slugs.add(slug)
            try:
                record = scrape_theater_showtimes(slug, target_date, http)
                record["zip_code"] = zc
                collected.append(record)
                n_showings = len(record.get("movies", []))
                print(f" + {theater['name']}: {n_showings} showings")
            except Exception as e:
                print(f" x {theater['name']}: {e}")
            # Jittered per-theater delay to stay under rate limits.
            time.sleep(random.uniform(3, 7))
        # Longer pause before moving to the next ZIP.
        time.sleep(random.uniform(5, 10))
    return collected
def collect_week_ahead(zip_codes: list[str], proxy_url: str = None) -> dict:
    """Collect showtimes for the next 7 days across all ZIPs."""
    by_date: dict = {}
    today = date.today()
    for offset in range(7):
        day_iso = (today + timedelta(days=offset)).isoformat()
        print(f"\n=== {day_iso} ===")
        day_data = collect_showtimes_by_region(zip_codes, day_iso, proxy_url)
        by_date[day_iso] = day_data
        showing_total = sum(len(t['movies']) for t in day_data)
        print(f"Total: {len(day_data)} theaters, {showing_total} showings")
        # Extra-long pause between whole-date sweeps.
        time.sleep(random.uniform(10, 20))
    return by_date
SQLite Storage
def init_fandango_db(db_path: str = "fandango_data.db") -> sqlite3.Connection:
    """Initialize SQLite database for Fandango data.

    Creates the theaters / showtimes / ticket_prices tables plus query
    indexes, and a UNIQUE index on showtimes so that re-scraping the same
    theater/date doesn't accumulate duplicate rows (INSERT OR IGNORE is a
    no-op without a uniqueness constraint to violate).

    Returns:
        An open connection in WAL journal mode.
    """
    conn = sqlite3.connect(db_path)
    conn.execute("PRAGMA journal_mode=WAL")
    conn.execute("""
        CREATE TABLE IF NOT EXISTS theaters (
            slug TEXT PRIMARY KEY,
            name TEXT,
            address_street TEXT,
            address_city TEXT,
            address_state TEXT,
            address_zip TEXT,
            phone TEXT,
            geo_lat REAL,
            geo_lng REAL,
            amenities TEXT, -- JSON array
            first_seen TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
    """)
    conn.execute("""
        CREATE TABLE IF NOT EXISTS showtimes (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            theater_slug TEXT NOT NULL,
            show_date TEXT NOT NULL,
            movie_title TEXT NOT NULL,
            start_time TEXT,
            end_time TEXT,
            format TEXT DEFAULT 'Standard',
            rating TEXT,
            runtime TEXT,
            ticket_url TEXT,
            language TEXT DEFAULT 'en',
            zip_code TEXT,
            scraped_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            FOREIGN KEY (theater_slug) REFERENCES theaters(slug)
        )
    """)
    conn.execute("""
        CREATE TABLE IF NOT EXISTS ticket_prices (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            theater_slug TEXT NOT NULL,
            movie_title TEXT,
            show_date TEXT,
            format TEXT,
            ticket_type TEXT,
            price REAL,
            price_text TEXT,
            scraped_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
    """)
    # Fix: without this UNIQUE index, INSERT OR IGNORE never ignores
    # anything and daily re-scrapes duplicate every showtime row.
    conn.execute("""
        CREATE UNIQUE INDEX IF NOT EXISTS idx_showtimes_unique
        ON showtimes(theater_slug, show_date, movie_title, start_time, format)
    """)
    conn.execute("CREATE INDEX IF NOT EXISTS idx_showtimes_date ON showtimes(show_date)")
    conn.execute("CREATE INDEX IF NOT EXISTS idx_showtimes_movie ON showtimes(movie_title)")
    conn.execute("CREATE INDEX IF NOT EXISTS idx_showtimes_theater ON showtimes(theater_slug)")
    conn.execute("CREATE INDEX IF NOT EXISTS idx_prices_theater ON ticket_prices(theater_slug)")
    conn.commit()
    return conn
def save_theater(conn: sqlite3.Connection, theater_data: dict) -> None:
    """Save or update a theater record.

    Uses an UPSERT instead of ``INSERT OR REPLACE`` so that ``first_seen``
    survives re-scrapes (REPLACE deletes the old row, resetting it), and
    ``last_updated`` is explicitly refreshed on every update.

    Args:
        conn: Open connection with the theaters table created.
        theater_data: Dict as produced by scrape_theater_showtimes.
    """
    address = theater_data.get("address", {})
    geo = theater_data.get("geo", {})
    # JSON-LD sometimes delivers the address as a plain string rather than
    # a PostalAddress object; degrade gracefully instead of raising.
    if not isinstance(address, dict):
        address = {}
    if not isinstance(geo, dict):
        geo = {}
    conn.execute(
        """INSERT INTO theaters
               (slug, name, address_street, address_city, address_state,
                address_zip, phone, geo_lat, geo_lng, amenities)
           VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
           ON CONFLICT(slug) DO UPDATE SET
               name = excluded.name,
               address_street = excluded.address_street,
               address_city = excluded.address_city,
               address_state = excluded.address_state,
               address_zip = excluded.address_zip,
               phone = excluded.phone,
               geo_lat = excluded.geo_lat,
               geo_lng = excluded.geo_lng,
               amenities = excluded.amenities,
               last_updated = CURRENT_TIMESTAMP""",
        (
            theater_data["theater_slug"],
            theater_data.get("name"),
            address.get("streetAddress"),
            address.get("addressLocality"),
            address.get("addressRegion"),
            address.get("postalCode"),
            theater_data.get("phone"),
            geo.get("latitude"),
            geo.get("longitude"),
            json.dumps(theater_data.get("amenities", [])),
        )
    )
    conn.commit()
def save_showtimes(conn: sqlite3.Connection, theater_data: dict) -> int:
    """Save all showtimes for a theater/date scrape. Returns count saved.

    The count now reflects rows actually inserted: ``cursor.rowcount`` is 0
    when ``INSERT OR IGNORE`` skips a row (e.g. a duplicate under a unique
    constraint), whereas the previous version counted every attempt.
    """
    slug = theater_data["theater_slug"]
    show_date = theater_data["date"]
    zip_code = theater_data.get("zip_code", "")
    saved = 0
    for movie in theater_data.get("movies", []):
        try:
            cur = conn.execute(
                """INSERT OR IGNORE INTO showtimes
                       (theater_slug, show_date, movie_title, start_time,
                        end_time, format, rating, runtime, ticket_url,
                        language, zip_code)
                   VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
                (
                    slug, show_date,
                    movie.get("title", ""),
                    movie.get("start_time", ""),
                    movie.get("end_time", ""),
                    movie.get("format", "Standard"),
                    movie.get("rating", ""),
                    # JSON-LD records carry "duration"; HTML-parsed ones "runtime".
                    movie.get("duration") or movie.get("runtime", ""),
                    movie.get("url") or movie.get("ticket_url", ""),
                    movie.get("language", "en"),
                    zip_code,
                )
            )
            saved += cur.rowcount
        except sqlite3.Error:
            # Best-effort batch: one malformed row shouldn't abort the rest.
            continue
    conn.commit()
    return saved
Analytics: Tracking Pricing Trends
With daily scraping stored in SQLite, you can analyze pricing patterns:
def price_trends_by_format(db_path: str, days: int = 30) -> list:
    """Average ticket prices by format over the last N days."""
    # SQLite's datetime('now', '-N days') handles the rolling window.
    query = """SELECT format, ticket_type,
                      AVG(price) as avg_price,
                      MIN(price) as min_price,
                      MAX(price) as max_price,
                      COUNT(*) as sample_count
               FROM ticket_prices
               WHERE price IS NOT NULL
                 AND scraped_at >= datetime('now', ?)
               GROUP BY format, ticket_type
               ORDER BY avg_price DESC"""
    window = f"-{days} days"
    conn = sqlite3.connect(db_path)
    rows = conn.execute(query, (window,)).fetchall()
    conn.close()
    return rows
def most_popular_movies_by_showings(db_path: str, show_date: str) -> list:
    """Rank movies by number of showings on a given date."""
    query = """SELECT movie_title,
                      COUNT(DISTINCT theater_slug) as theater_count,
                      COUNT(*) as total_showings,
                      COUNT(DISTINCT format) as format_count
               FROM showtimes
               WHERE show_date = ?
               GROUP BY movie_title
               ORDER BY total_showings DESC
               LIMIT 25"""
    conn = sqlite3.connect(db_path)
    rows = conn.execute(query, (show_date,)).fetchall()
    conn.close()
    return rows
def theaters_by_format_availability(db_path: str, format_name: str,
                                    show_date: str) -> list:
    """Find all theaters showing a specific format (IMAX, Dolby, etc.)."""
    # Substring match on format so e.g. "IMAX" catches "IMAX 2D"/"IMAX 3D".
    query = """SELECT t.name, t.address_city, t.address_state,
                      COUNT(s.id) as showings_count
               FROM theaters t
               JOIN showtimes s ON t.slug = s.theater_slug
               WHERE s.format LIKE ?
                 AND s.show_date = ?
               GROUP BY t.slug
               ORDER BY showings_count DESC"""
    conn = sqlite3.connect(db_path)
    rows = conn.execute(query, (f"%{format_name}%", show_date)).fetchall()
    conn.close()
    return rows
Conclusion
Fandango showtimes are reliably extractable from their SEO-friendly listing pages using plain HTTP requests and BeautifulSoup. The JSON-LD structured data is the most stable extraction target — it follows Schema.org standards and survives UI redesigns. Ticket prices require browser automation and are harder to extract at scale due to Akamai protections. For regional analysis, combine ZIP code lookups with theater-level scraping. ThorData's residential proxies provide US-based IPs with geo-targeting that Akamai recognizes as legitimate user traffic — essential once you're scraping across multiple metro areas at scale.
Keep your request rates conservative: one theater page every 3-7 seconds for listing pages, and allow 15-20 seconds between Playwright sessions for checkout pages. The data will accumulate quickly even at these rates — there are only ~3,000 Fandango-listed theaters in the US, so a full national scrape takes a few hours at conservative speeds.