Scraping Poshmark Listings and Price Trends with Python (2026)
Poshmark is the largest social fashion marketplace in the US -- over 80 million users buying and selling secondhand clothing, shoes, and accessories. For resellers, brand analysts, and fashion researchers, Poshmark's data is incredibly valuable: sold prices tell you what items are actually worth, not just what people list them at.
Poshmark has an undocumented API that powers their mobile app, and their web pages are relatively straightforward to parse. Here's how to extract listings, sold history, pricing data, and seller analytics with Python.
Why Poshmark Data Matters
The secondhand fashion market crossed $200 billion globally in 2024 and is growing faster than new apparel sales. Poshmark's transaction data is a real-time price discovery engine for fashion resellers. Specific use cases:
- Reseller sourcing decisions -- know what items sell and at what price before buying inventory at thrift stores
- Brand valuation -- track how luxury and streetwear brands hold value on the secondary market
- Competitive pricing -- see what similar items sold for before you list
- Market sizing -- count sold listings in a category to estimate demand
- Trend spotting -- identify which brands are gaining resale traction before mainstream attention
- Counterfeit detection research -- price outliers (unusually cheap luxury items) signal potential counterfeits
Poshmark's Internal API
The Poshmark iOS and Android apps communicate with https://poshmark.com/api/posts and related endpoints. These aren't documented, but they return clean JSON and don't require authentication for public listing data.
import httpx
import time
import json
class PoshmarkScraper:
BASE_URL = "https://poshmark.com"
def __init__(self, proxy_url: str = None):
self.client = httpx.Client(
base_url=self.BASE_URL,
headers={
"User-Agent": (
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
"AppleWebKit/537.36 Chrome/126.0.0.0 Safari/537.36"
),
"Accept": "application/json",
"X-PoshmarkApp": "1",
},
proxy=proxy_url,
timeout=20,
)
def search_listings(self, query: str, max_items: int = 100, sort_by: str = "best_match") -> list[dict]:
\"\"\"Search active (available) listings.
sort_by options: 'best_match', 'price:asc', 'price:desc', 'time:desc'
\"\"\"
items = []
max_id = ""
while len(items) < max_items:
params = {
"query": query,
"type": "listings",
"count": 48,
"experience": "all",
"sort_by": sort_by,
}
if max_id:
params["max_id"] = max_id
r = self.client.get("/api/posts", params=params)
if r.status_code == 429:
print("Rate limited, sleeping 30s...")
time.sleep(30)
continue
if r.status_code != 200:
print(f"Error {r.status_code}")
break
data = r.json().get("data", [])
if not data:
break
for post in data:
items.append(self._parse_listing(post))
max_id = data[-1].get("id", "")
time.sleep(1.5)
return items[:max_items]
def _parse_listing(self, post: dict) -> dict:
inventory = post.get("inventory", {})
return {
"id": post.get("id"),
"title": post.get("title"),
"brand": post.get("brand"),
"size": post.get("size"),
"original_price": post.get("original_price"),
"listing_price": post.get("price"),
"condition": post.get("condition"),
"category": post.get("category_v2", {}).get("display"),
"subcategory": post.get("category_v2", {}).get("id"),
"color": post.get("color"),
"status": inventory.get("status"),
"available": inventory.get("status") == "available",
"sold": inventory.get("status") == "sold",
"seller": post.get("creator_username"),
"likes": post.get("like_count", 0),
"comments": post.get("comment_count", 0),
"created_at": post.get("created_at"),
"cover_image": post.get("picture_url"),
}
def search_sold(self, query: str, max_items: int = 100) -> list[dict]:
\"\"\"Search sold listings only.\"\"\"
items = []
max_id = ""
while len(items) < max_items:
params = {
"query": query,
"type": "listings",
"availability": "sold_out",
"count": 48,
}
if max_id:
params["max_id"] = max_id
r = self.client.get("/api/posts", params=params)
if r.status_code == 429:
time.sleep(30)
continue
if r.status_code != 200:
break
data = r.json().get("data", [])
if not data:
break
for post in data:
listing = self._parse_listing(post)
listing["sold_price"] = post.get("inventory", {}).get("sold_price")
listing["sold_at"] = post.get("inventory", {}).get("sold_at")
items.append(listing)
max_id = data[-1].get("id", "")
time.sleep(1.5)
return items[:max_items]
def get_seller_profile(self, username: str) -> dict:
r = self.client.get(f"/api/users/{username}")
if r.status_code != 200:
return {}
user = r.json().get("data", {})
return {
"username": user.get("username"),
"display_name": user.get("full_name"),
"followers": user.get("follower_count", 0),
"following": user.get("following_count", 0),
"listings_count": user.get("listing_count", 0),
"sold_count": user.get("sold_count", 0),
"love_count": user.get("love_count", 0),
"joined": user.get("created_at"),
"city": user.get("city"),
"state": user.get("state"),
"header": user.get("header"),
"about": user.get("about"),
"is_verified_merchant": user.get("is_verified_merchant", False),
}
def get_seller_listings(self, username: str, max_items: int = 100, include_sold: bool = False) -> list[dict]:
items = []
max_id = ""
while len(items) < max_items:
params = {"count": 48}
if max_id:
params["max_id"] = max_id
if include_sold:
params["availability"] = "sold_out"
r = self.client.get(f"/api/users/{username}/posts", params=params)
if r.status_code != 200:
break
data = r.json().get("data", [])
if not data:
break
for post in data:
items.append(self._parse_listing(post))
max_id = data[-1].get("id", "")
time.sleep(1.5)
return items[:max_items]
Extracting Sold Listings
Sold listings are the most useful data on Poshmark. They tell you the actual market value of items -- not aspirational pricing, but what people paid:
# Usage
scraper = PoshmarkScraper()
sold = scraper.search_sold("Nike Air Max 90", max_items=200)
print(f"Found {len(sold)} sold listings")
# Show a sample of realized sale prices vs. original asking prices.
for item in sold[:5]:
    print(f"{item['title']} -- ${item['sold_price']} (was ${item['original_price']})")
Analyzing Price Trends
With sold data, you can calculate average selling prices, price-to-retail ratios, and how fast items sell:
from statistics import mean, median
from datetime import datetime
def analyze_price_trends(sold_items: list[dict]) -> dict:
prices = []
discounts = []
days_to_sell = []
size_distribution = {}
for item in sold_items:
sold_price = item.get("sold_price")
original = item.get("original_price")
if sold_price:
try:
price_val = float(str(sold_price).replace("$", "").replace(",", ""))
prices.append(price_val)
if original:
orig_val = float(str(original).replace("$", "").replace(",", ""))
if orig_val > 0:
discounts.append((1 - price_val / orig_val) * 100)
except (ValueError, TypeError):
pass
# Time to sell
created = item.get("created_at")
sold_at = item.get("sold_at")
if created and sold_at:
try:
c = datetime.fromisoformat(created.replace("Z", "+00:00"))
s = datetime.fromisoformat(sold_at.replace("Z", "+00:00"))
days_to_sell.append((s - c).days)
except (ValueError, TypeError):
pass
# Size distribution
size = item.get("size", "Unknown")
size_distribution[size] = size_distribution.get(size, 0) + 1
return {
"total_sold": len(sold_items),
"with_price_data": len(prices),
"avg_sold_price": round(mean(prices), 2) if prices else 0,
"median_sold_price": round(median(prices), 2) if prices else 0,
"min_price": min(prices) if prices else 0,
"max_price": max(prices) if prices else 0,
"price_std_dev": round((sum((p - mean(prices)) ** 2 for p in prices) / len(prices)) ** 0.5, 2) if len(prices) > 1 else 0,
"avg_discount_pct": round(mean(discounts), 1) if discounts else 0,
"median_discount_pct": round(median(discounts), 1) if discounts else 0,
"avg_days_to_sell": round(mean(days_to_sell)) if days_to_sell else 0,
"median_days_to_sell": round(median(days_to_sell)) if days_to_sell else 0,
"fast_movers_pct": round(sum(1 for d in days_to_sell if d <= 7) / len(days_to_sell) * 100, 1) if days_to_sell else 0,
"top_sizes": sorted(size_distribution.items(), key=lambda x: x[1], reverse=True)[:5],
}
# Build the sold-price baseline, then summarize it into market metrics.
sold = scraper.search_sold("Nike Air Max 90", max_items=200)
trends = analyze_price_trends(sold)
print(f"Nike Air Max 90 -- Poshmark Market Data")
print(f" Sold count: {trends['total_sold']}")
print(f" Avg price: ${trends['avg_sold_price']}")
print(f" Median price: ${trends['median_sold_price']}")
print(f" Range: ${trends['min_price']} -- ${trends['max_price']}")
print(f" Avg discount from retail: {trends['avg_discount_pct']}%")
print(f" Avg days to sell: {trends['avg_days_to_sell']}")
print(f" Fast movers (sold <=7 days): {trends['fast_movers_pct']}%")
Brand Comparison Analysis
Compare resale performance across multiple brands:
def compare_brands(brands: list[str], items_per_brand: int = 100) -> list[dict]:
    """Compare resale metrics across multiple brands."""
    client = PoshmarkScraper()
    summaries = []
    for name in brands:
        # One sold-listing sample per brand, reduced to aggregate metrics.
        stats = analyze_price_trends(client.search_sold(name, max_items=items_per_brand))
        summaries.append({
            "brand": name,
            "avg_sold_price": stats["avg_sold_price"],
            "avg_discount_pct": stats["avg_discount_pct"],
            "avg_days_to_sell": stats["avg_days_to_sell"],
            "total_sold": stats["total_sold"],
        })
        time.sleep(3)  # pause between brands to stay under the rate limit
    # Highest average sold price first.
    summaries.sort(key=lambda row: row["avg_sold_price"], reverse=True)
    return summaries
# Compare handbag brands head-to-head, 50 sold listings each.
brands = ["Gucci", "Louis Vuitton", "Prada", "Coach", "Kate Spade", "Michael Kors"]
comparison = compare_brands(brands, items_per_brand=50)
for b in comparison:
    print(f"{b['brand']}: avg ${b['avg_sold_price']}, {b['avg_discount_pct']}% off retail, {b['avg_days_to_sell']} days to sell")
Anti-Bot Measures on Poshmark
Poshmark's bot detection is moderate compared to sites like Uber Eats or Nike. But they still have protections that'll trip you up.
Rate limiting by IP. Poshmark allows roughly 60 requests per minute per IP before returning 429s. For collecting sold data across many brands, you'll burn through that quickly. Rotating residential proxies solve this -- ThorData for Poshmark scraping handles the rotation automatically, and you can sticky a session when you need to paginate through a single seller's closet without switching IPs mid-crawl.
# With proxy rotation for high-volume scraping
# (credentials and gateway host are placeholders for your provider's values)
scraper = PoshmarkScraper(proxy_url="http://user:[email protected]:9000")
User-Agent filtering. Poshmark blocks known bot user agents and headless browser signatures. Use a current Chrome user agent string and keep it consistent within a session.
Captcha on aggressive patterns. If you hit the search endpoint too fast, Poshmark will start returning CAPTCHA pages instead of JSON. Back off to 2-second intervals and this rarely triggers.
API endpoint changes. Poshmark occasionally modifies their API paths or response structure. The /api/posts endpoint has been stable for years, but always validate the response format before processing.
Building a Price Database
For ongoing market research, store everything in SQLite and query trends over time:
import sqlite3
def init_poshmark_db(db_path: str = "poshmark.db") -> sqlite3.Connection:
    """Open (creating tables if needed) the Poshmark research database.

    Three tables: raw sold listings, daily per-brand aggregate snapshots,
    and seller profiles. Returns the open connection.
    """
    conn = sqlite3.connect(db_path)
    # One script, three idempotent CREATEs; executescript commits for us.
    conn.executescript("""
        CREATE TABLE IF NOT EXISTS sold_listings (
            id TEXT PRIMARY KEY,
            title TEXT,
            brand TEXT,
            size TEXT,
            sold_price TEXT,
            original_price TEXT,
            condition TEXT,
            category TEXT,
            seller TEXT,
            sold_at TEXT,
            created_at TEXT,
            search_query TEXT,
            scraped_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        );
        CREATE TABLE IF NOT EXISTS brand_snapshots (
            brand TEXT,
            snapshot_date TEXT,
            avg_sold_price REAL,
            median_sold_price REAL,
            avg_discount_pct REAL,
            avg_days_to_sell REAL,
            sample_count INTEGER,
            PRIMARY KEY (brand, snapshot_date)
        );
        CREATE TABLE IF NOT EXISTS seller_profiles (
            username TEXT PRIMARY KEY,
            display_name TEXT,
            followers INTEGER,
            listings_count INTEGER,
            sold_count INTEGER,
            joined TEXT,
            city TEXT,
            state TEXT,
            is_verified_merchant INTEGER,
            scraped_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        );
    """)
    conn.commit()
    return conn
def save_sold_data(conn: sqlite3.Connection, items: list[dict], query: str):
    """Persist sold listings; rows with an already-seen id are ignored."""
    rows = [
        (
            item["id"], item["title"], item.get("brand"),
            item.get("size"), item.get("sold_price"),
            item.get("original_price"), item.get("condition"),
            item.get("category"), item.get("seller"),
            item.get("sold_at"), item.get("created_at"), query,
        )
        for item in items
    ]
    # Batch insert; the PRIMARY KEY on id makes re-scrapes idempotent.
    conn.executemany("""
        INSERT OR IGNORE INTO sold_listings
        (id, title, brand, size, sold_price, original_price,
        condition, category, seller, sold_at, created_at, search_query)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    """, rows)
    conn.commit()
def save_brand_snapshot(conn: sqlite3.Connection, brand: str, trends: dict):
    """Upsert today's aggregate metrics for one brand (one row per brand/day)."""
    from datetime import date

    row = (
        brand,
        date.today().isoformat(),
        trends["avg_sold_price"],
        trends["median_sold_price"],
        trends["avg_discount_pct"],
        trends["avg_days_to_sell"],
        trends["total_sold"],
    )
    # REPLACE keyed on (brand, snapshot_date) so reruns overwrite, not dupe.
    conn.execute("""
        INSERT OR REPLACE INTO brand_snapshots
        (brand, snapshot_date, avg_sold_price, median_sold_price, avg_discount_pct, avg_days_to_sell, sample_count)
        VALUES (?, ?, ?, ?, ?, ?, ?)
    """, row)
    conn.commit()
Identifying Arbitrage Opportunities
Compare current listings against average sold prices to find underpriced items:
def find_underpriced(query: str, discount_threshold: float = 0.3) -> list[dict]:
    """Find active listings priced significantly below average sold price."""
    client = PoshmarkScraper()

    # Establish the market baseline from recently sold listings.
    baseline = analyze_price_trends(client.search_sold(query, max_items=100))
    avg_sold = baseline["avg_sold_price"]
    if avg_sold <= 0:
        return []  # no price signal -> nothing to compare against

    cutoff = avg_sold * (1 - discount_threshold)
    deals = []
    for listing in client.search_listings(query, max_items=50, sort_by="price:asc"):
        if not listing["available"]:
            continue
        try:
            asking = float(str(listing["listing_price"]).replace("$", "").replace(",", ""))
        except (ValueError, TypeError):
            continue  # unparseable price: skip the listing
        if 0 < asking < cutoff:
            deals.append({
                "id": listing["id"],
                "title": listing["title"],
                "brand": listing.get("brand"),
                "size": listing.get("size"),
                "listing_price": asking,
                "avg_sold_price": avg_sold,
                "potential_profit": round(avg_sold - asking, 2),
                "profit_margin_pct": round((avg_sold - asking) / asking * 100, 1),
                "seller": listing.get("seller"),
                "likes": listing.get("likes", 0),
            })
    # Best margin first.
    deals.sort(key=lambda deal: deal["profit_margin_pct"], reverse=True)
    return deals
# Find Nike Air Max 90 listings priced 30%+ below average sold price
opps = find_underpriced("Nike Air Max 90 size 10", discount_threshold=0.3)
# Print the ten highest-margin opportunities.
for opp in opps[:10]:
    print(f"{opp['title']} ({opp['size']})")
    print(f" Listed: ${opp['listing_price']} | Avg sold: ${opp['avg_sold_price']} | Profit: ${opp['potential_profit']} ({opp['profit_margin_pct']}%)")
Use Cases
Poshmark data is practical for several things:
- Reseller sourcing. Know what items actually sell and at what price before you buy inventory at thrift stores or clearance sales.
- Brand analytics. Track how luxury brands hold value on the secondary market. A brand with high resale value has stronger demand.
- Pricing strategy. If you're listing items, seeing what similar items sold for helps you price competitively without leaving money on the table.
- Market sizing. Count sold listings in a category to estimate the addressable market for a product niche.
- Arbitrage. Identify active listings priced below average sold prices -- buy and relist at market rate.
- Trend prediction. Brands with rapidly increasing sold prices and shrinking days-to-sell are gaining traction before it shows up on mainstream trend reports.
Poshmark's API is one of the more stable undocumented APIs out there. It hasn't changed significantly in structure over the past couple years, which makes it reliable for ongoing data collection. Combine with ThorData's rotating residential proxies for high-volume collection without hitting IP rate limits.
Tracking Seller Performance Over Time
High-volume resellers are worth monitoring: they often price aggressively when trying to move inventory fast:
def profile_top_sellers(search_query: str, min_sold: int = 20) -> list[dict]:
    """Find and profile sellers with the most sold listings for a query."""
    from collections import Counter

    client = PoshmarkScraper()
    sold = client.search_sold(search_query, max_items=300)

    # Tally completed sales per seller, keep only the heavy hitters.
    sales_by_seller = Counter(entry["seller"] for entry in sold if entry.get("seller"))
    heavy_hitters = [name for name, n in sales_by_seller.most_common(20) if n >= min_sold]

    profiles = []
    for username in heavy_hitters:
        try:
            profile = client.get_seller_profile(username)
            profile["sold_in_query"] = sales_by_seller[username]
            # Snapshot their current inventory for this query.
            listings = client.search_listings(search_query + f" @{username}", max_items=20)
            profile["active_listings_count"] = len(listings)
            priced = [
                float(str(l.get("listing_price", 0)).replace("$", "").replace(",", ""))
                for l in listings
                if l.get("listing_price")
            ]
            profile["avg_active_price"] = round(sum(priced) / max(len(listings), 1), 2)
            profiles.append(profile)
            time.sleep(1.5)
        except Exception as e:
            print(f"Error for {username}: {e}")
    # Most sales in this query first.
    profiles.sort(key=lambda p: p.get("sold_in_query", 0), reverse=True)
    return profiles
# Find top Nike resellers on Poshmark
top_nike_sellers = profile_top_sellers("Nike Air Jordan", min_sold=10)
# Show the five sellers with the most sales matching the query.
for seller in top_nike_sellers[:5]:
    print(f"@{seller['username']}: {seller['sold_in_query']} sold, {seller['listings_count']} total listings, avg active price ${seller.get('avg_active_price', 'N/A')}")
Size-Based Price Analysis
Size significantly affects resale value. Men's size 10-11 Nike shoes sell at a premium, while less common sizes sell at a discount:
def analyze_by_size(query: str, max_items: int = 200) -> dict:
    """Analyze price trends broken down by size."""
    from statistics import mean, median

    client = PoshmarkScraper()
    per_size: dict = {}

    # Bucket parsed sold prices by size label.
    for record in client.search_sold(query, max_items=max_items):
        label = record.get("size", "Unknown") or "Unknown"
        raw = record.get("sold_price")
        if not raw:
            continue
        try:
            amount = float(str(raw).replace("$", "").replace(",", ""))
        except (ValueError, TypeError):
            continue  # unparseable price: skip this sale
        per_size.setdefault(label, []).append(amount)

    # Require at least 3 sales per size so averages aren't noise.
    summary = {
        label: {
            "count": len(amounts),
            "avg_price": round(mean(amounts), 2),
            "median_price": round(median(amounts), 2),
            "min": min(amounts),
            "max": max(amounts),
        }
        for label, amounts in per_size.items()
        if len(amounts) >= 3
    }
    # Most frequently sold sizes first.
    return dict(sorted(summary.items(), key=lambda kv: kv[1]["count"], reverse=True))
# Analyze Nike Air Max 90 prices by size
size_analysis = analyze_by_size("Nike Air Max 90 men", max_items=300)
print("Size | Count | Avg Price | Median")
# Ten most frequently sold sizes, as a fixed-width table.
for size, data in list(size_analysis.items())[:10]:
    print(f" {size:8} | {data['count']:5} | ${data['avg_price']:7.2f} | ${data['median_price']:.2f}")
Condition Premium Analysis
"Like New" vs "Good" vs "Fair" -- quantify how condition affects price:
def analyze_condition_premium(query: str, max_items: int = 200) -> dict:
    """Calculate price premiums for different item conditions."""
    from statistics import mean

    client = PoshmarkScraper()
    by_condition: dict = {}

    # Bucket parsed sold prices by condition label.
    for record in client.search_sold(query, max_items=max_items):
        label = record.get("condition", "Unknown") or "Unknown"
        raw = record.get("sold_price")
        if not raw:
            continue
        try:
            amount = float(str(raw).replace("$", "").replace(",", ""))
        except (ValueError, TypeError):
            continue  # unparseable price: skip this sale
        by_condition.setdefault(label, []).append(amount)

    # Only summarize conditions with at least 3 data points.
    summary = {}
    for label, amounts in by_condition.items():
        if len(amounts) >= 3:
            summary[label] = {"count": len(amounts), "avg_price": round(mean(amounts), 2)}

    # Express each condition as a premium/discount vs the "Good" baseline.
    good_avg = summary.get("Good", {}).get("avg_price", 0)
    if good_avg > 0:
        for stats in summary.values():
            stats["premium_vs_good_pct"] = round((stats["avg_price"] - good_avg) / good_avg * 100, 1)

    return dict(sorted(summary.items(), key=lambda kv: kv[1]["avg_price"], reverse=True))
# Quantify how listing condition shifts the average sold price.
cond_analysis = analyze_condition_premium("Lululemon leggings")
for condition, data in cond_analysis.items():
    premium = data.get("premium_vs_good_pct", "N/A")
    print(f" {condition}: ${data['avg_price']} avg (n={data['count']}, {premium}% vs Good)")