← Back to blog

Scraping Poshmark Listings and Price Trends with Python (2026)

Poshmark is the largest social fashion marketplace in the US -- over 80 million users buying and selling secondhand clothing, shoes, and accessories. For resellers, brand analysts, and fashion researchers, Poshmark's data is incredibly valuable: sold prices tell you what items are actually worth, not just what people list them at.

Poshmark has an undocumented API that powers their mobile app, and their web pages are relatively straightforward to parse. Here's how to extract listings, sold history, pricing data, and seller analytics with Python.

Why Poshmark Data Matters

The secondhand fashion market crossed $200 billion globally in 2024 and is growing faster than new apparel sales. Poshmark's transaction data is a real-time price discovery engine for fashion resellers. Specific use cases include sourcing arbitrage (buying underpriced listings to resell), brand valuation tracking over time, demand forecasting by size and condition, and benchmarking high-volume competitor sellers.

Poshmark's Internal API

The Poshmark iOS and Android apps communicate with https://poshmark.com/api/posts and related endpoints. These aren't documented, but they return clean JSON and don't require authentication for public listing data.

import httpx
import time
import json

class PoshmarkScraper:
    BASE_URL = "https://poshmark.com"

    def __init__(self, proxy_url: str = None):
        self.client = httpx.Client(
            base_url=self.BASE_URL,
            headers={
                "User-Agent": (
                    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
                    "AppleWebKit/537.36 Chrome/126.0.0.0 Safari/537.36"
                ),
                "Accept": "application/json",
                "X-PoshmarkApp": "1",
            },
            proxy=proxy_url,
            timeout=20,
        )

    def search_listings(self, query: str, max_items: int = 100, sort_by: str = "best_match") -> list[dict]:
        \"\"\"Search active (available) listings.

        sort_by options: 'best_match', 'price:asc', 'price:desc', 'time:desc'
        \"\"\"
        items = []
        max_id = ""

        while len(items) < max_items:
            params = {
                "query": query,
                "type": "listings",
                "count": 48,
                "experience": "all",
                "sort_by": sort_by,
            }
            if max_id:
                params["max_id"] = max_id

            r = self.client.get("/api/posts", params=params)
            if r.status_code == 429:
                print("Rate limited, sleeping 30s...")
                time.sleep(30)
                continue
            if r.status_code != 200:
                print(f"Error {r.status_code}")
                break

            data = r.json().get("data", [])
            if not data:
                break

            for post in data:
                items.append(self._parse_listing(post))

            max_id = data[-1].get("id", "")
            time.sleep(1.5)

        return items[:max_items]

    def _parse_listing(self, post: dict) -> dict:
        inventory = post.get("inventory", {})
        return {
            "id": post.get("id"),
            "title": post.get("title"),
            "brand": post.get("brand"),
            "size": post.get("size"),
            "original_price": post.get("original_price"),
            "listing_price": post.get("price"),
            "condition": post.get("condition"),
            "category": post.get("category_v2", {}).get("display"),
            "subcategory": post.get("category_v2", {}).get("id"),
            "color": post.get("color"),
            "status": inventory.get("status"),
            "available": inventory.get("status") == "available",
            "sold": inventory.get("status") == "sold",
            "seller": post.get("creator_username"),
            "likes": post.get("like_count", 0),
            "comments": post.get("comment_count", 0),
            "created_at": post.get("created_at"),
            "cover_image": post.get("picture_url"),
        }

    def search_sold(self, query: str, max_items: int = 100) -> list[dict]:
        \"\"\"Search sold listings only.\"\"\"
        items = []
        max_id = ""

        while len(items) < max_items:
            params = {
                "query": query,
                "type": "listings",
                "availability": "sold_out",
                "count": 48,
            }
            if max_id:
                params["max_id"] = max_id

            r = self.client.get("/api/posts", params=params)
            if r.status_code == 429:
                time.sleep(30)
                continue
            if r.status_code != 200:
                break

            data = r.json().get("data", [])
            if not data:
                break

            for post in data:
                listing = self._parse_listing(post)
                listing["sold_price"] = post.get("inventory", {}).get("sold_price")
                listing["sold_at"] = post.get("inventory", {}).get("sold_at")
                items.append(listing)

            max_id = data[-1].get("id", "")
            time.sleep(1.5)

        return items[:max_items]

    def get_seller_profile(self, username: str) -> dict:
        r = self.client.get(f"/api/users/{username}")
        if r.status_code != 200:
            return {}

        user = r.json().get("data", {})
        return {
            "username": user.get("username"),
            "display_name": user.get("full_name"),
            "followers": user.get("follower_count", 0),
            "following": user.get("following_count", 0),
            "listings_count": user.get("listing_count", 0),
            "sold_count": user.get("sold_count", 0),
            "love_count": user.get("love_count", 0),
            "joined": user.get("created_at"),
            "city": user.get("city"),
            "state": user.get("state"),
            "header": user.get("header"),
            "about": user.get("about"),
            "is_verified_merchant": user.get("is_verified_merchant", False),
        }

    def get_seller_listings(self, username: str, max_items: int = 100, include_sold: bool = False) -> list[dict]:
        items = []
        max_id = ""

        while len(items) < max_items:
            params = {"count": 48}
            if max_id:
                params["max_id"] = max_id
            if include_sold:
                params["availability"] = "sold_out"

            r = self.client.get(f"/api/users/{username}/posts", params=params)
            if r.status_code != 200:
                break

            data = r.json().get("data", [])
            if not data:
                break

            for post in data:
                items.append(self._parse_listing(post))

            max_id = data[-1].get("id", "")
            time.sleep(1.5)

        return items[:max_items]

Extracting Sold Listings

Sold listings are the most useful data on Poshmark. They tell you the actual market value of items -- not aspirational pricing, but what people paid:

# Usage
# NOTE: hits the live Poshmark API -- expect ~1.5s of delay per page of 48 results.
scraper = PoshmarkScraper()
sold = scraper.search_sold("Nike Air Max 90", max_items=200)
print(f"Found {len(sold)} sold listings")
for item in sold[:5]:
    print(f"{item['title']} -- ${item['sold_price']} (was ${item['original_price']})")

With sold data, you can calculate average selling prices, price-to-retail ratios, and how fast items sell:

from statistics import mean, median
from datetime import datetime

def analyze_price_trends(sold_items: list[dict]) -> dict:
    """Summarize sold listings: price stats, discount vs. retail, sell velocity.

    Args:
        sold_items: listing dicts as returned by PoshmarkScraper.search_sold;
            relevant keys are sold_price, original_price, created_at, sold_at
            and size.

    Returns:
        Dict of aggregate metrics. Numeric fields default to 0 when the
        underlying data is missing or unparseable.
    """
    prices: list[float] = []
    discounts: list[float] = []
    days_to_sell: list[int] = []
    size_distribution: dict = {}

    for item in sold_items:
        sold_price = item.get("sold_price")
        original = item.get("original_price")

        # Prices arrive as strings like "$1,234"; strip formatting before parsing.
        if sold_price:
            try:
                price_val = float(str(sold_price).replace("$", "").replace(",", ""))
                prices.append(price_val)

                if original:
                    orig_val = float(str(original).replace("$", "").replace(",", ""))
                    if orig_val > 0:
                        discounts.append((1 - price_val / orig_val) * 100)
            except (ValueError, TypeError):
                pass  # skip listings with malformed price strings

        # Sell-through time: listing creation -> sale, in whole days.
        created = item.get("created_at")
        sold_at = item.get("sold_at")
        if created and sold_at:
            try:
                c = datetime.fromisoformat(created.replace("Z", "+00:00"))
                s = datetime.fromisoformat(sold_at.replace("Z", "+00:00"))
                days_to_sell.append((s - c).days)
            except (ValueError, TypeError):
                pass

        # Size distribution
        size = item.get("size", "Unknown")
        size_distribution[size] = size_distribution.get(size, 0) + 1

    # Hoist the mean: the original recomputed mean(prices) once per element
    # inside the std-dev sum, making that line O(n^2).
    avg = mean(prices) if prices else 0.0
    std_dev = (
        round((sum((p - avg) ** 2 for p in prices) / len(prices)) ** 0.5, 2)
        if len(prices) > 1 else 0
    )

    return {
        "total_sold": len(sold_items),
        "with_price_data": len(prices),
        "avg_sold_price": round(avg, 2) if prices else 0,
        "median_sold_price": round(median(prices), 2) if prices else 0,
        "min_price": min(prices) if prices else 0,
        "max_price": max(prices) if prices else 0,
        "price_std_dev": std_dev,
        "avg_discount_pct": round(mean(discounts), 1) if discounts else 0,
        "median_discount_pct": round(median(discounts), 1) if discounts else 0,
        "avg_days_to_sell": round(mean(days_to_sell)) if days_to_sell else 0,
        "median_days_to_sell": round(median(days_to_sell)) if days_to_sell else 0,
        "fast_movers_pct": round(sum(1 for d in days_to_sell if d <= 7) / len(days_to_sell) * 100, 1) if days_to_sell else 0,
        "top_sizes": sorted(size_distribution.items(), key=lambda x: x[1], reverse=True)[:5],
    }


# Collect sold comps, aggregate them, and print a headline market summary.
sold = scraper.search_sold("Nike Air Max 90", max_items=200)
trends = analyze_price_trends(sold)

print(f"Nike Air Max 90 -- Poshmark Market Data")
print(f"  Sold count: {trends['total_sold']}")
print(f"  Avg price: ${trends['avg_sold_price']}")
print(f"  Median price: ${trends['median_sold_price']}")
print(f"  Range: ${trends['min_price']} -- ${trends['max_price']}")
print(f"  Avg discount from retail: {trends['avg_discount_pct']}%")
print(f"  Avg days to sell: {trends['avg_days_to_sell']}")
print(f"  Fast movers (sold <=7 days): {trends['fast_movers_pct']}%")

Brand Comparison Analysis

Compare resale performance across multiple brands:

def compare_brands(brands: list[str], items_per_brand: int = 100) -> list[dict]:
    """Compare resale metrics across multiple brands.

    Scrapes sold listings per brand, aggregates them, and returns one
    summary row per brand, ranked by average sold price (highest first).
    """
    scraper = PoshmarkScraper()
    summaries = []

    for name in brands:
        stats = analyze_price_trends(scraper.search_sold(name, max_items=items_per_brand))
        summaries.append({
            "brand": name,
            "avg_sold_price": stats["avg_sold_price"],
            "avg_discount_pct": stats["avg_discount_pct"],
            "avg_days_to_sell": stats["avg_days_to_sell"],
            "total_sold": stats["total_sold"],
        })
        time.sleep(3)  # pause between brand queries to stay under rate limits

    # Highest average realized price first.
    summaries.sort(key=lambda row: row["avg_sold_price"], reverse=True)
    return summaries


# Rank six handbag brands by average realized (sold) price.
brands = ["Gucci", "Louis Vuitton", "Prada", "Coach", "Kate Spade", "Michael Kors"]
comparison = compare_brands(brands, items_per_brand=50)
for b in comparison:
    print(f"{b['brand']}: avg ${b['avg_sold_price']}, {b['avg_discount_pct']}% off retail, {b['avg_days_to_sell']} days to sell")

Anti-Bot Measures on Poshmark

Poshmark's bot detection is moderate compared to sites like Uber Eats or Nike. But they still have protections that'll trip you up.

Rate limiting by IP. Poshmark allows roughly 60 requests per minute per IP before returning 429s. For collecting sold data across many brands, you'll burn through that quickly. Rotating residential proxies solve this -- ThorData for Poshmark scraping handles the rotation automatically, and you can sticky a session when you need to paginate through a single seller's closet without switching IPs mid-crawl.

# With proxy rotation for high-volume scraping
# (replace user:pass@proxy.thordata.net:9000 with your provider's gateway credentials)
scraper = PoshmarkScraper(proxy_url="http://user:[email protected]:9000")

User-Agent filtering. Poshmark blocks known bot user agents and headless browser signatures. Use a current Chrome user agent string and keep it consistent within a session.

Captcha on aggressive patterns. If you hit the search endpoint too fast, Poshmark will start returning CAPTCHA pages instead of JSON. Back off to 2-second intervals and this rarely triggers.

API endpoint changes. Poshmark occasionally modifies their API paths or response structure. The /api/posts endpoint has been stable for years, but always validate the response format before processing.

Building a Price Database

For ongoing market research, store everything in SQLite and query trends over time:

import sqlite3

def init_poshmark_db(db_path: str = "poshmark.db") -> sqlite3.Connection:
    """Create (if needed) the Poshmark research schema and return a connection.

    Tables:
        sold_listings   -- one row per scraped sold listing (id is the PK).
        brand_snapshots -- daily aggregate metrics, one row per (brand, date).
        seller_profiles -- latest scraped profile per username.

    Note: the original used literal \\"\\"\\" escapes around the SQL, which is
    not valid Python; fixed to proper triple-quoted strings.
    """
    conn = sqlite3.connect(db_path)

    conn.execute("""
        CREATE TABLE IF NOT EXISTS sold_listings (
            id TEXT PRIMARY KEY,
            title TEXT,
            brand TEXT,
            size TEXT,
            sold_price TEXT,
            original_price TEXT,
            condition TEXT,
            category TEXT,
            seller TEXT,
            sold_at TEXT,
            created_at TEXT,
            search_query TEXT,
            scraped_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
    """)

    conn.execute("""
        CREATE TABLE IF NOT EXISTS brand_snapshots (
            brand TEXT,
            snapshot_date TEXT,
            avg_sold_price REAL,
            median_sold_price REAL,
            avg_discount_pct REAL,
            avg_days_to_sell REAL,
            sample_count INTEGER,
            PRIMARY KEY (brand, snapshot_date)
        )
    """)

    conn.execute("""
        CREATE TABLE IF NOT EXISTS seller_profiles (
            username TEXT PRIMARY KEY,
            display_name TEXT,
            followers INTEGER,
            listings_count INTEGER,
            sold_count INTEGER,
            joined TEXT,
            city TEXT,
            state TEXT,
            is_verified_merchant INTEGER,
            scraped_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
    """)

    conn.commit()
    return conn


def save_sold_data(conn: sqlite3.Connection, items: list[dict], query: str) -> None:
    """Insert sold listings into sold_listings, skipping duplicates by id.

    Args:
        conn: open connection whose schema includes sold_listings.
        items: parsed listings from PoshmarkScraper.search_sold; "id" and
            "title" are required keys, the rest default to NULL.
        query: search query that produced the items (stored for provenance).
    """
    rows = [
        (
            item["id"], item["title"], item.get("brand"),
            item.get("size"), item.get("sold_price"),
            item.get("original_price"), item.get("condition"),
            item.get("category"), item.get("seller"),
            item.get("sold_at"), item.get("created_at"), query,
        )
        for item in items
    ]
    # executemany: one prepared statement instead of an execute per row.
    conn.executemany("""
        INSERT OR IGNORE INTO sold_listings
        (id, title, brand, size, sold_price, original_price,
         condition, category, seller, sold_at, created_at, search_query)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    """, rows)
    conn.commit()


def save_brand_snapshot(conn: sqlite3.Connection, brand: str, trends: dict) -> None:
    """Upsert today's aggregate metrics for *brand* into brand_snapshots.

    One row per (brand, date): re-running on the same day replaces the
    earlier snapshot via INSERT OR REPLACE on the composite primary key.

    Args:
        conn: open connection whose schema includes brand_snapshots.
        brand: brand name used as the row key.
        trends: output of analyze_price_trends for this brand.
    """
    from datetime import date  # local import, mirrors the original's scoping
    conn.execute("""
        INSERT OR REPLACE INTO brand_snapshots
        (brand, snapshot_date, avg_sold_price, median_sold_price, avg_discount_pct, avg_days_to_sell, sample_count)
        VALUES (?, ?, ?, ?, ?, ?, ?)
    """, (
        brand, date.today().isoformat(),
        trends["avg_sold_price"], trends["median_sold_price"],
        trends["avg_discount_pct"], trends["avg_days_to_sell"],
        trends["total_sold"],
    ))
    conn.commit()

Identifying Arbitrage Opportunities

Compare current listings against average sold prices to find underpriced items:

def find_underpriced(query: str, discount_threshold: float = 0.3) -> list[dict]:
    """Surface active listings priced well below the average sold price.

    discount_threshold=0.3 means "at least 30% under the sold-price average".
    Returns opportunity dicts sorted by projected profit margin, best first.
    """
    scraper = PoshmarkScraper()

    # Establish the sold-price baseline first.
    baseline = analyze_price_trends(scraper.search_sold(query, max_items=100))
    avg_sold = baseline["avg_sold_price"]
    if avg_sold <= 0:
        return []  # no sold-price data -> nothing to compare against

    # Cheapest active listings are the most likely candidates.
    candidates = scraper.search_listings(query, max_items=50, sort_by="price:asc")
    cutoff = avg_sold * (1 - discount_threshold)

    deals = []
    for listing in candidates:
        if not listing["available"]:
            continue

        try:
            asking = float(str(listing["listing_price"]).replace("$", "").replace(",", ""))
        except (ValueError, TypeError):
            continue

        if 0 < asking < cutoff:
            deals.append({
                "id": listing["id"],
                "title": listing["title"],
                "brand": listing.get("brand"),
                "size": listing.get("size"),
                "listing_price": asking,
                "avg_sold_price": avg_sold,
                "potential_profit": round(avg_sold - asking, 2),
                "profit_margin_pct": round((avg_sold - asking) / asking * 100, 1),
                "seller": listing.get("seller"),
                "likes": listing.get("likes", 0),
            })

    deals.sort(key=lambda d: d["profit_margin_pct"], reverse=True)
    return deals


# Find Nike Air Max 90 listings priced 30%+ below average sold price
# (shows the top 10 by projected profit margin).
opps = find_underpriced("Nike Air Max 90 size 10", discount_threshold=0.3)
for opp in opps[:10]:
    print(f"{opp['title']} ({opp['size']})")
    print(f"  Listed: ${opp['listing_price']} | Avg sold: ${opp['avg_sold_price']} | Profit: ${opp['potential_profit']} ({opp['profit_margin_pct']}%)")

Use Cases

Poshmark data is practical for several things: building resale price guides, sourcing undervalued inventory, tracking brand resale health over time, and researching fashion demand by size and condition.

Poshmark's API is one of the more stable undocumented APIs out there. It hasn't changed significantly in structure over the past couple years, which makes it reliable for ongoing data collection. Combine with ThorData's rotating residential proxies for high-volume collection without hitting IP rate limits.

Tracking Seller Performance Over Time

High-volume resellers are worth monitoring: they often price aggressively when trying to move inventory fast:

def profile_top_sellers(search_query: str, min_sold: int = 20) -> list[dict]:
    """Find and profile the sellers with the most sold listings for a query.

    Looks at up to 300 sold listings, keeps the top-20 sellers that meet
    min_sold, fetches each profile plus a sample of their active inventory,
    and returns profiles sorted by per-query sales (highest first).
    """
    from collections import Counter

    scraper = PoshmarkScraper()
    sold_items = scraper.search_sold(search_query, max_items=300)

    # Tally sold listings per seller for this query.
    sales_by_seller = Counter(
        entry["seller"] for entry in sold_items if entry.get("seller")
    )
    qualifying = [
        name for name, n_sold in sales_by_seller.most_common(20) if n_sold >= min_sold
    ]

    results = []
    for username in qualifying:
        try:
            profile = scraper.get_seller_profile(username)
            profile["sold_in_query"] = sales_by_seller[username]

            # Sample their current inventory for this query.
            active = scraper.search_listings(search_query + f" @{username}", max_items=20)
            profile["active_listings_count"] = len(active)
            priced = [
                float(str(item.get("listing_price", 0)).replace("$", "").replace(",", ""))
                for item in active
                if item.get("listing_price")
            ]
            profile["avg_active_price"] = round(sum(priced) / max(len(active), 1), 2)

            results.append(profile)
            time.sleep(1.5)
        except Exception as e:
            print(f"Error for {username}: {e}")

    results.sort(key=lambda p: p.get("sold_in_query", 0), reverse=True)
    return results


# Find top Nike resellers on Poshmark
# (each profile includes live follower/listing counts plus per-query sales).
top_nike_sellers = profile_top_sellers("Nike Air Jordan", min_sold=10)
for seller in top_nike_sellers[:5]:
    print(f"@{seller['username']}: {seller['sold_in_query']} sold, {seller['listings_count']} total listings, avg active price ${seller.get('avg_active_price', 'N/A')}")

Size-Based Price Analysis

Sizes significantly affect resale value. Men's size 10-11 Nike shoes sell at premiums while other sizes discount:

def analyze_by_size(query: str, max_items: int = 200) -> dict:
    """Analyze sold-price trends broken down by size.

    Returns {size: {count, avg_price, median_price, min, max}} ordered by
    sold count descending; sizes with fewer than 3 sales are dropped.
    """
    scraper = PoshmarkScraper()
    records = scraper.search_sold(query, max_items=max_items)

    # Bucket parsed sold prices by size label.
    prices_by_size: dict[str, list[float]] = {}
    for record in records:
        label = record.get("size", "Unknown") or "Unknown"
        raw_price = record.get("sold_price")
        if not raw_price:
            continue
        try:
            value = float(str(raw_price).replace("$", "").replace(",", ""))
        except (ValueError, TypeError):
            continue
        prices_by_size.setdefault(label, []).append(value)

    from statistics import mean, median

    summary = {
        label: {
            "count": len(values),
            "avg_price": round(mean(values), 2),
            "median_price": round(median(values), 2),
            "min": min(values),
            "max": max(values),
        }
        for label, values in prices_by_size.items()
        if len(values) >= 3  # need a few sales before the stats mean anything
    }

    # Most-sold sizes first.
    return dict(sorted(summary.items(), key=lambda kv: kv[1]["count"], reverse=True))


# Analyze Nike Air Max 90 prices by size
# (sizes with fewer than 3 recorded sales are excluded from the table).
size_analysis = analyze_by_size("Nike Air Max 90 men", max_items=300)
print("Size | Count | Avg Price | Median")
for size, data in list(size_analysis.items())[:10]:
    print(f"  {size:8} | {data['count']:5} | ${data['avg_price']:7.2f}    | ${data['median_price']:.2f}")

Condition Premium Analysis

"Like New" vs "Good" vs "Fair" -- quantify how condition affects price:

def analyze_condition_premium(query: str, max_items: int = 200) -> dict:
    """Calculate average sold prices per condition and premium vs. "Good".

    Returns {condition: {count, avg_price[, premium_vs_good_pct]}} sorted by
    average price descending; conditions with fewer than 3 sales are dropped,
    and the premium field is only added when a "Good" baseline exists.
    """
    scraper = PoshmarkScraper()
    records = scraper.search_sold(query, max_items=max_items)

    # Bucket parsed sold prices by condition label.
    buckets: dict[str, list[float]] = {}
    for record in records:
        label = record.get("condition", "Unknown") or "Unknown"
        raw = record.get("sold_price")
        if not raw:
            continue
        try:
            value = float(str(raw).replace("$", "").replace(",", ""))
        except (ValueError, TypeError):
            continue
        buckets.setdefault(label, []).append(value)

    from statistics import mean

    summary = {
        label: {"count": len(values), "avg_price": round(mean(values), 2)}
        for label, values in buckets.items()
        if len(values) >= 3
    }

    # Express each condition's average as a premium over the "Good" tier.
    baseline = summary.get("Good", {}).get("avg_price", 0)
    if baseline > 0:
        for stats in summary.values():
            stats["premium_vs_good_pct"] = round(
                (stats["avg_price"] - baseline) / baseline * 100, 1
            )

    return dict(sorted(summary.items(), key=lambda kv: kv[1]["avg_price"], reverse=True))


# Quantify condition premiums for Lululemon leggings relative to the "Good" tier.
cond_analysis = analyze_condition_premium("Lululemon leggings")
for condition, data in cond_analysis.items():
    premium = data.get("premium_vs_good_pct", "N/A")
    print(f"  {condition}: ${data['avg_price']} avg (n={data['count']}, {premium}% vs Good)")