← Back to blog

Scrape LinkedIn Job Listings with Python — No Login Required (2026)

LinkedIn's public job search is one of the most useful datasets you can scrape without any authentication. Job titles, company names, locations, posted dates, salary ranges when listed — all available at linkedin.com/jobs/search without logging in. Here's what actually works in 2026.

Why Scrape LinkedIn Jobs? Practical Use Cases

LinkedIn is the largest job board in the world, and its public listings are a gold mine for salary benchmarking, recruiting and competitor-hiring intelligence, skills-demand analysis, and labor-market trend tracking — all use cases this guide builds out below.

How LinkedIn Public Job Search Works

LinkedIn exposes job search results at a clean URL structure:

https://www.linkedin.com/jobs/search/?keywords=python+developer&location=New+York&geoId=105080838&start=0

Key parameters: `keywords` (the search terms), `location` (a human-readable place name), `geoId` (LinkedIn's numeric location identifier — see the reference table below), and `start` (the pagination offset, in increments of 25).

Common GeoIDs Reference

Location GeoID
United States 103644278
New York, NY 105080838
San Francisco Bay Area 90000084
London, UK 90009496
Berlin, Germany 103035651
Remote (Worldwide) 92000000
Toronto, Canada 100025096
Sydney, Australia 104769905

Complete Working Script

#!/usr/bin/env python3
"""LinkedIn public job scraper — no login required."""

import requests
from bs4 import BeautifulSoup
import time
import random
import json
import csv
import sys
from datetime import datetime

# Browser header profiles rotated across requests so the scraper does not
# present a single, constant fingerprint. Values match real browser traffic.
HEADERS_POOL = [
    # Chrome on Windows, arriving from Google
    {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
        "Accept-Language": "en-US,en;q=0.9",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Referer": "https://www.google.com/",
    },
    # Safari on macOS, arriving from LinkedIn itself
    {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4 Safari/605.1.15",
        "Accept-Language": "en-GB,en;q=0.9",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Referer": "https://www.linkedin.com/",
    },
    # Chrome on Linux, arriving from a Google search results page
    {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
        "Accept-Language": "en-US,en;q=0.8",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Referer": "https://www.google.com/search?q=linkedin+jobs",
    },
]


def get_headers():
    """Pick one browser header profile at random for the next request."""
    return random.choice(HEADERS_POOL)


def fetch_job_listings(keywords: str, location: str, geo_id: str,
                        start: int = 0, time_filter: str = None,
                        experience: str = None, work_type: str = None,
                        proxies: dict = None) -> BeautifulSoup:
    """Download one page of public job search results and parse it.

    Args:
        keywords: Search query, e.g. "python developer".
        location: Human-readable location string.
        geo_id: LinkedIn numeric geo identifier (see GeoID table).
        start: Pagination offset; LinkedIn pages in steps of 25.
        time_filter: Optional f_TPR value (e.g. "r86400" = last 24h).
        experience: Optional f_E experience-level filter value.
        work_type: Optional f_WT work-type filter value.
        proxies: Optional requests-style proxy mapping.

    Returns:
        Parsed BeautifulSoup document of the results page.

    Raises:
        requests.exceptions.HTTPError: On a non-2xx response.
    """
    base_url = "https://www.linkedin.com/jobs/search/"
    query = {
        "keywords": keywords,
        "location": location,
        "geoId": geo_id,
        "start": start,
        "position": 1,
        "pageNum": 0,
    }
    # Only attach the optional filters that were actually supplied.
    optional = {"f_TPR": time_filter, "f_E": experience, "f_WT": work_type}
    query.update({key: val for key, val in optional.items() if val})

    resp = requests.get(base_url, params=query, headers=get_headers(),
                        proxies=proxies, timeout=15)
    resp.raise_for_status()
    return BeautifulSoup(resp.text, "html.parser")


def parse_job_cards(soup: BeautifulSoup) -> list[dict]:
    """Extract job data from the result cards of a search page.

    Each card carries a `data-entity-urn` attribute whose last `:`-separated
    segment is the numeric job id, which also yields the canonical job URL.

    Returns:
        One dict per card with keys: id, title, company, location, posted,
        salary, url. Missing fields are None.
    """
    def _text(element):
        # Normalized text of an optional element, or None if absent.
        return element.get_text(strip=True) if element else None

    results = []
    for card in soup.select("div.base-card[data-entity-urn]"):
        urn = card.get("data-entity-urn", "")
        job_id = urn.rsplit(":", 1)[-1] if urn else None
        posted_el = card.select_one("time")

        results.append({
            "id": job_id,
            "title": _text(card.select_one("h3.base-search-card__title")),
            "company": _text(card.select_one("h4.base-search-card__subtitle a")),
            "location": _text(card.select_one("span.job-search-card__location")),
            "posted": posted_el.get("datetime") if posted_el else None,
            "salary": _text(card.select_one("span.job-search-card__salary-info")),
            "url": (
                f"https://www.linkedin.com/jobs/view/{job_id}/"
                if job_id else None
            ),
        })

    return results


def fetch_job_detail(job_id: str, proxies: dict = None) -> dict:
    """Fetch one job's public detail page: description plus criteria fields.

    Args:
        job_id: Numeric LinkedIn job id (from parse_job_cards).
        proxies: Optional requests-style proxy mapping.

    Returns:
        Dict with description, seniority_level, employment_type,
        job_function, and industries (each None when not present).

    Raises:
        requests.exceptions.HTTPError: On a non-2xx response.
    """
    resp = requests.get(
        f"https://www.linkedin.com/jobs/view/{job_id}/",
        headers=get_headers(), proxies=proxies, timeout=15,
    )
    resp.raise_for_status()
    page = BeautifulSoup(resp.text, "html.parser")

    # The criteria list holds labeled metadata pairs (e.g. "Seniority level").
    criteria = {}
    for item in page.select("li.description__job-criteria-item"):
        label = item.select_one("h3")
        value = item.select_one("span")
        if label and value:
            criteria[label.get_text(strip=True)] = value.get_text(strip=True)

    body = page.select_one("div.show-more-less-html__markup")
    return {
        "description": (
            body.get_text(separator="\n", strip=True) if body else None
        ),
        "seniority_level": criteria.get("Seniority level"),
        "employment_type": criteria.get("Employment type"),
        "job_function": criteria.get("Job function"),
        "industries": criteria.get("Industries"),
    }


def scrape_all_pages(keywords: str, location: str, geo_id: str,
                      max_pages: int = 5, proxies: dict = None,
                      **filters) -> list[dict]:
    """Scrape multiple pages of job listings.

    Walks the paginated results (25 jobs per page) and stops early on an
    HTTP error, a network failure, or an empty page.

    Args:
        keywords: Search query, e.g. "python developer".
        location: Human-readable location string.
        geo_id: LinkedIn numeric geo identifier.
        max_pages: Upper bound on pages to fetch.
        proxies: Optional requests-style proxy mapping.
        **filters: Forwarded to fetch_job_listings (time_filter,
            experience, work_type).

    Returns:
        Accumulated job dicts from all successfully fetched pages.
    """
    all_jobs = []

    for page in range(max_pages):
        start = page * 25  # LinkedIn paginates in fixed steps of 25
        print(f"  Page {page + 1} (offset {start})...", end=" ")

        try:
            soup = fetch_job_listings(keywords, location, geo_id,
                                       start=start, proxies=proxies,
                                       **filters)
            jobs = parse_job_cards(soup)
        except requests.exceptions.HTTPError as e:
            print(f"HTTP {e.response.status_code} — stopping")
            break
        except requests.exceptions.RequestException as e:
            # Timeouts / connection resets previously crashed the whole
            # crawl; abort gracefully instead — partial results are useful.
            print(f"request failed ({e}) — stopping")
            break

        if not jobs:
            print("no results — stopping")
            break

        print(f"found {len(jobs)} jobs")
        all_jobs.extend(jobs)

        # Randomized delay between pages to look less like a bot.
        time.sleep(random.uniform(3.5, 7.0))

    return all_jobs

Pagination: What You Need to Know

LinkedIn returns 25 results per page. Use start=0, start=25, start=50, and so on. In practice, results dry up after 10-15 pages regardless — LinkedIn caps public search at around 1000 listings per query.

To get more results, vary your search parameters:

# Instead of one big search, run multiple targeted ones
searches = [
    {"keywords": "python developer", "location": "New York, NY", "geo_id": "105080838"},
    {"keywords": "python developer", "location": "San Francisco Bay Area", "geo_id": "90000084"},
    {"keywords": "python developer", "location": "Remote", "geo_id": "92000000"},
    {"keywords": "django developer", "location": "United States", "geo_id": "103644278"},
    {"keywords": "fastapi engineer", "location": "United States", "geo_id": "103644278"},
]

all_jobs = []
seen_ids = set()  # job ids already collected, for cross-search deduplication

# The same posting can appear under several searches (overlapping keywords
# and locations), so deduplicate on LinkedIn's job id before keeping it.
for search in searches:
    jobs = scrape_all_pages(**search, max_pages=5)
    for job in jobs:
        if job["id"] not in seen_ids:
            seen_ids.add(job["id"])
            all_jobs.append(job)
    time.sleep(random.uniform(10, 20))  # longer pause between searches

print(f"Total unique jobs: {len(all_jobs)}")

Anti-Bot Measures and How to Handle Them

LinkedIn is aggressive about bot detection. They fingerprint request patterns, track IP velocity, and serve challenges after a small number of rapid requests from the same IP.

What Gets You Blocked

Behavior Risk What Happens
Fixed 1-second delays High 429 after ~5 pages
Same User-Agent always Medium CAPTCHA challenge
Datacenter IP High Blocked after ~10 requests
> 100 requests/hour from one IP High Temporary IP ban
Immediate detail fetch after search Medium CAPTCHA on detail page

What Works

1. Rotate headers — The script above uses a pool of 3 User-Agent/header combinations. Expand this pool for production use.

2. Residential proxies — For any serious LinkedIn scraping, ThorData's residential proxies are essential. Residential IPs rotate automatically and look like real users from different locations.

# Placeholder credentials — substitute your own proxy account values.
PROXY_USER = "your_username"
PROXY_PASS = "your_password"
PROXY_HOST = "proxy.thordata.com"
PROXY_PORT = 9000

# requests tunnels HTTPS through an HTTP CONNECT proxy, so both schemes
# point at the same http:// proxy URL.
proxies = {
    "http": f"http://{PROXY_USER}:{PROXY_PASS}@{PROXY_HOST}:{PROXY_PORT}",
    "https": f"http://{PROXY_USER}:{PROXY_PASS}@{PROXY_HOST}:{PROXY_PORT}",
}

# Illustrative call — `url` is whichever page you are fetching.
response = requests.get(url, headers=get_headers(),
                        proxies=proxies, timeout=20)

3. Realistic delays — Vary between 3.5 and 8 seconds between page requests. Add occasional longer pauses (15-30 seconds) every 5-10 requests.

4. Separate scraping phases — Collect all job card data first, then fetch details in a second pass with longer delays.

5. Respect the 429 — If you get a 429 status code, back off for at least 60 seconds:

def safe_get(url: str, proxies: dict = None,
              max_retries: int = 3) -> requests.Response:
    """GET with automatic 429 backoff.

    Args:
        url: Target URL.
        proxies: Optional requests-style proxy mapping.
        max_retries: Attempts before giving up on repeated 429s.

    Returns:
        The first non-429 response (not otherwise status-checked).

    Raises:
        RuntimeError: If every attempt was rate-limited.
    """
    for attempt in range(max_retries):
        response = requests.get(url, headers=get_headers(),
                                 proxies=proxies, timeout=15)
        if response.status_code != 429:
            return response
        # Retry-After may be an HTTP-date rather than seconds (RFC 9110);
        # the original int() would crash on that — fall back to 60s.
        try:
            wait = int(response.headers.get("Retry-After", 60))
        except ValueError:
            wait = 60
        print(f"Rate limited — waiting {wait}s")
        time.sleep(wait)
    # Specific exception type instead of bare Exception; still caught by
    # any caller handling Exception, so backward compatible.
    raise RuntimeError(f"Max retries exceeded for {url}")

Building a Salary Database

One high-value use case is building a salary comparison database:

import re
import sqlite3

def init_db(db_path: str = "linkedin_jobs.db") -> sqlite3.Connection:
    """Open (creating if needed) the SQLite database for LinkedIn job data.

    Creates the jobs table plus lookup indexes on company, title, and
    posted date, then returns the open connection.
    """
    schema = """
        CREATE TABLE IF NOT EXISTS jobs (
            id TEXT PRIMARY KEY,
            title TEXT,
            company TEXT,
            location TEXT,
            posted TEXT,
            salary TEXT,
            salary_min REAL,
            salary_max REAL,
            description TEXT,
            seniority_level TEXT,
            employment_type TEXT,
            job_function TEXT,
            industries TEXT,
            url TEXT,
            keywords TEXT,
            scraped_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        );

        CREATE INDEX IF NOT EXISTS idx_jobs_company ON jobs(company);
        CREATE INDEX IF NOT EXISTS idx_jobs_title ON jobs(title);
        CREATE INDEX IF NOT EXISTS idx_jobs_posted ON jobs(posted);
    """
    connection = sqlite3.connect(db_path)
    connection.executescript(schema)
    connection.commit()
    return connection


def parse_salary(raw: str) -> tuple[float | None, float | None]:
    """Parse salary string to min/max floats. Returns (min, max)."""
    if not raw:
        return None, None

    numbers = re.findall(r"[\d,]+", raw.replace(",", ""))
    if not numbers:
        return None, None

    amounts = [int(n) for n in numbers]

    # Annualize hourly rates
    if "hr" in raw.lower() or "hour" in raw.lower():
        amounts = [a * 2080 for a in amounts]

    if len(amounts) >= 2:
        return float(min(amounts)), float(max(amounts))
    elif amounts:
        return float(amounts[0]), float(amounts[0])
    return None, None


def save_jobs(conn: sqlite3.Connection, jobs: list[dict],
               keywords: str = "") -> int:
    """Save job listings to database. Returns count actually inserted.

    Fix: the original incremented its counter even when INSERT OR IGNORE
    skipped an already-present id, so the return value over-reported new
    rows. cursor.rowcount is 1 for a real insert and 0 for an ignored
    duplicate, giving an accurate count.

    Args:
        conn: Open SQLite connection with the jobs table (see init_db).
        jobs: Job dicts as produced by parse_job_cards / fetch_job_detail.
        keywords: Search query stored alongside each row for later filtering.
    """
    saved = 0
    for job in jobs:
        salary_min, salary_max = parse_salary(job.get("salary"))
        description = job.get("description")
        try:
            cursor = conn.execute("""
                INSERT OR IGNORE INTO jobs
                (id, title, company, location, posted, salary,
                 salary_min, salary_max, description, seniority_level,
                 employment_type, job_function, industries, url, keywords)
                VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)
            """, (
                job.get("id"), job.get("title"), job.get("company"),
                job.get("location"), job.get("posted"), job.get("salary"),
                salary_min, salary_max,
                description[:3000] if description else None,  # cap blob size
                job.get("seniority_level"), job.get("employment_type"),
                job.get("job_function"), job.get("industries"),
                job.get("url"), keywords,
            ))
            saved += cursor.rowcount  # 0 when the id already existed
        except sqlite3.Error:
            # Best-effort batch insert: skip a malformed row rather than
            # aborting the whole save.
            pass
    conn.commit()
    return saved


def salary_report(conn: sqlite3.Connection, keywords: str = None) -> None:
    """Print salary statistics from collected jobs."""
    where = "WHERE salary_min IS NOT NULL"
    params = ()
    if keywords:
        where += " AND keywords LIKE ?"
        params = (f"%{keywords}%",)

    print(f"\n=== Salary Analysis {'for ' + keywords if keywords else ''} ===\n")

    for row in conn.execute(f"""
        SELECT
            CASE
                WHEN salary_max > 200000 THEN 'Elite ($200k+)'
                WHEN salary_max > 150000 THEN 'Senior ($150-200k)'
                WHEN salary_max > 100000 THEN 'Mid ($100-150k)'
                WHEN salary_max > 70000 THEN 'Junior ($70-100k)'
                ELSE 'Entry (< $70k)'
            END as tier,
            COUNT(*) as jobs,
            AVG(salary_min) as avg_min,
            AVG(salary_max) as avg_max
        FROM jobs {where}
        GROUP BY tier
        ORDER BY MIN(salary_max) DESC
    """, params):
        print(f"  {row[0]:20}: {row[1]:4} jobs, "
              f"avg ${row[2]:,.0f} - ${row[3]:,.0f}")

    # Top companies by job count with salary data
    print("\nTop companies by job postings (with salary data):")
    for row in conn.execute(f"""
        SELECT company, COUNT(*) as jobs,
               AVG(salary_min) as avg_min,
               AVG(salary_max) as avg_max
        FROM jobs {where}
        GROUP BY company
        ORDER BY jobs DESC LIMIT 10
    """, params):
        print(f"  {(row[0] or 'Unknown'):30}: {row[1]:3} jobs, "
              f"avg ${row[2]:,.0f} - ${row[3]:,.0f}")

Skills Extraction from Job Descriptions

Job descriptions are goldmines for skills demand analysis:

# Skill name -> case-insensitive regex matching it (alternations cover
# common synonyms, e.g. "postgres" for PostgreSQL, "k8s" for Kubernetes).
SKILL_PATTERNS = {
    "Python": r"\bpython\b",
    "Django": r"\bdjango\b",
    "FastAPI": r"\bfastapi\b",
    "Flask": r"\bflask\b",
    "PostgreSQL": r"\bpostgresql\b|\bpostgres\b",
    "MySQL": r"\bmysql\b",
    "Redis": r"\bredis\b",
    "Docker": r"\bdocker\b",
    "Kubernetes": r"\bkubernetes\b|\bk8s\b",
    "AWS": r"\baws\b|\bamazon web services\b",
    "GCP": r"\bgcp\b|\bgoogle cloud\b",
    "Azure": r"\bazure\b",
    "Spark": r"\bapache spark\b|\bpyspark\b",
    "Machine Learning": r"\bmachine learning\b|\bml\b",
    "Data Science": r"\bdata science\b",
    "React": r"\breact\b",
    "TypeScript": r"\btypescript\b",
    "GraphQL": r"\bgraphql\b",
    "Kafka": r"\bkafka\b",
    "Elasticsearch": r"\belasticsearch\b",
}


def extract_skills(description: str) -> list[str]:
    """Return the recognized skill names mentioned in a job description.

    Matches each SKILL_PATTERNS regex against the lowercased text and
    returns the matching skill names in dictionary order; empty list for
    falsy input.
    """
    if not description:
        return []

    lowered = description.lower()
    return [
        skill
        for skill, pattern in SKILL_PATTERNS.items()
        if re.search(pattern, lowered, re.IGNORECASE)
    ]


def skills_demand_report(conn: sqlite3.Connection) -> None:
    """Count skill mentions across all collected job descriptions.

    Prints each skill that appears at least once, with its count,
    percentage of analyzed jobs, and a proportional bar.
    """
    counts = dict.fromkeys(SKILL_PATTERNS, 0)
    total_jobs = 0

    cursor = conn.execute(
        "SELECT description FROM jobs WHERE description IS NOT NULL"
    )
    for (description,) in cursor:
        total_jobs += 1
        for skill in extract_skills(description):
            counts[skill] = counts.get(skill, 0) + 1

    print(f"\n=== Skills Demand ({total_jobs} jobs analyzed) ===\n")
    # Most-demanded first; skills never seen are omitted from the output.
    for skill, count in sorted(counts.items(), key=lambda item: -item[1]):
        if count > 0:
            pct = count / total_jobs * 100
            bar = "█" * int(pct / 2)
            print(f"  {skill:20}: {count:5} ({pct:.1f}%) {bar}")

Summary

LinkedIn public job search is accessible without authentication. The URL parameters are clean, the HTML structure is consistent enough to parse reliably, and you can get title, company, location, posted date, salary (when listed), and full descriptions from two endpoint types. The limiting factor is rate limiting — residential proxies from ThorData and realistic delays are what separate a scraper that runs for ten minutes from one that runs all day. Start conservative on delays (5+ seconds between requests), monitor your response codes, and increase throughput only once you have the proxy layer working. The salary data and skills extraction capabilities make this one of the more commercially valuable scraping use cases available without any authentication.

Running the scraper on a regular schedule lets you track market trends rather than just taking a snapshot:

from datetime import datetime, timedelta

def daily_job_collection(conn: sqlite3.Connection,
                           search_configs: list[dict],
                           proxies: dict = None) -> None:
    """
    Run daily job collection for a set of search configs.
    Designed to be called from a cron job or scheduler.

    Args:
        conn: Open SQLite connection (schema from init_db).
        search_configs: Dicts with "keywords", "geo_id", "location" keys.
        proxies: Optional requests-style proxy mapping.
    """
    from datetime import timezone  # local import keeps snippet self-contained

    # datetime.utcnow() is deprecated (Python 3.12+); use an aware UTC now.
    run_date = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    print(f"\nDaily job collection — {run_date}")

    total_new = 0
    for config in search_configs:
        keywords = config["keywords"]
        geo_id = config["geo_id"]
        location = config["location"]

        print(f"\n  Searching: {keywords} in {location}")

        jobs = scrape_all_pages(
            keywords=keywords,
            location=location,
            geo_id=geo_id,
            max_pages=3,
            time_filter="r86400",  # last 24 hours only
            proxies=proxies,
        )

        saved = save_jobs(conn, jobs, keywords=keywords)
        total_new += saved
        print(f"  New jobs: {saved}")

        # Long pause between searches to stay under rate limits.
        time.sleep(random.uniform(15, 25))

    print(f"\nTotal new jobs collected: {total_new}")


def trend_analysis(conn: sqlite3.Connection,
                    keywords: str,
                    days: int = 30) -> None:
    """Analyze job posting trends for a specific keywords."""
    print(f"\n=== 30-Day Trend: '{keywords}' ===\n")

    # Daily posting volume
    print("Daily job postings:")
    for row in conn.execute("""
        SELECT
            DATE(scraped_at) as day,
            COUNT(*) as new_jobs
        FROM jobs
        WHERE keywords LIKE ?
          AND scraped_at >= DATE('now', '-30 days')
        GROUP BY day
        ORDER BY day DESC
    """, (f"%{keywords}%",)):
        bar = "█" * (row[1] // 2)
        print(f"  {row[0]}: {row[1]:4} jobs  {bar}")

    # Remote vs on-site trend
    print("\nWork type distribution:")
    for row in conn.execute("""
        SELECT
            CASE
                WHEN LOWER(location) LIKE '%remote%' THEN 'Remote'
                WHEN LOWER(location) LIKE '%hybrid%' THEN 'Hybrid'
                ELSE 'On-site'
            END as work_type,
            COUNT(*) as count
        FROM jobs
        WHERE keywords LIKE ?
        GROUP BY work_type
        ORDER BY count DESC
    """, (f"%{keywords}%",)):
        print(f"  {row[0]:10}: {row[1]} jobs")

    # Top hiring companies
    print("\nTop hiring companies:")
    for row in conn.execute("""
        SELECT company, COUNT(*) as jobs
        FROM jobs
        WHERE keywords LIKE ?
          AND company IS NOT NULL
          AND scraped_at >= DATE('now', '-30 days')
        GROUP BY company
        ORDER BY jobs DESC LIMIT 15
    """, (f"%{keywords}%",)):
        print(f"  {(row[0] or '?'):30}: {row[1]:3} postings")

    # Seniority distribution
    print("\nSeniority level breakdown:")
    for row in conn.execute("""
        SELECT
            COALESCE(seniority_level, 'Not specified') as level,
            COUNT(*) as count
        FROM jobs
        WHERE keywords LIKE ?
        GROUP BY level
        ORDER BY count DESC
    """, (f"%{keywords}%",)):
        print(f"  {row[0]:25}: {row[1]}")


def salary_trend(conn: sqlite3.Connection, keywords: str) -> None:
    """Track salary range changes over time."""
    print(f"\n=== Salary Trend: '{keywords}' ===\n")

    for row in conn.execute("""
        SELECT
            DATE(scraped_at) as week,
            COUNT(CASE WHEN salary_min IS NOT NULL THEN 1 END) as jobs_with_salary,
            AVG(salary_min) as avg_min,
            AVG(salary_max) as avg_max,
            MAX(salary_max) as top_salary
        FROM jobs
        WHERE keywords LIKE ?
          AND scraped_at >= DATE('now', '-60 days')
        GROUP BY STRFTIME('%Y-%W', scraped_at)
        ORDER BY week DESC
        LIMIT 8
    """, (f"%{keywords}%",)):
        if row[2]:
            print(f"  Week of {row[0]}: "
                  f"{row[1]} listings with salary, "
                  f"avg ${row[2]:,.0f}-${row[3]:,.0f}, "
                  f"top ${row[4]:,.0f}")

Building a Job Alert System

Combining the scraper with email or webhook notifications creates a useful personal job alert product:

import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart

def find_new_jobs_matching_criteria(conn: sqlite3.Connection,
                                     title_keywords: list[str],
                                     min_salary: float = None,
                                     locations: list[str] = None,
                                     hours_back: int = 24) -> list[dict]:
    """
    Find recently added jobs matching specific criteria.
    Use for building a personalized job alert.

    Filters are combined with AND; each keyword/location list is OR-ed
    internally. Jobs without parsed salary data pass the salary filter.
    Returns at most 50 jobs, newest first.
    """
    conditions = ["scraped_at >= DATETIME('now', ?)", "id IS NOT NULL"]
    params = [f"-{hours_back} hours"]

    # Title keyword filter
    if title_keywords:
        placeholders = " OR ".join(
            "LOWER(title) LIKE ?" for _ in title_keywords
        )
        conditions.append(f"({placeholders})")
        params += [f"%{kw.lower()}%" for kw in title_keywords]

    # Salary filter
    if min_salary:
        conditions.append("(salary_max IS NULL OR salary_max >= ?)")
        params.append(min_salary)

    # Location filter
    if locations:
        placeholders = " OR ".join(
            "LOWER(location) LIKE ?" for _ in locations
        )
        conditions.append(f"({placeholders})")
        params += [f"%{loc.lower()}%" for loc in locations]

    where = " AND ".join(conditions)
    query = f"""
        SELECT id, title, company, location, salary, posted, url
        FROM jobs WHERE {where}
        ORDER BY scraped_at DESC LIMIT 50
    """

    fields = ("id", "title", "company", "location", "salary", "posted", "url")
    return [dict(zip(fields, row)) for row in conn.execute(query, params)]


def format_job_digest(jobs: list[dict]) -> str:
    """Render a list of job dicts as an HTML fragment for an email digest.

    Produces an <h2> header with a correctly pluralized count followed by
    a <ul> of linked listings; a placeholder paragraph when *jobs* is empty.
    """
    if not jobs:
        return "<p>No new matching jobs found.</p>"

    count = len(jobs)
    parts = [
        f"<h2>{count} New Job{'s' if count != 1 else ''} Found</h2>",
        "<ul>",
    ]
    for job in jobs:
        salary_str = f" — {job['salary']}" if job.get("salary") else ""
        parts.append(
            f'<li><a href="{job["url"]}">{job["title"]}</a> '
            f'at <strong>{job["company"]}</strong> '
            f'({job["location"]}{salary_str})</li>'
        )
    parts.append("</ul>")
    return "\n".join(parts)

Key Takeaways

LinkedIn public job search provides job title, company, location, salary (when listed), and full description without any authentication. The combination of skills extraction, salary parsing, and trend analysis over time creates a powerful labor market intelligence tool. For sustained scraping, ThorData's residential proxies solve the IP-blocking problem that limits scrapers to a handful of pages before they hit rate limits. With proper proxy rotation and realistic delays, you can collect thousands of new listings daily across multiple search configurations.