How to Scrape Historical Weather Data in 2026: Open-Meteo, NOAA & Weather Underground
Historical weather data powers everything from agricultural forecasting to insurance risk models, real estate analysis, and event planning tools. The challenge isn't that weather data is hidden -- it's scattered across dozens of sources with different formats, coverage gaps, and access methods.
This guide covers the three most practical sources: Open-Meteo (free API, global coverage), NOAA (the gold standard for US data), and Weather Underground (community station data you can't get elsewhere). We'll use Python for all three.
What Data Can You Collect?
Across these sources, you can get:
- Daily observations -- temperature (min/max/avg), precipitation, humidity, wind speed, pressure
- Hourly records -- temperature, dew point, wind direction, cloud cover, visibility
- Historical ranges -- Open-Meteo goes back to 1940, NOAA to the 1800s for some stations
- Station-level data -- specific weather station readings vs. interpolated grid data
- Derived metrics -- growing degree days, heating/cooling degree days, precipitation accumulation
- Climate normals -- 30-year averages for any location, useful as baselines for anomaly detection
Anti-Bot Measures and Rate Limits
Weather data sources vary widely in how they handle automated access:
- Open-Meteo -- Generous for a free API. No API key needed. Rate limit is ~10,000 requests/day per IP. Exceeding it returns 429 with a retry-after header.
- NOAA CDO API -- Requires a free token. Limit is 5 requests per second and 10,000 per day. Well-documented limits, clean error messages.
- Weather Underground -- The hardest to scrape. Cloudflare protection, JavaScript rendering, aggressive bot detection. Datacenter IPs get blocked fast.
- IP-based throttling -- All three sources throttle by IP. For bulk collection across thousands of cities, you'll hit limits quickly from a single IP.
For Weather Underground scraping and bulk Open-Meteo requests, rotating residential proxies make the difference between finishing your dataset and getting blocked halfway through. ThorData works well here -- their residential IPs don't trigger the bot detection that datacenter IPs do on Weather Underground, and the rotation keeps you under per-IP rate limits on Open-Meteo.
Open-Meteo: Free Historical Weather API
The simplest option. No API key, global coverage, data back to 1940.
pip install requests pandas
Fetching Historical Data
import requests
import pandas as pd
from datetime import datetime, date
def get_historical_weather(lat: float, lon: float, start: str, end: str) -> pd.DataFrame:
    """Fetch daily historical weather from Open-Meteo's archive API.

    Args:
        lat, lon: Location in decimal degrees.
        start, end: Date range, 'YYYY-MM-DD' (inclusive).

    Returns:
        DataFrame with one row per day: temperatures (C), precipitation (mm),
        snowfall (cm), wind (km/h), humidity (%) and sunshine (hours).

    Raises:
        requests.HTTPError: on non-2xx responses (e.g. 429 when rate-limited).
    """
    url = "https://archive-api.open-meteo.com/v1/archive"
    params = {
        "latitude": lat,
        "longitude": lon,
        "start_date": start,
        "end_date": end,
        "daily": ",".join([
            "temperature_2m_max",
            "temperature_2m_min",
            "temperature_2m_mean",
            "precipitation_sum",
            "rain_sum",
            "snowfall_sum",
            "windspeed_10m_max",
            "windgusts_10m_max",
            "relative_humidity_2m_mean",
            "pressure_msl_mean",
            "sunshine_duration",
            "et0_fao_evapotranspiration",
        ]),
        "timezone": "auto",  # timestamps come back in the location's local zone
    }
    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()
    daily = resp.json()["daily"]
    # Keep the sunshine list aligned with "time" even if the API omits the
    # field (a short/empty list would raise a ragged-length DataFrame error).
    sunshine = daily.get("sunshine_duration") or [None] * len(daily["time"])
    df = pd.DataFrame({
        "date": pd.to_datetime(daily["time"]),
        "temp_max_c": daily["temperature_2m_max"],
        "temp_min_c": daily["temperature_2m_min"],
        "temp_mean_c": daily["temperature_2m_mean"],
        "precipitation_mm": daily["precipitation_sum"],
        "rain_mm": daily["rain_sum"],
        "snowfall_cm": daily["snowfall_sum"],
        "wind_max_kmh": daily["windspeed_10m_max"],
        "wind_gusts_kmh": daily["windgusts_10m_max"],
        "humidity_pct": daily.get("relative_humidity_2m_mean"),
        # Seconds -> hours; `is not None` so a valid 0-second day stays 0.0
        # instead of being coerced to None.
        "sunshine_hours": [s / 3600 if s is not None else None for s in sunshine],
    })
    return df
# Example: New York City, full year 2025
df = get_historical_weather(40.7128, -74.0060, "2025-01-01", "2025-12-31")
print(f"Records: {len(df)}")
# idxmax()/idxmin() give the row label of the extreme day; look up its date.
print(f"Hottest day: {df.loc[df['temp_max_c'].idxmax(), 'date'].date()} ({df['temp_max_c'].max():.1f}C)")
print(f"Coldest day: {df.loc[df['temp_min_c'].idxmin(), 'date'].date()} ({df['temp_min_c'].min():.1f}C)")
print(f"Total rain: {df['rain_mm'].sum():.0f}mm")
print(f"Total snowfall: {df['snowfall_cm'].sum():.0f}cm")
Fetching Hourly Data
For hourly resolution, use the hourly parameter set:
def get_hourly_weather(lat: float, lon: float, start: str, end: str) -> pd.DataFrame:
    """Fetch hourly historical weather from Open-Meteo's archive API.

    Args:
        lat, lon: Location in decimal degrees.
        start, end: Date range, 'YYYY-MM-DD' (inclusive).

    Returns:
        DataFrame with one row per hour: temperature, humidity,
        precipitation/rain, wind, pressure, cloud cover and visibility.

    Raises:
        requests.HTTPError: on non-2xx responses.
    """
    url = "https://archive-api.open-meteo.com/v1/archive"
    params = {
        "latitude": lat,
        "longitude": lon,
        "start_date": start,
        "end_date": end,
        "hourly": "temperature_2m,relative_humidity_2m,precipitation,rain,wind_speed_10m,wind_direction_10m,surface_pressure,cloud_cover,visibility",
        "timezone": "auto",
    }
    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()
    hourly = resp.json()["hourly"]
    n = len(hourly["time"])
    df = pd.DataFrame({
        "datetime": pd.to_datetime(hourly["time"]),
        "temp_c": hourly["temperature_2m"],
        "humidity_pct": hourly["relative_humidity_2m"],
        "precip_mm": hourly["precipitation"],
        # "rain" and "visibility" were requested from the API but previously
        # dropped from the result; expose them (backward-compatible: existing
        # columns are unchanged). .get guards against the API omitting them.
        "rain_mm": hourly.get("rain", [None] * n),
        "wind_speed_kmh": hourly["wind_speed_10m"],
        "wind_dir_deg": hourly["wind_direction_10m"],
        "pressure_hpa": hourly["surface_pressure"],
        "cloud_cover_pct": hourly["cloud_cover"],
        "visibility_m": hourly.get("visibility", [None] * n),
    })
    return df
Bulk City Collection
import time
# City name -> (latitude, longitude) in decimal degrees.
CITIES = {
    "New York": (40.7128, -74.0060),
    "London": (51.5074, -0.1278),
    "Tokyo": (35.6762, 139.6503),
    "Sydney": (-33.8688, 151.2093),
    "Sao Paulo": (-23.5505, -46.6333),
    "Dubai": (25.2048, 55.2708),
    "Toronto": (43.6532, -79.3832),
    "Berlin": (52.5200, 13.4050),
    "Singapore": (1.3521, 103.8198),
    "Mumbai": (19.0760, 72.8777),
    "Cairo": (30.0444, 31.2357),
    "Mexico City": (19.4326, -99.1332),
}
def collect_multi_city(cities: dict, start: str, end: str, proxy: dict = None) -> dict:
    """Fetch daily history for every city in `cities`.

    Args:
        cities: mapping of name -> (lat, lon).
        start, end: 'YYYY-MM-DD' range passed to get_historical_weather.
        proxy: accepted for API compatibility; currently unused in the body.

    Returns:
        Mapping of city name -> DataFrame (failed cities are omitted).
    """
    results = {}
    for name, coords in cities.items():
        lat, lon = coords
        try:
            frame = get_historical_weather(lat, lon, start, end)
        except Exception as exc:
            print(f"Error {name}: {exc}")
        else:
            results[name] = frame
            print(f"Done {name}: {len(frame)} days")
        # Brief pause between requests to stay under per-IP limits.
        time.sleep(0.5)
    return results
# Kick off the bulk collection: one archive request per city (network-bound).
data = collect_multi_city(CITIES, "2025-01-01", "2025-12-31")
NOAA Climate Data Online
NOAA's CDO API is the authoritative source for US weather station data. Get a free token at ncdc.noaa.gov/cdo-web/token.
# CDO API token from ncdc.noaa.gov/cdo-web/token -- free, but required on every request.
NOAA_TOKEN = "YOUR_TOKEN"
def get_noaa_data(station_id: str, start: str, end: str, dataset: str = "GHCND") -> list:
    """Fetch daily weather observations from the NOAA CDO v2 API.

    Pages through results 1000 records at a time (the API's page cap)
    using the 1-based `offset` parameter until the reported total is
    exhausted.

    Args:
        station_id: e.g. 'GHCND:USW00094728' (Central Park, NYC).
        start, end: 'YYYY-MM-DD' date range.
        dataset: CDO dataset id; GHCND is daily summaries.

    Returns:
        List of raw record dicts, one per (date, datatype) pair.

    Raises:
        requests.HTTPError: on non-2xx responses (e.g. bad/missing token).
    """
    url = "https://www.ncdc.noaa.gov/cdo-web/api/v2/data"
    headers = {"token": NOAA_TOKEN}
    params = {
        "datasetid": dataset,
        "stationid": station_id,
        "startdate": start,
        "enddate": end,
        "datatypeid": "TMAX,TMIN,PRCP,SNOW,AWND,TAVG",
        "units": "metric",
        "limit": 1000,  # API maximum page size
    }
    all_records = []
    offset = 1  # CDO offsets are 1-based
    while True:
        params["offset"] = offset
        resp = requests.get(url, headers=headers, params=params, timeout=15)
        resp.raise_for_status()
        data = resp.json()
        results = data.get("results", [])
        if not results:
            break
        all_records.extend(results)
        offset += len(results)
        total_count = data.get("metadata", {}).get("resultset", {}).get("count", 0)
        if offset > total_count:
            break  # final page collected
        time.sleep(0.25)  # stay under the documented 5 requests/second limit
    return all_records
# Central Park station, full year 2025
records = get_noaa_data("GHCND:USW00094728", "2025-01-01", "2025-12-31")
# Records are long-format: one entry per (date, datatype) pair.
for r in records[:5]:
    print(f"{r['date'][:10]} | {r['datatype']}: {r['value']}")
Pivoting NOAA Records to Wide Format
NOAA returns one row per data type per day. Pivot to get one row per day:
def pivot_noaa_records(records: list) -> pd.DataFrame:
    """Reshape NOAA long-format records into a wide daily table.

    Input rows carry one (date, datatype, value) triple each; the output
    has one row per date and one column per metric, with NOAA datatype
    codes renamed to descriptive names where recognized.
    """
    frame = pd.DataFrame(records)
    if frame.empty:
        return frame
    frame["date"] = pd.to_datetime(frame["date"])
    wide = frame.pivot_table(index="date", columns="datatype", values="value", aggfunc="first")
    wide.columns.name = None
    wide = wide.reset_index()
    # NOAA datatype code -> descriptive column name.
    code_names = {
        "TMAX": "temp_max_c",
        "TMIN": "temp_min_c",
        "TAVG": "temp_avg_c",
        "PRCP": "precip_mm",
        "SNOW": "snowfall_mm",
        "AWND": "avg_wind_speed_ms",
    }
    present = {code: name for code, name in code_names.items() if code in wide.columns}
    return wide.rename(columns=present)
Finding Weather Stations
def find_stations(lat: float, lon: float, radius_deg: float = 0.5) -> list:
    """Find NOAA GHCND weather stations near a location.

    Args:
        lat, lon: Center point in decimal degrees.
        radius_deg: Half-width of the search bounding box, in degrees
            (roughly 55 km per degree of latitude).

    Returns:
        Up to 25 station dicts: id, name, coordinates, elevation and
        period of record (min_date/max_date).

    Raises:
        requests.HTTPError: on non-2xx responses (e.g. bad token).
    """
    url = "https://www.ncdc.noaa.gov/cdo-web/api/v2/stations"
    headers = {"token": NOAA_TOKEN}
    params = {
        "datasetid": "GHCND",
        # CDO extent is a bounding box: south,west,north,east
        "extent": f"{lat-radius_deg},{lon-radius_deg},{lat+radius_deg},{lon+radius_deg}",
        "limit": 25,
    }
    resp = requests.get(url, headers=headers, params=params, timeout=15)
    # Previously missing: without this, an auth/rate-limit error silently
    # produced an empty station list instead of surfacing the failure.
    resp.raise_for_status()
    data = resp.json()
    return [
        {
            "id": s["id"],
            "name": s["name"],
            "lat": s.get("latitude"),
            "lon": s.get("longitude"),
            "elevation_m": s.get("elevation"),
            "min_date": s.get("mindate"),
            "max_date": s.get("maxdate"),
        }
        for s in data.get("results", [])
    ]
# Find stations near Chicago
chicago_stations = find_stations(41.8781, -87.6298)
# min_date/max_date show each station's period of record.
for s in chicago_stations[:5]:
    print(f"{s['id']}: {s['name']} ({s['min_date']} to {s['max_date']})")
Scraping Weather Underground
Weather Underground has data from 250,000+ personal weather stations -- granularity you can't get from NOAA or Open-Meteo. But it requires scraping.
from bs4 import BeautifulSoup
import json
def scrape_wunderground(station_id: str, date_str: str, proxy: str = None) -> dict:
    """Scrape one day of observations for a Weather Underground PWS.

    Args:
        station_id: Personal weather station id, e.g. 'KNYNEWYO722'.
        date_str: 'YYYY-MM-DD'.
        proxy: Optional proxy URL, e.g. 'http://USER:[email protected]:9000'.

    Returns:
        Parsed dict containing an "observations" list, or {} when the
        embedded JSON cannot be found or parsed (e.g. a block page).
    """
    url = f"https://www.wunderground.com/dashboard/pws/{station_id}/table/{date_str}/{date_str}/daily"
    # Browser-like headers reduce the chance of bot detection.
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml",
        "Accept-Language": "en-US,en;q=0.9",
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-Mode": "navigate",
    }
    proxies = {"https": proxy, "http": proxy} if proxy else None
    resp = requests.get(url, headers=headers, proxies=proxies, timeout=20)
    soup = BeautifulSoup(resp.text, "html.parser")
    # Weather Underground embeds the observation data as a JSON object
    # inside a <script> tag; locate it and slice out the object literal.
    for script in soup.find_all("script"):
        text = script.string or ""
        if '"observations"' not in text:
            continue
        start = text.find('{"observations"')
        if start < 0:
            continue
        # Walk forward counting brace depth to find the matching close.
        # NOTE(review): a '{' or '}' inside a JSON string value would throw
        # the count off; the json.loads failure below catches that case.
        depth = 0
        end = start
        for i, c in enumerate(text[start:]):
            if c == '{':
                depth += 1
            elif c == '}':
                depth -= 1
                if depth == 0:
                    end = start + i + 1
                    break
        try:
            return json.loads(text[start:end])
        except json.JSONDecodeError:
            pass
    return {}
# Use residential proxy to avoid Cloudflare blocks
PROXY = "http://USER:[email protected]:9000"
data = scrape_wunderground("KNYNEWYO722", "2025-06-15", proxy=PROXY)
observations = data.get("observations", [])
if observations:
    # Each observation nests its readings under the "metric" key.
    first = observations[0]
    print(f"Temp: {first.get('metric', {}).get('tempAvg')}C")
    print(f"Precip: {first.get('metric', {}).get('precipTotal')}mm")
Calculating Derived Metrics
Raw temperature and precipitation data is more useful with derived metrics:
def add_derived_metrics(df: pd.DataFrame) -> pd.DataFrame:
    """Add derived weather metrics to a daily dataframe.

    Expects columns temp_mean_c, temp_max_c, temp_min_c, humidity_pct and
    precipitation_mm. Returns a copy with hdd/cdd/gdd plus boolean flag
    columns appended; the input frame is not modified.
    """
    df = df.copy()  # never mutate the caller's frame
    # Heating/Cooling Degree Days (base 18.3C / 65F)
    base = 18.3
    df["hdd"] = (base - df["temp_mean_c"]).clip(lower=0)
    df["cdd"] = (df["temp_mean_c"] - base).clip(lower=0)
    # Growing Degree Days (base 10C for many crops)
    gdd_base = 10.0
    df["gdd"] = ((df["temp_max_c"] + df["temp_min_c"]) / 2 - gdd_base).clip(lower=0)
    # Heat index flag (simplified for high temp/humidity days)
    df["feels_hot"] = (df["temp_max_c"] > 32) & (df["humidity_pct"] > 60)
    # Frost days
    df["frost_day"] = df["temp_min_c"] < 0
    # Heavy rain days
    df["heavy_rain"] = df["precipitation_mm"] > 25
    return df
# Fetch a year of NYC daily data and enrich it with degree-day/threshold metrics.
df = get_historical_weather(40.7128, -74.0060, "2025-01-01", "2025-12-31")
df = add_derived_metrics(df)
print(f"Heating degree days: {df['hdd'].sum():.0f}")
print(f"Cooling degree days: {df['cdd'].sum():.0f}")
print(f"Frost days: {df['frost_day'].sum()}")
print(f"Heavy rain days: {df['heavy_rain'].sum()}")
Storing and Analyzing Weather Data
import sqlite3
def init_weather_db(path: str = "weather.db"):
    """Open (creating if needed) the SQLite weather database.

    The (city, date, source) composite primary key lets the same day be
    stored once per data source, enabling cross-source comparison.

    Returns:
        An open sqlite3.Connection.
    """
    conn = sqlite3.connect(path)
    conn.execute("""
        CREATE TABLE IF NOT EXISTS daily_weather (
            city TEXT,
            lat REAL,
            lon REAL,
            date TEXT,
            temp_max_c REAL,
            temp_min_c REAL,
            temp_mean_c REAL,
            precip_mm REAL,
            wind_max_kmh REAL,
            humidity_pct REAL,
            snowfall_cm REAL,
            hdd REAL,
            cdd REAL,
            gdd REAL,
            source TEXT,
            PRIMARY KEY (city, date, source)
        )
    """)
    conn.commit()
    return conn
def save_weather(conn, city: str, lat: float, lon: float, df: pd.DataFrame, source: str = "open-meteo"):
    """Upsert one city's daily rows into the daily_weather table.

    Metric columns missing from `df` are stored as NULL (Series.get
    returns None). Rows sharing (city, date, source) are overwritten via
    INSERT OR REPLACE. Commits once at the end.
    """
    rows = [
        (
            city, lat, lon, str(row["date"].date()),
            row.get("temp_max_c"), row.get("temp_min_c"), row.get("temp_mean_c"),
            row.get("precipitation_mm"), row.get("wind_max_kmh"), row.get("humidity_pct"),
            row.get("snowfall_cm"), row.get("hdd"), row.get("cdd"), row.get("gdd"),
            source,
        )
        for _, row in df.iterrows()
    ]
    # Single executemany instead of one execute per row: one statement
    # preparation and far fewer Python<->SQLite round trips.
    conn.executemany(
        "INSERT OR REPLACE INTO daily_weather VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)",
        rows,
    )
    conn.commit()
def query_anomalies(conn, city: str, metric: str = "temp_mean_c", z_threshold: float = 2.0) -> list[dict]:
    """Find days with anomalous weather using z-scores.

    Args:
        conn: Open sqlite3 connection holding a daily_weather table.
        city: City name to filter on.
        metric: Numeric daily_weather column to analyze.
        z_threshold: Minimum |z| for a day to count as anomalous.

    Returns:
        Anomalous days sorted by |z| descending; [] with <10 data points.

    Raises:
        ValueError: if `metric` is not a plain identifier. Column names
            cannot be bound as SQL parameters, so the name is interpolated
            into the query -- validate it to keep that injection-safe.
    """
    import statistics
    if not metric.isidentifier():
        raise ValueError(f"invalid metric column name: {metric!r}")
    rows = conn.execute(
        f"SELECT date, {metric} FROM daily_weather WHERE city = ? AND {metric} IS NOT NULL ORDER BY date",
        (city,)
    ).fetchall()
    if len(rows) < 10:
        return []  # too little data for a meaningful mean/stdev
    values = [r[1] for r in rows]
    mean = statistics.mean(values)
    stdev = statistics.stdev(values)
    anomalies = []
    for day, val in rows:
        if stdev > 0:  # constant series has no anomalies
            z = (val - mean) / stdev
            if abs(z) >= z_threshold:
                anomalies.append({
                    "date": day,
                    "value": round(val, 1),
                    "z_score": round(z, 2),
                    "direction": "hot/wet/windy" if z > 0 else "cold/dry/calm",
                })
    return sorted(anomalies, key=lambda x: abs(x["z_score"]), reverse=True)
Building a Weather Comparison Tool
Compare weather across multiple cities and years:
def compare_cities_annual(cities: dict, year: int, db_path: str = "weather.db") -> pd.DataFrame:
    """Compare annual weather metrics across cities.

    Fetches a full calendar year of daily data per city, stores it (with
    derived metrics) in SQLite, then returns one summary row per city
    sorted by average temperature, hottest first.
    """
    conn = init_weather_db(db_path)
    start = f"{year}-01-01"
    end = f"{year}-12-31"
    # Collect and store data for each city
    for city, (lat, lon) in cities.items():
        df = get_historical_weather(lat, lon, start, end)
        df = add_derived_metrics(df)
        save_weather(conn, city, lat, lon, df)
        time.sleep(0.5)  # stay polite to the free API between cities
    # Query annual summaries straight from SQLite.
    summary_rows = conn.execute("""
        SELECT city,
               round(avg(temp_mean_c), 1) as avg_temp,
               round(max(temp_max_c), 1) as max_temp,
               round(min(temp_min_c), 1) as min_temp,
               round(sum(precip_mm), 0) as total_precip,
               sum(CASE WHEN temp_min_c < 0 THEN 1 ELSE 0 END) as frost_days,
               round(sum(hdd), 0) as total_hdd,
               round(sum(cdd), 0) as total_cdd
        FROM daily_weather
        WHERE date >= ? AND date <= ? AND source = 'open-meteo'
        GROUP BY city
        ORDER BY avg_temp DESC
    """, (start, end)).fetchall()
    conn.close()
    columns = ["city", "avg_temp_c", "max_temp_c", "min_temp_c", "total_precip_mm", "frost_days", "hdd", "cdd"]
    return pd.DataFrame(summary_rows, columns=columns)
# Collect, store and summarize a full year for every city (network-bound).
comparison = compare_cities_annual(CITIES, 2025)
print(comparison.to_string(index=False))
Legal Considerations
Open-Meteo is explicitly free and open-source -- no restrictions on commercial use. NOAA data is public domain (US government). Weather Underground's data comes from personal weather stations whose owners opted into sharing, but WU's Terms of Service restrict scraping. For production use, consider WU's paid API tier. Open-Meteo and NOAA cover the vast majority of use cases without legal concerns.
Key Takeaways
- Open-Meteo first -- free, no API key, global coverage back to 1940. It handles 90% of historical weather needs.
- NOAA CDO is authoritative for US station data and goes back centuries for some locations.
- Weather Underground fills gaps with personal station data but requires scraping with residential proxies. ThorData's rotating residential IPs handle Cloudflare and per-IP throttling that blocks datacenter proxies.
- For bulk city collection (100+ locations), rate limits hit fast from a single IP -- proxy rotation keeps your collection running.
- Add derived metrics (HDD, CDD, GDD, frost days) to make raw temperature data actionable.
- Store everything in SQLite with city/date/source as the composite key for easy cross-source comparison.
- Z-score anomaly detection on historical data surfaces extreme weather events worth investigating further.
Climate Change Signal Detection
With decades of historical data, you can detect temperature trend signals:
import numpy as np
def detect_warming_trend(df: pd.DataFrame, column: str = "temp_mean_c") -> dict:
    """Estimate a linear trend in daily values via ordinary least squares.

    Returns a summary dict with the trend in degrees per decade plus the
    mean of the first and last 30 observations; with fewer than 30 usable
    rows, returns {"error": ...} instead.
    """
    clean = df.dropna(subset=[column]).copy()
    if len(clean) < 30:
        return {"error": "Not enough data points"}
    # Regressor: days elapsed since the first observation.
    clean["day_num"] = (clean["date"] - clean["date"].min()).dt.days
    x = clean["day_num"].values
    y = clean[column].values
    n = len(x)
    # Closed-form OLS slope.
    slope = (n * np.sum(x * y) - np.sum(x) * np.sum(y)) / (n * np.sum(x**2) - np.sum(x)**2)
    per_decade = slope * 3650  # ~10 years expressed in days
    return {
        "column": column,
        "years_of_data": round(x.max() / 365, 1),
        "trend_per_decade_c": round(per_decade, 3),
        "direction": "warming" if slope > 0 else "cooling",
        "start_mean": round(y[:30].mean(), 2),
        "end_mean": round(y[-30:].mean(), 2),
        "total_change_c": round(y[-30:].mean() - y[:30].mean(), 2),
    }
# NYC temperature trend 1980-2025 (46 years of daily data in one request)
nyc_data = get_historical_weather(40.7128, -74.0060, "1980-01-01", "2025-12-31")
trend = detect_warming_trend(nyc_data)
print(f"NYC temperature trend: {trend['trend_per_decade_c']}C per decade ({trend['direction']})")
print(f"Period: {trend['years_of_data']} years")
print(f"Start avg: {trend['start_mean']}C -> End avg: {trend['end_mean']}C (change: {trend['total_change_c']}C)")
Agricultural Weather Analysis
Growing degree days and frost dates are critical for agricultural planning:
def agricultural_season_summary(lat: float, lon: float, year: int) -> dict:
    """Generate an agricultural weather summary for one calendar year.

    Reports frost bookends (last spring / first fall frost), growing
    season length, accumulated degree days and precipitation totals.
    """
    frame = add_derived_metrics(
        get_historical_weather(lat, lon, f"{year}-01-01", f"{year}-12-31")
    )
    freezing = frame["temp_min_c"] < 0
    months = frame["date"].dt.month
    # Last sub-zero day before July 1 / first sub-zero day after August 1.
    spring_dates = frame.loc[freezing & (months < 7), "date"]
    fall_dates = frame.loc[freezing & (months > 7), "date"]
    last_spring_frost = spring_dates.max() if not spring_dates.empty else None
    first_fall_frost = fall_dates.min() if not fall_dates.empty else None
    season_days = None
    if last_spring_frost is not None and first_fall_frost is not None:
        season_days = (first_fall_frost - last_spring_frost).days
    return {
        "year": year,
        "last_spring_frost": str(last_spring_frost.date()) if last_spring_frost is not None else "None",
        "first_fall_frost": str(first_fall_frost.date()) if first_fall_frost is not None else "None",
        "growing_season_days": season_days,
        "total_gdd_base10": round(frame["gdd"].sum(), 0),
        "total_hdd": round(frame["hdd"].sum(), 0),
        "total_cdd": round(frame["cdd"].sum(), 0),
        "annual_precip_mm": round(frame["precipitation_mm"].sum(), 0),
        "frost_days": int(frame["frost_day"].sum()),
    }
# Agricultural summary for Iowa farmland (corn belt)
iowa_summary = agricultural_season_summary(42.0, -93.6, 2025)
for k, v in iowa_summary.items():
    print(f" {k}: {v}")
Comparing Actual vs Climate Normal
Use 30-year averages as a baseline to identify anomalous years:
def compare_year_to_normal(lat: float, lon: float, target_year: int, baseline_start: int = 1991, baseline_end: int = 2020) -> pd.DataFrame:
    """Compare one year's weather to the 30-year climate normal.

    Fetches the target year and each baseline year separately (one
    request per year), then reports monthly temperature and precipitation
    anomalies relative to the baseline averages.
    """
    def _fetch_year(yr: int) -> pd.DataFrame:
        # One calendar year of daily data with a month column attached.
        frame = get_historical_weather(lat, lon, f"{yr}-01-01", f"{yr}-12-31")
        frame["month"] = frame["date"].dt.month
        return frame

    target = _fetch_year(target_year)
    yearly_frames = []
    for yr in range(baseline_start, baseline_end + 1):
        frame = _fetch_year(yr)
        frame["year"] = yr
        yearly_frames.append(frame)
        time.sleep(0.2)  # pause between per-year requests
    baseline = pd.concat(yearly_frames)
    n_years = baseline_end - baseline_start + 1
    # Monthly normals: mean temperature, average yearly precip total.
    normals = baseline.groupby("month").agg(
        normal_temp=("temp_mean_c", "mean"),
        normal_precip=("precipitation_mm", "sum"),
    )
    normals["normal_precip"] /= n_years
    # Target year aggregated the same way.
    actuals = target.groupby("month").agg(
        actual_temp=("temp_mean_c", "mean"),
        actual_precip=("precipitation_mm", "sum"),
    )
    merged = normals.join(actuals)
    merged["temp_anomaly"] = (merged["actual_temp"] - merged["normal_temp"]).round(2)
    merged["precip_anomaly_pct"] = ((merged["actual_precip"] - merged["normal_precip"]) / merged["normal_precip"] * 100).round(1)
    return merged
Rainfall Pattern Analysis
def analyze_rainfall_patterns(df: pd.DataFrame) -> dict:
    """Analyze rainfall patterns: wet/dry day counts, spell lengths, intensity.

    A "rainy" day is one with more than 1.0 mm of precipitation.
    Handles an empty frame and a frame with no rainy days without
    producing NaN or dividing by zero.
    """
    if df.empty:
        # Nothing to analyze; return all-zero stats instead of crashing.
        return {
            "rainy_days": 0, "dry_days": 0, "rainy_day_pct": 0.0,
            "avg_rain_on_rainy_days": 0.0, "max_daily_rain": 0.0,
            "longest_dry_spell": 0, "avg_dry_spell": 0,
            "heavy_rain_days_25mm": 0, "extreme_rain_days_50mm": 0,
        }
    rainy_days = df[df["precipitation_mm"] > 1.0]
    dry_days = df[df["precipitation_mm"] <= 1.0]
    # Label maximal runs of consecutive dry days with a spell id (the id
    # increments whenever is_dry flips), then measure each run's length.
    spells = df.copy()
    spells["is_dry"] = spells["precipitation_mm"] <= 1.0
    spells["spell_id"] = (spells["is_dry"] != spells["is_dry"].shift()).cumsum()
    dry_spells = spells[spells["is_dry"]].groupby("spell_id").size()
    return {
        "rainy_days": len(rainy_days),
        "dry_days": len(dry_days),
        "rainy_day_pct": round(len(rainy_days) / len(df) * 100, 1),
        # Guard: mean() of an empty selection is NaN; report 0.0 instead.
        "avg_rain_on_rainy_days": round(rainy_days["precipitation_mm"].mean(), 1) if not rainy_days.empty else 0.0,
        "max_daily_rain": round(df["precipitation_mm"].max(), 1),
        "longest_dry_spell": int(dry_spells.max()) if not dry_spells.empty else 0,
        "avg_dry_spell": round(dry_spells.mean(), 1) if not dry_spells.empty else 0,
        "heavy_rain_days_25mm": int((df["precipitation_mm"] > 25).sum()),
        "extreme_rain_days_50mm": int((df["precipitation_mm"] > 50).sum()),
    }