Files
wiki/tasks/flightradar24/ingest/schedule/opensky_worker.py
2026-04-20 14:20:01 +03:00

142 lines
5.3 KiB
Python

"""
OpenSky Network API worker — enriches fr24_ext.schedule with icao24 + actual times.
"""
import logging
import time
from datetime import date, datetime, timezone
from typing import Dict, List, Optional
from functools import wraps
import requests
import psycopg2
from config import config
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# Base URL of the OpenSky flights endpoints; fetch_opensky_flights appends the
# direction ('arrival' or 'departure') as the final path segment.
OPENSKY_BASE = "https://opensky-network.org/api/flights"
# ── retry decorator ───────────────────────────────────────────────────────────
def retry(max_retries: int = 3, base_delay: float = 10.0):
    """
    Build a decorator that re-runs a call when it raises requests.RequestException.

    max_retries: total number of attempts (the first call counts as one).
        Values below 1 are treated as 1, so the wrapped function is always
        called at least once.
    base_delay: seconds to sleep after the first failed attempt; the wait
        doubles after each further failure (base_delay * 2 ** attempt).

    The wrapper returns the wrapped function's result. When the final attempt
    fails, its exception is re-raised unchanged. Exceptions other than
    requests.RequestException are not caught and propagate immediately.
    """
    # Guard against max_retries <= 0: an empty attempt loop would skip the
    # call entirely and make the wrapper silently return None.
    attempts = max(1, max_retries)

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(attempts):
                try:
                    return func(*args, **kwargs)
                except requests.RequestException as e:
                    # Out of attempts: surface the real error to the caller.
                    if attempt >= attempts - 1:
                        raise
                    wait = base_delay * (2 ** attempt)
                    log.warning("Retry %d/%d after %.0fs: %s", attempt + 1, attempts, wait, e)
                    time.sleep(wait)
        return wrapper
    return decorator
# ── API fetch ─────────────────────────────────────────────────────────────────
@retry(max_retries=3, base_delay=10.0)
def fetch_opensky_flights(icao: str, begin_ts: int, end_ts: int, direction: str) -> List[Dict]:
    """
    Query OpenSky for the flights of one airport within a time window.

    icao: airport code, sent as the `airport` query parameter.
    begin_ts / end_ts: window bounds, sent as `begin` / `end`.
    direction: 'arrival' | 'departure' — appended to OPENSKY_BASE as the
        endpoint name.

    Returns the decoded JSON body, or an empty list when the server answers
    404 or the body decodes to something falsy. Any other error status is
    raised by raise_for_status(); request failures are retried by the
    surrounding @retry decorator.
    """
    # Basic auth is only sent when both username and password are configured.
    has_credentials = bool(config.OPENSKY_USERNAME and config.OPENSKY_PASSWORD)
    credentials = (
        (config.OPENSKY_USERNAME, config.OPENSKY_PASSWORD) if has_credentials else None
    )

    response = requests.get(
        f"{OPENSKY_BASE}/{direction}",
        params={"airport": icao, "begin": begin_ts, "end": end_ts},
        auth=credentials,
        timeout=60,
    )

    # A 404 signals "no data for this period" and is not treated as a failure.
    if response.status_code == 404:
        return []

    response.raise_for_status()
    payload = response.json()
    return payload if payload else []
# ── DB enrichment ─────────────────────────────────────────────────────────────
def enrich_flights(conn, opensky_flights: List[Dict], airport_iata: str, direction: str) -> int:
    """
    Write icao24 and the actual time onto schedule rows that match by callsign.

    conn: DB connection; only conn.cursor() is used here and nothing is
        committed, so the caller owns the transaction.
    opensky_flights: flight dicts as returned by OpenSky; entries lacking
        icao24, callsign, or the relevant timestamp are skipped.
    airport_iata / direction: restrict the UPDATE to that airport and direction.

    A schedule row matches when its flight_number equals the callsign and its
    scheduled_at lies within ±3 hours of the OpenSky actual time. Rows whose
    source is 'yandex' are re-labelled 'merged'.

    Returns the sum of cursor.rowcount over all executed UPDATEs.
    """
    if not opensky_flights:
        return 0

    # The "actual" moment is lastSeen for arrivals and firstSeen otherwise.
    time_key = "lastSeen" if direction == "arrival" else "firstSeen"

    update_sql = """
UPDATE fr24_ext.schedule
SET
icao24 = %s,
actual_at = %s,
source = CASE WHEN source = 'yandex' THEN 'merged' ELSE source END
WHERE airport_iata = %s
AND direction = %s
AND flight_number = %s
AND scheduled_at BETWEEN %s - INTERVAL '3 hours'
AND %s + INTERVAL '3 hours'
"""

    updated_rows = 0
    with conn.cursor() as cur:
        for record in opensky_flights:
            transponder = (record.get("icao24") or "").strip().lower()
            flight_code = (record.get("callsign") or "").strip()
            if not (transponder and flight_code):
                continue

            seen_ts = record.get(time_key)
            if not seen_ts:
                continue

            seen_at = datetime.fromtimestamp(seen_ts, tz=timezone.utc)
            cur.execute(
                update_sql,
                (transponder, seen_at, airport_iata, direction, flight_code, seen_at, seen_at),
            )
            updated_rows += cur.rowcount
    return updated_rows
# ── main entry ────────────────────────────────────────────────────────────────
def enrich_day(target_date: date, conn) -> int:
    """
    Enrich all configured airports for one UTC day.

    For every airport in config.AIRPORTS and for both directions, fetches the
    OpenSky flights for the day and applies them via enrich_flights. Each
    airport/direction batch is committed on its own; a failing batch is logged
    and rolled back, so it neither leaves the connection in an aborted
    transaction for the remaining batches nor discards batches that already
    succeeded.

    target_date: the calendar day to process, interpreted as 00:00-24:00 UTC.
    conn: DB connection; must provide commit() and rollback().

    Returns the total number of rows updated by successfully committed batches.
    """
    # Unix timestamps for start/end of the day (UTC).
    day_start = int(
        datetime(
            target_date.year, target_date.month, target_date.day, tzinfo=timezone.utc
        ).timestamp()
    )
    day_end = day_start + 86400

    total = 0
    for airport_iata, airport_info in config.AIRPORTS.items():
        icao = airport_info["icao"]
        log.info("OpenSky: enriching %s (%s) for %s", airport_iata, icao, target_date)
        for direction in ("arrival", "departure"):
            try:
                flights = fetch_opensky_flights(icao, day_start, day_end, direction)
                count = enrich_flights(conn, flights, airport_iata, direction)
                conn.commit()
            except Exception as e:
                # Best effort: one bad batch must not stop the others. The
                # rollback clears any failed-transaction state so later
                # UPDATEs on this connection can still run.
                log.exception("OpenSky: failed %s %s: %s", airport_iata, direction, e)
                conn.rollback()
            else:
                total += count
                log.info("OpenSky: %s %s: %d rows enriched", airport_iata, direction, count)
            # Pause between API calls, whether or not the batch succeeded.
            time.sleep(config.OPENSKY_RATE_LIMIT_SEC)
    # Kept so the connection is always committed on return, even when
    # config.AIRPORTS is empty.
    conn.commit()
    return total