# Files
# wiki/tasks/flightradar24/ingest/schedule/yandex_worker.py
# 2026-04-20 15:50:01 +03:00
#
# 214 lines
# 8.2 KiB
# Python

"""
Yandex.Rasp API worker — loads airport schedule into fr24_ext.schedule.
Queries each airport twice per day (event=departure and event=arrival);
each query is paginated, so it may take several HTTP requests.
"""
import logging
import time
from datetime import date
from typing import Dict, List, Optional
from functools import wraps
import requests
import psycopg2
from config import config
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# Endpoint that _fetch_page sends its GET requests to.
YANDEX_URL = "https://api.rasp.yandex.net/v3.0/schedule/"
# ── retry decorator ───────────────────────────────────────────────────────────
def retry(max_retries: int = 3, base_delay: float = 5.0):
    """
    Build a decorator that re-invokes the wrapped function when it raises
    requests.RequestException.

    max_retries: total number of attempts (the first call included). Values
        below 1 are treated as 1, so the wrapped function is always called
        at least once instead of silently returning None.
    base_delay: seconds slept after the first failure; the pause doubles
        after every further failure (base_delay * 2 ** attempt).

    The exception raised by the last attempt is re-raised unchanged.
    Exceptions of any other type propagate immediately without a retry.
    """
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            # Guarantee one real call even for max_retries <= 0.
            attempts = max(1, max_retries)
            for attempt in range(attempts):
                try:
                    return func(*args, **kwargs)
                except requests.RequestException as e:
                    if attempt >= attempts - 1:
                        # Out of attempts: surface the original error.
                        raise
                    wait = base_delay * (2 ** attempt)
                    log.warning("Retry %d/%d after %.0fs: %s", attempt + 1, attempts, wait, e)
                    time.sleep(wait)
        return wrapper
    return decorator
# ── API fetch ─────────────────────────────────────────────────────────────────
@retry(max_retries=3, base_delay=5.0)
def _fetch_page(yandex_code: str, target_date: date, event: str, offset: int = 0) -> dict:
    """
    Request a single page of the plane schedule for one station and return
    the decoded JSON body.

    yandex_code: value sent as the "station" query parameter.
    target_date: day to query; sent in ISO format.
    event: value sent as the "event" query parameter.
    offset: index of the first record of the page; the page size is fixed
        at 100 via the "limit" parameter.

    Raises whatever requests raises for transport errors or non-2xx
    responses (raise_for_status); the retry decorator re-attempts on
    requests.RequestException.
    """
    query = dict(
        apikey=config.YANDEX_RASP_API_KEY,
        station=yandex_code,
        date=target_date.isoformat(),
        transport_types="plane",
        event=event,
        offset=offset,
        limit=100,
    )
    response = requests.get(YANDEX_URL, params=query, timeout=30)
    response.raise_for_status()
    return response.json()
def fetch_airport_schedule(yandex_code: str, target_date: date, direction: str) -> List[Dict]:
    """
    Collect every parsable flight for one airport and one direction,
    walking through all result pages.

    direction: 'departure' | 'arrival' (passed to the API as "event").

    Entries that _parse_item rejects are skipped. Between consecutive pages
    the function sleeps for config.YANDEX_RATE_LIMIT_SEC seconds.
    """
    collected: List[Dict] = []
    position = 0
    has_more = True
    while has_more:
        page = _fetch_page(yandex_code, target_date, event=direction, offset=position)
        rows = page.get("schedule", [])
        paging = page.get("pagination", {})

        parsed_rows = (_parse_item(row, direction) for row in rows)
        collected.extend(parsed for parsed in parsed_rows if parsed)

        reported_total = paging.get("total", 0)
        # Advance by what was actually received, not by the requested limit.
        position += len(rows)
        # Continue only while the API reports more rows and the page was not empty.
        has_more = position < reported_total and bool(rows)
        if has_more:
            time.sleep(config.YANDEX_RATE_LIMIT_SEC)
    return collected
def _parse_item(item: Dict, direction: str) -> Optional[Dict]:
thread = item.get("thread", {})
flight_number = thread.get("number", "").strip()
if not flight_number:
return None
carrier = thread.get("carrier", {})
# Normalize flight number: Yandex returns "SU 1234 12345" (number + extra codes)
# Keep only first two tokens: airline code + flight number → "SU 1234"
parts = flight_number.split()
if len(parts) >= 2:
flight_number = f"{parts[0]} {parts[1]}"
else:
flight_number = parts[0] if parts else flight_number
# Scheduled time: departure event → use 'departure' field; arrival → 'arrival'
scheduled_at = item.get("departure") if direction == "departure" else item.get("arrival")
if not scheduled_at:
# fallback
scheduled_at = item.get("departure") or item.get("arrival")
if not scheduled_at:
return None
# Route: from/to station objects
from_station = item.get("from", {}) or {}
to_station = item.get("to", {}) or {}
origin_iata = _station_iata(from_station)
destination_iata = _station_iata(to_station)
return {
"flight_number": flight_number,
"airline_iata": carrier.get("code"),
"airline_name": carrier.get("title"),
"origin_iata": origin_iata,
"destination_iata": destination_iata,
"aircraft_type": thread.get("vehicle"),
"scheduled_at": scheduled_at,
"direction": direction,
"status": "scheduled",
"source": "yandex",
}
def _station_iata(station: Dict) -> Optional[str]:
"""Extract IATA code from station dict (codes list or direct field)."""
codes = station.get("codes", {})
if isinstance(codes, dict):
iata = codes.get("iata")
if iata:
return iata[:3].upper()
# fallback: station code field
code = station.get("code", "")
if code and not code.startswith("s"): # yandex internal codes start with 's'
return code[:3].upper()
return None
# ── DB upsert ─────────────────────────────────────────────────────────────────
def upsert_flights(conn, flights: List[Dict], airport_iata: str, flight_date: date) -> int:
    """
    Insert or update the given flights in fr24_ext.schedule, one statement
    per flight, and return how many flights were processed.

    conn: DB connection; only conn.cursor() is used here - nothing is
        committed by this function.
    flights: flight dicts; "direction", "flight_number", "scheduled_at" and
        "source" are required keys, the rest are optional.
    airport_iata / flight_date: stored on every row.

    Returns 0 without touching the connection when flights is empty.
    """
    if not flights:
        return 0

    upsert_sql = """
        INSERT INTO fr24_ext.schedule
        (flight_date, airport_iata, direction, flight_number,
        airline_iata, airline_name, origin_iata, destination_iata,
        aircraft_type, scheduled_at, status, source)
        VALUES
        (%(flight_date)s, %(airport_iata)s, %(direction)s, %(flight_number)s,
        %(airline_iata)s, %(airline_name)s, %(origin_iata)s, %(destination_iata)s,
        %(aircraft_type)s, %(scheduled_at)s, %(status)s, %(source)s)
        ON CONFLICT (flight_number, airport_iata, scheduled_at, direction)
        DO UPDATE SET
        airline_name = EXCLUDED.airline_name,
        origin_iata = COALESCE(EXCLUDED.origin_iata, fr24_ext.schedule.origin_iata),
        destination_iata = COALESCE(EXCLUDED.destination_iata, fr24_ext.schedule.destination_iata),
        aircraft_type = COALESCE(EXCLUDED.aircraft_type, fr24_ext.schedule.aircraft_type),
        status = EXCLUDED.status,
        fetched_at = now()
        """

    def bind(row: Dict) -> Dict:
        # Named parameters for one row; required keys use [] so a malformed
        # flight dict fails loudly with KeyError.
        return {
            "flight_date": flight_date,
            "airport_iata": airport_iata,
            "direction": row["direction"],
            "flight_number": row["flight_number"],
            "airline_iata": row.get("airline_iata"),
            "airline_name": row.get("airline_name"),
            "origin_iata": row.get("origin_iata"),
            "destination_iata": row.get("destination_iata"),
            "aircraft_type": row.get("aircraft_type"),
            "scheduled_at": row["scheduled_at"],
            "status": row.get("status", "scheduled"),
            "source": row["source"],
        }

    with conn.cursor() as cur:
        for row in flights:
            cur.execute(upsert_sql, bind(row))
    return len(flights)
# ── main entry ────────────────────────────────────────────────────────────────
def fetch_day(target_date: date, conn) -> int:
    """
    Load schedule for all airports for one day. Returns total flights upserted.

    target_date: day to load.
    conn: DB connection; this function commits and rolls back on it.

    Every airport/direction batch is committed on its own. When a batch
    fails (fetch or upsert) the error is logged, the connection is rolled
    back and processing continues with the next batch. Without the rollback
    a single failed statement would leave the transaction aborted, making
    every later upsert fail as well. Only committed batches are counted.
    """
    total = 0
    for airport_iata, airport_info in config.AIRPORTS.items():
        yandex_code = airport_info["yandex_code"]
        log.info("Yandex: fetching %s (%s) for %s", airport_iata, yandex_code, target_date)
        for direction in ("departure", "arrival"):
            try:
                flights = fetch_airport_schedule(yandex_code, target_date, direction)
                count = upsert_flights(conn, flights, airport_iata, target_date)
                conn.commit()
            except Exception as e:
                # Best-effort boundary: one bad batch must not stop the run.
                log.error("Yandex: failed %s %s: %s", airport_iata, direction, e)
                try:
                    conn.rollback()
                except Exception as rollback_error:
                    log.error(
                        "Yandex: rollback failed after %s %s: %s",
                        airport_iata, direction, rollback_error,
                    )
            else:
                total += count
                log.info("Yandex: %s %s: %d flights upserted", airport_iata, direction, count)
            # Rate-limit pause between consecutive API queries.
            time.sleep(config.YANDEX_RATE_LIMIT_SEC)
    # Kept from the original contract; a no-op when every batch was committed.
    conn.commit()
    return total