214 lines
8.2 KiB
Python
214 lines
8.2 KiB
Python
"""
|
|
Yandex.Rasp API worker — loads airport schedule into fr24_ext.schedule.
|
|
Makes two requests per airport per day: event=departure and event=arrival.
|
|
"""
|
|
import logging
|
|
import time
|
|
from datetime import date
|
|
from typing import Dict, List, Optional
|
|
from functools import wraps
|
|
|
|
import requests
|
|
import psycopg2
|
|
|
|
from config import config
|
|
|
|
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
|
|
|
|
# Yandex.Rasp v3.0 schedule endpoint requested by _fetch_page.
YANDEX_URL = "https://api.rasp.yandex.net/v3.0/schedule/"
|
|
|
|
|
|
# ── retry decorator ───────────────────────────────────────────────────────────
|
|
|
|
def retry(max_retries: int = 3, base_delay: float = 5.0):
    """Build a decorator that retries a call on ``requests.RequestException``.

    The wrapped function is called up to ``max_retries`` times. After each
    failed attempt except the last, the wrapper logs a warning and sleeps for
    ``base_delay * 2 ** attempt`` seconds (exponential backoff). The exception
    raised by the final attempt is propagated to the caller. Exceptions of any
    other type are never retried.

    Args:
        max_retries: Total number of attempts. Values below 1 are treated as 1
            so that the wrapped function is always called at least once.
        base_delay: Delay in seconds before the second attempt; doubled for
            every further attempt.

    Returns:
        A decorator that preserves the wrapped function's metadata.
    """
    # Without this clamp a non-positive value would skip the loop entirely and
    # the wrapper would return None without ever calling the function.
    attempts = max(1, max_retries)

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(attempts):
                try:
                    return func(*args, **kwargs)
                except requests.RequestException as e:
                    if attempt == attempts - 1:
                        # Out of attempts: let the caller see the real error.
                        raise
                    wait = base_delay * (2 ** attempt)
                    log.warning("Retry %d/%d after %.0fs: %s", attempt + 1, attempts, wait, e)
                    time.sleep(wait)

        return wrapper

    return decorator
|
|
|
|
|
|
# ── API fetch ─────────────────────────────────────────────────────────────────
|
|
|
|
@retry(max_retries=3, base_delay=5.0)
def _fetch_page(yandex_code: str, target_date: date, event: str, offset: int = 0) -> dict:
    """Request one page of the plane schedule for a station and decode it.

    Args:
        yandex_code: Station code sent as the ``station`` query parameter.
        target_date: Day to request; sent in ISO format.
        event: Value of the ``event`` query parameter
            (callers pass ``'departure'`` or ``'arrival'``).
        offset: Number of entries to skip; pages are 100 entries long.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.RequestException: When the request still fails (transport
            error or HTTP error status) after the retries applied by the
            ``retry`` decorator.
    """
    # NOTE(review): the API key is sent as a query parameter, so it is part of
    # the request URL — confirm that logged request errors do not expose it.
    query = dict(
        apikey=config.YANDEX_RASP_API_KEY,
        station=yandex_code,
        date=target_date.isoformat(),
        transport_types="plane",
        event=event,
        offset=offset,
        limit=100,
    )
    response = requests.get(YANDEX_URL, params=query, timeout=30)
    response.raise_for_status()
    payload = response.json()
    return payload
|
|
|
|
|
|
def fetch_airport_schedule(yandex_code: str, target_date: date, direction: str) -> List[Dict]:
    """Fetch every flight for one airport and direction, following pagination.

    Pages are requested with ``_fetch_page`` until the running offset reaches
    the ``pagination.total`` reported by the API or a page comes back empty.
    The function sleeps ``config.YANDEX_RATE_LIMIT_SEC`` seconds between pages.

    Args:
        yandex_code: Station code passed through to ``_fetch_page``.
        target_date: Day to load.
        direction: ``'departure'`` or ``'arrival'``; sent as the API ``event``.

    Returns:
        The parsed flights, in API order. Entries that ``_parse_item`` rejects
        are skipped.

    Raises:
        requests.RequestException: Propagated from ``_fetch_page``.
    """
    flights: List[Dict] = []
    offset = 0

    while True:
        data = _fetch_page(yandex_code, target_date, event=direction, offset=offset) or {}

        # `or` (rather than a .get default) also covers keys that are present
        # but null in the JSON payload.
        items = data.get("schedule") or []
        pagination = data.get("pagination") or {}

        for item in items:
            flight = _parse_item(item, direction)
            if flight:
                flights.append(flight)

        total = pagination.get("total") or 0
        offset += len(items)

        # An empty page stops the loop even if `total` claims more entries,
        # which prevents spinning forever on an inconsistent response.
        if not items or offset >= total:
            break

        time.sleep(config.YANDEX_RATE_LIMIT_SEC)

    return flights
|
|
|
|
|
|
def _parse_item(item: Dict, direction: str) -> Optional[Dict]:
|
|
thread = item.get("thread", {})
|
|
flight_number = thread.get("number", "").strip()
|
|
if not flight_number:
|
|
return None
|
|
|
|
carrier = thread.get("carrier", {})
|
|
|
|
# Normalize flight number: Yandex returns "SU 1234 12345" (number + extra codes)
|
|
# Keep only first two tokens: airline code + flight number → "SU 1234"
|
|
parts = flight_number.split()
|
|
if len(parts) >= 2:
|
|
flight_number = f"{parts[0]} {parts[1]}"
|
|
else:
|
|
flight_number = parts[0] if parts else flight_number
|
|
|
|
# Scheduled time: departure event → use 'departure' field; arrival → 'arrival'
|
|
scheduled_at = item.get("departure") if direction == "departure" else item.get("arrival")
|
|
if not scheduled_at:
|
|
# fallback
|
|
scheduled_at = item.get("departure") or item.get("arrival")
|
|
if not scheduled_at:
|
|
return None
|
|
|
|
# Route: from/to station objects
|
|
from_station = item.get("from", {}) or {}
|
|
to_station = item.get("to", {}) or {}
|
|
origin_iata = _station_iata(from_station)
|
|
destination_iata = _station_iata(to_station)
|
|
|
|
return {
|
|
"flight_number": flight_number,
|
|
"airline_iata": carrier.get("code"),
|
|
"airline_name": carrier.get("title"),
|
|
"origin_iata": origin_iata,
|
|
"destination_iata": destination_iata,
|
|
"aircraft_type": thread.get("vehicle"),
|
|
"scheduled_at": scheduled_at,
|
|
"direction": direction,
|
|
"status": "scheduled",
|
|
"source": "yandex",
|
|
}
|
|
|
|
|
|
def _station_iata(station: Dict) -> Optional[str]:
|
|
"""Extract IATA code from station dict (codes list or direct field)."""
|
|
codes = station.get("codes", {})
|
|
if isinstance(codes, dict):
|
|
iata = codes.get("iata")
|
|
if iata:
|
|
return iata[:3].upper()
|
|
# fallback: station code field
|
|
code = station.get("code", "")
|
|
if code and not code.startswith("s"): # yandex internal codes start with 's'
|
|
return code[:3].upper()
|
|
return None
|
|
|
|
|
|
# ── DB upsert ─────────────────────────────────────────────────────────────────
|
|
|
|
def upsert_flights(conn, flights: List[Dict], airport_iata: str, flight_date: date) -> int:
    """Insert or refresh schedule rows in ``fr24_ext.schedule``.

    One statement is executed per flight on a cursor obtained from ``conn``.
    A row conflicting on (flight_number, airport_iata, scheduled_at,
    direction) gets its name, status and ``fetched_at`` overwritten, while
    route and aircraft type are replaced only by non-null new values.
    The function does not commit.

    Args:
        conn: Database connection providing ``cursor()`` as a context manager.
        flights: Flight records; ``direction``, ``flight_number``,
            ``scheduled_at`` and ``source`` are required keys.
        airport_iata: Airport the schedule belongs to.
        flight_date: Day the schedule was requested for.

    Returns:
        The number of flights processed (``0`` for an empty list).
    """
    if not flights:
        return 0

    statement = """
        INSERT INTO fr24_ext.schedule
            (flight_date, airport_iata, direction, flight_number,
             airline_iata, airline_name, origin_iata, destination_iata,
             aircraft_type, scheduled_at, status, source)
        VALUES
            (%(flight_date)s, %(airport_iata)s, %(direction)s, %(flight_number)s,
             %(airline_iata)s, %(airline_name)s, %(origin_iata)s, %(destination_iata)s,
             %(aircraft_type)s, %(scheduled_at)s, %(status)s, %(source)s)
        ON CONFLICT (flight_number, airport_iata, scheduled_at, direction)
        DO UPDATE SET
            airline_name     = EXCLUDED.airline_name,
            origin_iata      = COALESCE(EXCLUDED.origin_iata, fr24_ext.schedule.origin_iata),
            destination_iata = COALESCE(EXCLUDED.destination_iata, fr24_ext.schedule.destination_iata),
            aircraft_type    = COALESCE(EXCLUDED.aircraft_type, fr24_ext.schedule.aircraft_type),
            status           = EXCLUDED.status,
            fetched_at       = now()
    """
    # Columns that may legitimately be absent from a record (stored as NULL).
    nullable_fields = (
        "airline_iata",
        "airline_name",
        "origin_iata",
        "destination_iata",
        "aircraft_type",
    )

    with conn.cursor() as cursor:
        for record in flights:
            row = {
                "flight_date": flight_date,
                "airport_iata": airport_iata,
                "direction": record["direction"],
                "flight_number": record["flight_number"],
            }
            row.update({name: record.get(name) for name in nullable_fields})
            row["scheduled_at"] = record["scheduled_at"]
            row["status"] = record.get("status", "scheduled")
            row["source"] = record["source"]
            cursor.execute(statement, row)

    return len(flights)
|
|
|
|
|
|
# ── main entry ────────────────────────────────────────────────────────────────
|
|
|
|
def fetch_day(target_date: date, conn) -> int:
    """Load the schedule of every configured airport for one day.

    For each airport in ``config.AIRPORTS`` and each direction
    (``'departure'``, ``'arrival'``) the schedule is fetched and upserted.
    A failure for one airport/direction is logged and skipped so that the
    remaining ones are still processed. The function sleeps
    ``config.YANDEX_RATE_LIMIT_SEC`` seconds after every direction.

    Args:
        target_date: Day to load.
        conn: Database connection providing ``cursor()``, ``commit()`` and
            ``rollback()``.

    Returns:
        Total number of flights upserted in the batches that succeeded.
    """
    total = 0

    for airport_iata, airport_info in config.AIRPORTS.items():
        yandex_code = airport_info["yandex_code"]
        log.info("Yandex: fetching %s (%s) for %s", airport_iata, yandex_code, target_date)

        for direction in ("departure", "arrival"):
            try:
                flights = fetch_airport_schedule(yandex_code, target_date, direction)
                count = upsert_flights(conn, flights, airport_iata, target_date)
                # Commit per batch so that a later failure, which needs a
                # rollback, cannot discard batches that already succeeded.
                conn.commit()
            except Exception as e:
                log.exception("Yandex: failed %s %s: %s", airport_iata, direction, e)
                # A failed statement leaves the transaction aborted; without a
                # rollback every following upsert on this connection would
                # fail as well.
                try:
                    conn.rollback()
                except Exception:
                    log.exception("Yandex: rollback failed after %s %s", airport_iata, direction)
            else:
                total += count
                log.info("Yandex: %s %s → %d flights upserted", airport_iata, direction, count)

            time.sleep(config.YANDEX_RATE_LIMIT_SEC)

    # Kept so the connection is left committed even when nothing was processed.
    conn.commit()
    return total
|