"""FR24 tracks worker.

Pipeline for one calendar day:

1. GET /api/flight-summary/full for each day → actual flight data (up to 20000)
2. Optionally: GET /api/flight-tracks?flight_id={fr24_id} → track points
3. Upsert into fr24_ext.flight_actual (actual data)
4. Upsert into fr24_ext.flight_tracks_fr24 + fr24_ext.track_points_fr24 (tracks)
5. Enrich fr24_ext.schedule with actual times + delays
6. Supplement fr24_ext.schedule with FR24 flights that have no schedule row
"""

import logging
import re
import time
from datetime import date, datetime, timezone
from typing import Dict, Iterator, List, Optional

import psycopg2
import psycopg2.extras
import requests

from config import config

log = logging.getLogger("fr24_worker")

HEADERS = {
    "Authorization": f"Bearer {config.FR24_API_KEY}",
    "Accept": "application/json",
    "Accept-Version": "v1",
}

# Monotonic timestamp of the most recent outgoing request (see _throttle).
_last_request_at: float = 0.0

# How many times a single request is retried after an HTTP 429 before the
# 429 is surfaced to the caller as requests.HTTPError.
_MAX_RATE_LIMIT_RETRIES = 5

# Sleep used when the server sends no usable Retry-After header.
_DEFAULT_RETRY_AFTER_SEC = 60

# Savepoint name used by run() to isolate one flight inside a page transaction.
_ITEM_SAVEPOINT = "fr24_item"


def _throttle():
    """Sleep so that consecutive requests are at least RATE_LIMIT_SEC apart.

    NOTE(review): the original docstring stated "max 10 req/min"; the actual
    rate depends entirely on config.RATE_LIMIT_SEC.
    """
    global _last_request_at
    elapsed = time.monotonic() - _last_request_at
    if elapsed < config.RATE_LIMIT_SEC:
        time.sleep(config.RATE_LIMIT_SEC - elapsed)
    _last_request_at = time.monotonic()


def _parse_retry_after(value) -> int:
    """Return a non-negative sleep duration from a Retry-After header value.

    Retry-After may legally be an HTTP date rather than a number of seconds;
    anything that is not an integer falls back to the default delay.
    """
    try:
        return max(0, int(value))
    except (TypeError, ValueError):
        return _DEFAULT_RETRY_AFTER_SEC


def _get(path: str, params: Optional[dict] = None) -> dict:
    """GET ``config.FR24_API_BASE + path`` and return the decoded JSON body.

    Every attempt is throttled. On HTTP 429 the call sleeps for the
    Retry-After duration and retries, at most ``_MAX_RATE_LIMIT_RETRIES``
    times, instead of recursing without bound.

    Raises:
        requests.HTTPError: on a non-2xx response, including a 429 that
            persists after the last retry.
        requests.RequestException: on connection errors or timeouts.
        ValueError: if the response body is not valid JSON.
    """
    url = f"{config.FR24_API_BASE}{path}"
    for attempt in range(_MAX_RATE_LIMIT_RETRIES + 1):
        _throttle()
        resp = requests.get(url, headers=HEADERS, params=params, timeout=60)
        if resp.status_code != 429 or attempt == _MAX_RATE_LIMIT_RETRIES:
            break
        retry_after = _parse_retry_after(resp.headers.get("Retry-After"))
        log.warning("Rate limited, sleeping %ds", retry_after)
        time.sleep(retry_after)
    resp.raise_for_status()
    return resp.json()


def _build_airports_param() -> str:
    """Build airports param: both:SVO,both:DME,both:VKO,both:ZIA

    The prefix comes from config.AIRPORT_DIRECTION_PREFIX and the codes from
    the comma-separated config.AIRPORTS; blank entries are dropped.
    """
    prefix = config.AIRPORT_DIRECTION_PREFIX
    codes = [a.strip() for a in config.AIRPORTS.split(",") if a.strip()]
    return ",".join(f"{prefix}{code}" for code in codes)


def iter_flight_summary_pages(target_date: date) -> Iterator[List[Dict]]:
    """Yield one page (list of flights) at a time.

    Stops on a fetch error, an empty or non-list payload, a short (last) page
    or after config.MAX_PAGES pages.

    Flights are deduplicated by fr24_id both across pages and inside a single
    page — avoids ×4 duplicates from airports param
    'both:SVO,both:DME,both:VKO,both:ZIA'. Entries without an fr24_id are
    passed through unchanged; entries that are not dicts are skipped.
    """
    # Explorer tier hard limit per request.
    # NOTE(review): run() estimates credit usage with config.PAGE_SIZE, not
    # this constant — confirm the two agree.
    PAGE = 20
    airports_param = _build_airports_param()
    dt_from = f"{target_date}T00:00:00"
    dt_to = f"{target_date}T23:59:59"
    offset = 0
    seen_fr24_ids: set = set()  # dedup across pages
    page_num = 0

    while True:
        try:
            data = _get("/api/flight-summary/full", params={
                "flight_datetime_from": dt_from,
                "flight_datetime_to": dt_to,
                "airports": airports_param,
                "limit": PAGE,
                "offset": offset,
            })
        except Exception as e:
            log.error("fetch page offset=%d failed: %s", offset, e)
            break

        # The payload is either {"data": [...]} or a bare list.
        items = data.get("data", data) if isinstance(data, dict) else data
        if not items or not isinstance(items, list):
            break

        # Deduplicate by fr24_id. The seen-set is updated while filtering so
        # that a flight repeated inside the same page is emitted only once.
        unique = []
        for entry in items:
            if not isinstance(entry, dict):
                continue
            flight_id = entry.get("fr24_id")
            if flight_id:
                if flight_id in seen_fr24_ids:
                    continue
                seen_fr24_ids.add(flight_id)
            unique.append(entry)

        log.debug(
            "iter_flight_summary_pages: page=%d offset=%d got=%d unique=%d total_seen=%d",
            page_num, offset, len(items), len(unique), len(seen_fr24_ids),
        )
        yield unique

        page_num += 1
        if page_num >= config.MAX_PAGES:
            log.warning("Reached MAX_PAGES=%d, stopping pagination", config.MAX_PAGES)
            break
        if len(items) < PAGE:
            break  # last page
        offset += PAGE


def fetch_track(fr24_id: str) -> Optional[List[Dict]]:
    """Fetch track points for a single flight.

    Returns the list of points (possibly empty), or None when the request
    failed. Any transport-level failure (HTTP error status, timeout,
    connection error, undecodable JSON) is logged and reported as None so
    that a flaky network does not abort the caller's database transaction.
    """
    try:
        data = _get("/api/flight-tracks", params={"flight_id": fr24_id})
    except (requests.RequestException, ValueError) as e:
        log.warning("Failed to fetch track for %s: %s", fr24_id, e)
        return None

    if isinstance(data, list) and data:
        first = data[0]
        return first.get("tracks", []) if isinstance(first, dict) else []
    if isinstance(data, dict):
        return data.get("tracks", [])
    return []


# ── DB upsert: flight_actual ─────────────────────────────────────────────────

def upsert_flight_actual(conn, item: Dict, target_date: date) -> Optional[int]:
    """Insert/update actual flight data. Returns DB id.

    Returns None when the item carries no fr24_id or the statement returns no
    row. On conflict every column except flight_date is overwritten and
    fetched_at is refreshed. Does not commit.
    """
    fr24_id = item.get("fr24_id")
    if not fr24_id:
        return None

    with conn.cursor() as cur:
        cur.execute(
            """
            INSERT INTO fr24_ext.flight_actual
                (fr24_id, flight, callsign, operated_as,
                 origin_icao, dest_icao, orig_iata, dest_iata,
                 datetime_takeoff, datetime_landed, flight_time,
                 runway_takeoff, runway_landed, actual_distance,
                 category, registration, flight_ended,
                 first_seen, last_seen, flight_date)
            VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
            ON CONFLICT (fr24_id) DO UPDATE SET
                flight = EXCLUDED.flight,
                callsign = EXCLUDED.callsign,
                operated_as = EXCLUDED.operated_as,
                origin_icao = EXCLUDED.origin_icao,
                dest_icao = EXCLUDED.dest_icao,
                orig_iata = EXCLUDED.orig_iata,
                dest_iata = EXCLUDED.dest_iata,
                datetime_takeoff = EXCLUDED.datetime_takeoff,
                datetime_landed = EXCLUDED.datetime_landed,
                flight_time = EXCLUDED.flight_time,
                runway_takeoff = EXCLUDED.runway_takeoff,
                runway_landed = EXCLUDED.runway_landed,
                actual_distance = EXCLUDED.actual_distance,
                category = EXCLUDED.category,
                registration = EXCLUDED.registration,
                flight_ended = EXCLUDED.flight_ended,
                first_seen = EXCLUDED.first_seen,
                last_seen = EXCLUDED.last_seen,
                fetched_at = now()
            RETURNING id
            """,
            (
                fr24_id,
                item.get("flight"),
                item.get("callsign"),
                item.get("operating_as") or item.get("operated_as"),  # FR24 uses operating_as
                item.get("orig_icao") or item.get("origin_icao"),  # FR24 uses orig_icao
                item.get("dest_icao"),
                item.get("orig_iata"),
                item.get("dest_iata"),
                item.get("datetime_takeoff"),
                item.get("datetime_landed"),
                item.get("flight_time"),
                item.get("runway_takeoff"),
                item.get("runway_landed"),
                item.get("actual_distance") or item.get("distance"),
                item.get("category"),
                item.get("reg") or item.get("registration"),  # FR24 uses reg
                item.get("flight_ended", False),
                item.get("first_seen"),
                item.get("last_seen"),
                target_date,
            ),
        )
        row = cur.fetchone()
        return row[0] if row else None


# ── DB upsert: flight_tracks_fr24 (existing, kept for tracks) ────────────────

def upsert_flight(conn, summary: Dict, target_date: date) -> Optional[int]:
    """Insert/update flight header for tracks. Return DB id.

    Field names are read the same way as in upsert_flight_actual: the FR24
    spelling first (orig_icao, dest_icao, reg), then the longer legacy
    spelling. Reading only the legacy names left the airports NULL for FR24
    summary items. Does not commit.
    """
    with conn.cursor() as cur:
        cur.execute(
            """
            INSERT INTO fr24_ext.flight_tracks_fr24
                (fr24_id, flight_number, callsign, aircraft_type, registration,
                 origin_icao, destination_icao, actual_takeoff, actual_landed,
                 flight_date)
            VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
            ON CONFLICT (fr24_id) DO UPDATE SET
                flight_number = EXCLUDED.flight_number,
                callsign = EXCLUDED.callsign,
                aircraft_type = EXCLUDED.aircraft_type,
                registration = EXCLUDED.registration,
                origin_icao = EXCLUDED.origin_icao,
                destination_icao = EXCLUDED.destination_icao,
                actual_takeoff = EXCLUDED.actual_takeoff,
                actual_landed = EXCLUDED.actual_landed,
                fetched_at = now()
            RETURNING id
            """,
            (
                summary.get("fr24_id"),
                summary.get("flight"),
                summary.get("callsign"),
                summary.get("type"),
                summary.get("reg") or summary.get("registration"),
                summary.get("orig_icao") or summary.get("origin_icao"),
                summary.get("dest_icao") or summary.get("destination_icao"),
                summary.get("datetime_takeoff"),
                summary.get("datetime_landed"),
                target_date,
            ),
        )
        row = cur.fetchone()
        return row[0] if row else None


def upsert_track_points(conn, track_id: int, points: List[Dict]):
    """Delete old points and insert fresh ones.

    Existing points of ``track_id`` are always deleted, even when ``points``
    is empty. Points missing lat or lon are dropped. Does not commit.
    """
    rows = [
        (
            track_id,
            p.get("timestamp"),
            p.get("lat"),
            p.get("lon"),
            p.get("alt"),
            p.get("gspeed"),
            p.get("vspeed"),
            p.get("track"),
            p.get("squawk"),
            p.get("source"),
        )
        for p in points or []
        if p.get("lat") is not None and p.get("lon") is not None
    ]

    with conn.cursor() as cur:
        cur.execute(
            "DELETE FROM fr24_ext.track_points_fr24 WHERE track_id = %s",
            (track_id,),
        )
        if not rows:
            return
        psycopg2.extras.execute_values(
            cur,
            """
            INSERT INTO fr24_ext.track_points_fr24
                (track_id, observed_at, lat, lon, altitude_ft, gspeed_kt,
                 vspeed_fpm, heading, squawk, source)
            VALUES %s
            """,
            rows,
        )


# ── Enrich schedule with actual times ────────────────────────────────────────

def _normalize_flight_number(fn: str) -> str:
    """Normalize flight number for matching.

    'SU 1234' → 'SU1234', 'SU1234' → 'SU1234'. Empty or None input yields ''.
    """
    if not fn:
        return ""
    return re.sub(r"\s+", "", fn.strip().upper())


def enrich_schedule(conn, target_date: date) -> int:
    """Match flight_actual rows to schedule rows by flight number + date.

    Update schedule with actual times, delays, fr24_id, and category.
    Returns number of schedule rows updated. Does not commit.

    NOTE(review): both CASE expressions below evaluate to the same value
    (takeoff delay for departures, landing delay for arrivals), so
    delay_takeoff_min and delay_landed_min always receive identical numbers.
    NOTE(review): ``::int / 60`` is integer division, so sub-minute remainders
    are dropped.
    NOTE(review): the join does not compare the schedule row's airport with
    the FR24 airport, so one schedule row can match several flight_actual
    rows — confirm this is acceptable.
    """
    with conn.cursor() as cur:
        # Match by normalized flight number + flight_date.
        # For departures: schedule airport is origin → fa.origin_icao must be a Moscow ICAO.
        # For arrivals: schedule airport is destination → fa.dest_icao must be a Moscow ICAO.
        cur.execute(
            """
            WITH matches AS (
                SELECT
                    s.schedule_id,
                    fa.fr24_id,
                    fa.datetime_takeoff AS actual_takeoff,
                    fa.datetime_landed AS actual_landed,
                    fa.category AS flight_category,
                    CASE
                        WHEN s.direction = 'departure' AND fa.datetime_takeoff IS NOT NULL
                            THEN EXTRACT(EPOCH FROM (fa.datetime_takeoff - s.scheduled_at))::int / 60
                        WHEN s.direction = 'arrival' AND fa.datetime_landed IS NOT NULL
                            THEN EXTRACT(EPOCH FROM (fa.datetime_landed - s.scheduled_at))::int / 60
                    END AS delay_takeoff_min,
                    CASE
                        WHEN s.direction = 'arrival' AND fa.datetime_landed IS NOT NULL AND s.scheduled_at IS NOT NULL
                            THEN EXTRACT(EPOCH FROM (fa.datetime_landed - s.scheduled_at))::int / 60
                        WHEN s.direction = 'departure' AND fa.datetime_takeoff IS NOT NULL AND s.scheduled_at IS NOT NULL
                            THEN EXTRACT(EPOCH FROM (fa.datetime_takeoff - s.scheduled_at))::int / 60
                    END AS delay_landed_min
                FROM fr24_ext.schedule s
                JOIN fr24_ext.flight_actual fa
                    ON UPPER(REPLACE(fa.flight, ' ', '')) = UPPER(REPLACE(s.flight_number, ' ', ''))
                    AND fa.flight_date = s.flight_date
                WHERE s.flight_date = %s
                    AND (
                        (s.direction = 'departure' AND fa.origin_icao IN ('UUEE','UUDD','UUWW','UUBW'))
                        OR
                        (s.direction = 'arrival' AND fa.dest_icao IN ('UUEE','UUDD','UUWW','UUBW'))
                    )
            )
            UPDATE fr24_ext.schedule s SET
                actual_takeoff = m.actual_takeoff,
                actual_landed = m.actual_landed,
                fr24_id = m.fr24_id,
                flight_category = m.flight_category,
                delay_takeoff_min = m.delay_takeoff_min,
                delay_landed_min = m.delay_landed_min,
                fetched_at = now()
            FROM matches m
            WHERE s.schedule_id = m.schedule_id
            """,
            (target_date,),
        )
        updated = cur.rowcount
        return updated


# ── Supplement schedule with FR24 flights not in Yandex ─────────────────────

# Moscow airports: ICAO → IATA (for supplement_schedule)
_MOSCOW_ICAO_TO_IATA: Dict[str, str] = {
    "UUEE": "SVO",
    "UUDD": "DME",
    "UUWW": "VKO",
    "UUBW": "ZIA",
}
_MOSCOW_ICAO_SET = set(_MOSCOW_ICAO_TO_IATA.keys())


def supplement_schedule(conn, target_date: date) -> int:
    """Insert flight_actual flights that have no matching schedule record.

    Source: fr24_ext.flight_actual
    Target: fr24_ext.schedule (source='fr24')
    Returns: number of rows inserted. Does not commit.

    A flight counts as already present when any schedule row of the same date
    has the same space-stripped, upper-cased flight number.
    """
    with conn.cursor() as cur:
        cur.execute(
            """
            INSERT INTO fr24_ext.schedule
                (flight_date, airport_iata, direction, flight_number,
                 airline_iata, origin_iata, destination_iata, aircraft_type,
                 scheduled_at, actual_takeoff, actual_landed,
                 status, source, fr24_id)
            SELECT
                fa.flight_date,
                -- airport_iata: the Moscow airport involved in this flight
                CASE
                    WHEN fa.origin_icao IN ('UUEE','UUDD','UUWW','UUBW') THEN
                        CASE fa.origin_icao
                            WHEN 'UUEE' THEN 'SVO'
                            WHEN 'UUDD' THEN 'DME'
                            WHEN 'UUWW' THEN 'VKO'
                            WHEN 'UUBW' THEN 'ZIA'
                        END
                    ELSE
                        CASE fa.dest_icao
                            WHEN 'UUEE' THEN 'SVO'
                            WHEN 'UUDD' THEN 'DME'
                            WHEN 'UUWW' THEN 'VKO'
                            WHEN 'UUBW' THEN 'ZIA'
                        END
                END AS airport_iata,
                -- direction: departure if origin is Moscow, otherwise arrival
                CASE
                    WHEN fa.origin_icao IN ('UUEE','UUDD','UUWW','UUBW') THEN 'departure'
                    ELSE 'arrival'
                END AS direction,
                -- flight_number: normalised (strip spaces)
                UPPER(REGEXP_REPLACE(fa.flight, '\\s+', '', 'g')) AS flight_number,
                -- airline_iata: leading designator of the flight number; three
                -- letters, or two characters of which one may be a digit (S7, U6, 5N)
                UPPER(SUBSTRING(REGEXP_REPLACE(fa.flight, '\\s+', '', 'g')
                                FROM '^([A-Z]{3}|[A-Z][A-Z0-9]|[0-9][A-Z])')) AS airline_iata,
                -- origin_iata: map known Moscow ICAOs, others NULL
                CASE fa.origin_icao
                    WHEN 'UUEE' THEN 'SVO'
                    WHEN 'UUDD' THEN 'DME'
                    WHEN 'UUWW' THEN 'VKO'
                    WHEN 'UUBW' THEN 'ZIA'
                    ELSE NULL
                END AS origin_iata,
                -- destination_iata: map known Moscow ICAOs, others NULL
                CASE fa.dest_icao
                    WHEN 'UUEE' THEN 'SVO'
                    WHEN 'UUDD' THEN 'DME'
                    WHEN 'UUWW' THEN 'VKO'
                    WHEN 'UUBW' THEN 'ZIA'
                    ELSE NULL
                END AS destination_iata,
                NULL AS aircraft_type,
                -- scheduled_at: takeoff time for departures, landed for arrivals; fallback to first_seen
                COALESCE(
                    CASE
                        WHEN fa.origin_icao IN ('UUEE','UUDD','UUWW','UUBW') THEN fa.datetime_takeoff
                        ELSE fa.datetime_landed
                    END,
                    fa.first_seen
                ) AS scheduled_at,
                fa.datetime_takeoff AS actual_takeoff,
                fa.datetime_landed AS actual_landed,
                'actual' AS status,
                'fr24' AS source,
                fa.fr24_id
            FROM fr24_ext.flight_actual fa
            WHERE fa.flight_date = %(date)s
              AND fa.flight IS NOT NULL
              AND fa.flight != ''
              -- Must involve at least one Moscow airport
              AND (
                  fa.origin_icao IN ('UUEE','UUDD','UUWW','UUBW')
                  OR fa.dest_icao IN ('UUEE','UUDD','UUWW','UUBW')
              )
              -- Skip flights already present in schedule
              AND NOT EXISTS (
                  SELECT 1
                  FROM fr24_ext.schedule s
                  WHERE UPPER(REPLACE(s.flight_number, ' ', '')) = UPPER(REPLACE(fa.flight, ' ', ''))
                    AND s.flight_date = fa.flight_date
              )
            ON CONFLICT (flight_number, airport_iata, scheduled_at, direction) DO NOTHING
            """,
            {"date": target_date},
        )
        return cur.rowcount


# ── Main entry ───────────────────────────────────────────────────────────────

def _execute(conn, statement: str) -> None:
    """Run one parameterless SQL statement on a short-lived cursor."""
    with conn.cursor() as cur:
        cur.execute(statement)


def _rollback_item(conn, use_savepoint: bool) -> None:
    """Undo the work of the current flight only.

    With savepoints the transaction is rewound to the start of the item, so
    earlier flights of the same page stay pending. If that is impossible
    (savepoints disabled, or the savepoint itself could not be created) the
    whole transaction is rolled back.
    """
    if use_savepoint:
        try:
            _execute(conn, f"ROLLBACK TO SAVEPOINT {_ITEM_SAVEPOINT}")
            _execute(conn, f"RELEASE SAVEPOINT {_ITEM_SAVEPOINT}")
            return
        except Exception as e:
            log.error("FR24: savepoint rollback failed, rolling back page: %s", e)
    conn.rollback()


def run(target_date: date, conn) -> Dict:
    """Main entry: load flight summaries + optionally tracks.

    Each page of summaries is written in one transaction and committed.
    Inside the page every flight runs under its own savepoint, so a failing
    flight no longer discards the flights upserted before it. The counters
    ``flights_upserted`` and ``tracks_loaded`` are increased only after the
    page commit succeeded. Afterwards the schedule is enriched and
    supplemented, each in its own transaction.

    Returns stats dict with the keys date, flights_found, flights_upserted,
    tracks_loaded, schedule_enriched, schedule_supplemented and errors.
    """
    log.info("FR24 worker: starting for %s", target_date)

    # NOTE(review): this guard only logs; the run continues regardless.
    estimated_max = config.MAX_PAGES * config.PAGE_SIZE
    log.info("FR24 worker: MAX_PAGES=%d, PAGE_SIZE=%d, estimated max flights=%d",
             config.MAX_PAGES, config.PAGE_SIZE, estimated_max)
    if estimated_max > config.CREDIT_GUARD_MAX_FLIGHTS:
        log.critical("CREDIT GUARD: estimated max flights %d exceeds limit %d "
                     "(верифицируй MAX_PAGES=%d и PAGE_SIZE=%d перед запуском!)",
                     estimated_max, config.CREDIT_GUARD_MAX_FLIGHTS,
                     config.MAX_PAGES, config.PAGE_SIZE)

    stats = {
        "date": str(target_date),
        "flights_found": 0,
        "flights_upserted": 0,
        "tracks_loaded": 0,
        "schedule_enriched": 0,
        "schedule_supplemented": 0,
        "errors": 0,
    }

    # SAVEPOINT is only valid inside a transaction block, so it is skipped
    # when the connection runs in autocommit mode.
    use_savepoints = not getattr(conn, "autocommit", False)

    # 1. Fetch flight summaries page by page, commit after each page
    for page in iter_flight_summary_pages(target_date):
        stats["flights_found"] += len(page)
        page_upserted = 0
        page_tracks = 0

        for item in page:
            fr24_id = item.get("fr24_id")
            if not fr24_id:
                continue

            upserted = 0
            track_loaded = 0
            try:
                if use_savepoints:
                    _execute(conn, f"SAVEPOINT {_ITEM_SAVEPOINT}")

                if upsert_flight_actual(conn, item, target_date):
                    upserted = 1

                # Optionally fetch tracks (costs extra credits)
                if config.FETCH_TRACKS:
                    track_id = upsert_flight(conn, item, target_date)
                    if track_id:
                        points = fetch_track(fr24_id)
                        if points is not None:
                            upsert_track_points(conn, track_id, points)
                            track_loaded = 1
                        else:
                            stats["errors"] += 1

                if use_savepoints:
                    _execute(conn, f"RELEASE SAVEPOINT {_ITEM_SAVEPOINT}")
            except Exception as e:
                _rollback_item(conn, use_savepoints)
                stats["errors"] += 1
                log.error("FR24: error processing %s: %s", fr24_id, e)
            else:
                page_upserted += upserted
                page_tracks += track_loaded
                log.debug("FR24: %s upserted", fr24_id)

        # Commit after each page — partial progress survives errors on later pages
        try:
            conn.commit()
        except Exception as e:
            conn.rollback()
            log.error("Commit failed: %s", e)
            stats["errors"] += 1
        else:
            stats["flights_upserted"] += page_upserted
            stats["tracks_loaded"] += page_tracks
            log.debug("Committed page, total so far: %d", stats["flights_upserted"])

    log.info("FR24 worker: found %d flights, upserted %d",
             stats["flights_found"], stats["flights_upserted"])

    # 2. Enrich schedule with actual times
    try:
        enriched = enrich_schedule(conn, target_date)
        conn.commit()
        stats["schedule_enriched"] = enriched
        log.info("FR24 worker: enriched %d schedule rows", enriched)
    except Exception as e:
        conn.rollback()
        log.error("FR24 worker: schedule enrichment failed: %s", e)
        stats["errors"] += 1

    # 3. Supplement schedule with flights from FR24 not in Yandex
    try:
        supplemented = supplement_schedule(conn, target_date)
        conn.commit()
        stats["schedule_supplemented"] = supplemented
        log.info("FR24 worker: supplemented %d new schedule rows", supplemented)
    except Exception as e:
        conn.rollback()
        log.error("FR24 worker: supplement_schedule failed: %s", e)
        stats["errors"] += 1

    log.info("FR24 worker done: %s", stats)
    return stats