From 3e934a63fa777af8c100a248d8d03ffaef8d7fd3 Mon Sep 17 00:00:00 2001 From: Stream Date: Tue, 21 Apr 2026 13:10:01 +0300 Subject: [PATCH] auto-sync: 2026-04-21 13:10:01 --- tasks/flightradar24/ingest/mart/build_mart.py | 163 +++++++++++++++--- 1 file changed, 140 insertions(+), 23 deletions(-) diff --git a/tasks/flightradar24/ingest/mart/build_mart.py b/tasks/flightradar24/ingest/mart/build_mart.py index fc54145..8b12326 100644 --- a/tasks/flightradar24/ingest/mart/build_mart.py +++ b/tasks/flightradar24/ingest/mart/build_mart.py @@ -10,6 +10,7 @@ For each flight in fr24_ext.schedule: 5. Update fr24_mart.source_coverage """ import logging +import re from datetime import date from typing import Dict, List, Optional, Tuple @@ -23,6 +24,25 @@ log = logging.getLogger("build_mart") # ft → m conversion FT_TO_M = 0.3048 +# ICAO → IATA airport mapping (for matching schedule IATA to track ICAO) +ICAO_TO_IATA = { + "UUEE": "SVO", "UUDD": "DME", "UUWW": "VKO", "UUBW": "ZIA", + "ULLI": "LED", "USSS": "SVX", "UNNT": "OVB", "UUEM": "KZN", + "UWGG": "GOJ", "UWUR": "MCX", "URSS": "AER", "URKK": "KRR", + "UMMS": "MSQ", "UKBB": "KBP", "UKLL": "LWO", "UTTT": "TAS", + "UTAA": "ASB", "LTFM": "IST", "EDDF": "FRA", "LFPG": "CDG", + "EGLL": "LHR", "LEMD": "MAD", "LIRF": "FCO", "EHAM": "AMS", + "LPPT": "LIS", "EDDM": "MUC", "LOWW": "VIE", "LKPR": "PRG", + "EPWA": "WAW", "EVRA": "RIX", "EYVI": "VNO", "EETN": "TLL", + "UACC": "TSE", "UATG": "GUW", "UAII": "CIT", "UTNU": "UGC", + "UTSB": "BHK", "UTSS": "SKD", "UTST": "TJK", "OEGS": "GIZ", + "RJTT": "HND", "RJBB": "KIX", "ZBAA": "PEK", "ZSSS": "SHA", + "ZSPD": "PVG", "VIDP": "DEL", "VABB": "BOM", "OMDB": "DXB", + "OTHH": "DOH", "OJAI": "AMM", "LLBG": "TLV", +} + +IATA_TO_ICAO = {v: k for k, v in ICAO_TO_IATA.items()} + def _ft_to_m(ft: Optional[int]) -> Optional[int]: if ft is None: @@ -49,37 +69,127 @@ def find_rtlsdr_flight(conn, callsign: str, flight_date: date) -> Optional[int]: return row[0] if row else None -def find_fr24_track(conn, flight_number: str, flight_date: date) -> Optional[Tuple[int, str]]: - """Return (id, aircraft_type) from fr24_ext.flight_tracks_fr24.""" +def _extract_flight_num(flight_number: str) -> str: + """Extract numeric part: 'FV 6807' → '6807', 'SU6807' → '6807'.""" + digits = re.sub(r'[^0-9]', '', flight_number) + return digits + + +def find_fr24_track(conn, flight_number: str, flight_date: date, + origin_iata: str = None, destination_iata: str = None + ) -> Optional[Tuple[int, str]]: + """Return (id, aircraft_type) from fr24_ext.flight_tracks_fr24. + Matches by numeric flight number + optional route (IATA→ICAO).""" + fnum = _extract_flight_num(flight_number) + if not fnum: + return None + with conn.cursor() as cur: + # First try exact match on flight_number cur.execute( """ - SELECT id, aircraft_type FROM fr24_ext.flight_tracks_fr24 + SELECT id, aircraft_type, origin_icao, destination_icao + FROM fr24_ext.flight_tracks_fr24 WHERE flight_number = %s AND flight_date = %s ORDER BY fetched_at DESC - LIMIT 1 """, (flight_number, flight_date), ) - row = cur.fetchone() - return (row[0], row[1]) if row else None + rows = cur.fetchall() + if rows: + if len(rows) == 1: + return (rows[0][0], rows[0][1]) + # Multiple matches — try to disambiguate by route + if origin_iata and destination_iata: + for row in rows: + orig_iata = ICAO_TO_IATA.get(row[2]) + dest_iata = ICAO_TO_IATA.get(row[3]) + if orig_iata == origin_iata and dest_iata == destination_iata: + return (row[0], row[1]) + return (rows[0][0], rows[0][1]) - -def find_fa_track(conn, flight_number: str, flight_date: date) -> Optional[Tuple[int, str]]: - """Return (id, aircraft_type) from fr24_ext.flight_tracks_fa.""" - ident = flight_number.replace(" ", "") - with conn.cursor() as cur: + # No exact match — try by numeric flight number + # FR24 flight_number format: 'SU6807' (ICAO code + digits) + # Schedule format: 'FV 6807' (IATA code + space + digits) + # Match by numeric suffix cur.execute( """ - SELECT id, aircraft_type FROM fr24_ext.flight_tracks_fa + SELECT id, aircraft_type, origin_icao, destination_icao + FROM fr24_ext.flight_tracks_fr24 + WHERE regexp_replace(flight_number, '[^0-9]', '', 'g') = %s + AND flight_date = %s + ORDER BY fetched_at DESC + """, + (fnum, flight_date), + ) + rows = cur.fetchall() + if not rows: + return None + + # Prefer route match + if origin_iata and destination_iata: + for row in rows: + orig_iata = ICAO_TO_IATA.get(row[2]) + dest_iata = ICAO_TO_IATA.get(row[3]) + if orig_iata == origin_iata and dest_iata == destination_iata: + return (row[0], row[1]) + + # Fallback: first match by number only + return (rows[0][0], rows[0][1]) + + +def find_fa_track(conn, flight_number: str, flight_date: date, + origin_iata: str = None, destination_iata: str = None + ) -> Optional[Tuple[int, str]]: + """Return (id, aircraft_type) from fr24_ext.flight_tracks_fa. + Matches by numeric flight number + optional route.""" + fnum = _extract_flight_num(flight_number) + if not fnum: + return None + + ident = flight_number.replace(" ", "") + + with conn.cursor() as cur: + # Exact match on ident_iata + cur.execute( + """ + SELECT id, aircraft_type, origin_iata, destination_iata + FROM fr24_ext.flight_tracks_fa WHERE ident_iata = %s AND flight_date = %s ORDER BY fetched_at DESC - LIMIT 1 """, (ident, flight_date), ) - row = cur.fetchone() - return (row[0], row[1]) if row else None + rows = cur.fetchall() + if rows: + if len(rows) == 1: + return (rows[0][0], rows[0][1]) + if origin_iata and destination_iata: + for row in rows: + if row[2] == origin_iata and row[3] == destination_iata: + return (row[0], row[1]) + return (rows[0][0], rows[0][1]) + + # Try by numeric ident + route + cur.execute( + """ + SELECT id, aircraft_type, origin_iata, destination_iata + FROM fr24_ext.flight_tracks_fa + WHERE ident_iata ~ (%s || '$') AND flight_date = %s + ORDER BY fetched_at DESC + """, + (fnum, flight_date), + ) + rows = cur.fetchall() + if not rows: + return None + + if origin_iata and destination_iata: + for row in rows: + if row[2] == origin_iata and row[3] == destination_iata: + return (row[0], row[1]) + + return (rows[0][0], rows[0][1]) # ── point fetchers ──────────────────────────────────────────── @@ -180,7 +290,7 @@ def upsert_mart_flight(conn, sched: Dict, source_info: Dict) -> int: ( sched.get("flight_number"), sched.get("callsign") or sched.get("flight_number"), - sched.get("icao24"), + None, sched.get("airline_iata"), sched.get("origin_iata"), sched.get("destination_iata"), @@ -193,7 +303,7 @@ def upsert_mart_flight(conn, sched: Dict, source_info: Dict) -> int: source_info.get("has_fa", False), source_info.get("track_source"), source_info.get("track_points", 0), - sched.get("id"), + sched.get("schedule_id"), source_info.get("fr24_track_id"), source_info.get("fa_track_id"), source_info.get("rtlsdr_flight_id"), @@ -322,10 +432,9 @@ def build(target_date: date, conn) -> Dict: cur.execute( """ SELECT DISTINCT ON (flight_number, direction) - id, flight_number, airline_iata, origin_iata, destination_iata, - scheduled_at, icao24, flight_date, - -- use callsign from icao24 if available, else flight_number - COALESCE(icao24, flight_number) AS callsign + schedule_id, flight_number, airline_iata, origin_iata, destination_iata, + scheduled_at, aircraft_type, flight_date, + flight_number AS callsign FROM fr24_ext.schedule WHERE flight_date = %s ORDER BY flight_number, direction, scheduled_at @@ -363,7 +472,11 @@ def build(target_date: date, conn) -> Dict: source_label = "rtlsdr" # 2. Try FR24 - fr24_result = find_fr24_track(conn, flight_number, target_date) + fr24_result = find_fr24_track( + conn, flight_number, target_date, + origin_iata=sched.get("origin_iata"), + destination_iata=sched.get("destination_iata"), + ) if fr24_result: source_info["has_fr24"] = True source_info["fr24_track_id"] = fr24_result[0] @@ -375,7 +488,11 @@ def build(target_date: date, conn) -> Dict: source_info["aircraft_type"] = fr24_result[1] # 3. Try FlightAware - fa_result = find_fa_track(conn, flight_number, target_date) + fa_result = find_fa_track( + conn, flight_number, target_date, + origin_iata=sched.get("origin_iata"), + destination_iata=sched.get("destination_iata"), + ) if fa_result: source_info["has_fa"] = True source_info["fa_track_id"] = fa_result[0]