"""
Mart builder: merges all track sources into fr24_mart.

Priority: RTL-SDR > FR24 > FlightAware

For each flight in fr24_ext.schedule:
  1. Find matching tracks from each source
  2. Pick best available track
  3. Copy points to fr24_mart.track_points with noise_score
  4. Update fr24_mart.noise_grid (0.01° cells)
  5. Update fr24_mart.source_coverage
"""
import logging
import re
from datetime import date
from typing import Dict, List, Optional, Tuple

import psycopg2
import psycopg2.extras

from noise_model import altitude_to_noise_db

log = logging.getLogger("build_mart")

# ft → m conversion
FT_TO_M = 0.3048

# ICAO → IATA airport mapping (for matching schedule IATA to track ICAO)
# NOTE(review): several IATA values appear under two ICAO keys (KZN, MMK, PES),
# so the IATA_TO_ICAO inversion below keeps only the last ICAO key for each of
# them. Verify these entries against an authoritative airport list.
ICAO_TO_IATA = {
    "UUEE": "SVO", "UUDD": "DME", "UUWW": "VKO", "UUBW": "ZIA",
    "ULLI": "LED", "USSS": "SVX", "UNNT": "OVB", "UUEM": "KZN",
    "UWGG": "GOJ", "UWUR": "MCX", "URSS": "AER", "URKK": "KRR",
    "UMMS": "MSQ", "UKBB": "KBP", "UKLL": "LWO", "UTTT": "TAS",
    "UTAA": "ASB", "LTFM": "IST", "EDDF": "FRA", "LFPG": "CDG",
    "EGLL": "LHR", "LEMD": "MAD", "LIRF": "FCO", "EHAM": "AMS",
    "LPPT": "LIS", "EDDM": "MUC", "LOWW": "VIE", "LKPR": "PRG",
    "EPWA": "WAW", "EVRA": "RIX", "EYVI": "VNO", "EETN": "TLL",
    "UACC": "TSE", "UATG": "GUW", "UAII": "CIT", "UTNU": "UGC",
    "UTSB": "BHK", "UTSS": "SKD", "UTST": "TJK", "OEGS": "GIZ",
    "RJTT": "HND", "RJBB": "KIX", "ZBAA": "PEK", "ZSSS": "SHA",
    "ZSPD": "PVG", "VIDP": "DEL", "VABB": "BOM", "OMDB": "DXB",
    "OTHH": "DOH", "OJAI": "AMM", "LLBG": "TLV",
    # Russian domestic
    "URWW": "VOG", "UNKL": "KJA", "USCC": "CEK", "UWUU": "UFA",
    "XWGS": "PEE", "UWKD": "KZN", "ULMM": "MMK", "ULWW": "PES",
    "ULPB": "PES", "UUYY": "SYK", "USDD": "TOF", "UNOO": "OMS",
    "UNBB": "BAX", "UNWW": "NOZ", "ULWC": "KEM", "UIII": "IKT",
    "UITT": "UUS", "UIBB": "BQS", "UHPP": "PKC", "UHMD": "GDX",
    "UHMM": "MMK", "UASK": "IGT", "UATT": "AKX", "UAOO": "KSN",
    "UKDD": "DNK", "UKDE": "ZAP",
    # Turkey
    "LTBA": "ISL", "LTAI": "AYT", "LTBS": "DLM", "LTBJ": "ADB",
    # Spain / Canaries / Portugal
    "LEAL": "ALC", "LEPA": "PMI", "GCTS": "TFS", "GCRR": "ACE",
    "GCFV": "FUE", "GCLA": "SPC", "LPFR": "FAO",
    # Cyprus
    "LCLK": "LCA", "LCPH": "PFO",
    # Africa
    "HECA": "CAI", "HTDA": "DAR", "HAAB": "ADD", "HESH": "SSH",
    "HEGN": "HRG",
    # Southeast Asia
    "VDPP": "PNH", "VVNB": "HAN", "VVTS": "SGN", "WSSS": "SIN",
    "VTBD": "DMK", "VTBS": "BKK",
    # East Asia
    "RJAA": "NRT", "RKSI": "ICN", "ZBAD": "PKX", "ZGGG": "CAN",
    "ZHHH": "WUH", "ZWWW": "URC",
    # Gulf
    "OMAA": "AUH", "OERK": "RUH", "OEDF": "DMM",
}

IATA_TO_ICAO = {v: k for k, v in ICAO_TO_IATA.items()}

# IATA airline code → ICAO airline code (for RTL-SDR callsign conversion)
AIRLINE_IATA_TO_ICAO = {
    "SU": "AFL",  # Aeroflot
    "FV": "SDM",  # Rossiya
    "DP": "PBD",  # Pobeda
    "S7": "SBI",  # S7
    "U6": "SVR",  # Ural Airlines
    "UT": "UTS",  # UTair
    "N4": "NWS",  # Nordwind
    "5N": "AUL",  # Smartavia
    "7K": "KYV",  # Yamal
    "6W": "TZA",  # Saratov (historical)
    "ZX": "AZS",  # Azimuth
    "RT": "RLT",  # RusLine
    "TK": "THY",  # Turkish Airlines
    "LH": "DLH",  # Lufthansa
    "AF": "AFR",  # Air France
    "BA": "BAW",  # British Airways
    "EK": "UAE",  # Emirates
    "QR": "QTR",  # Qatar Airways
    "SV": "SVA",  # Saudia
    "ET": "ETH",  # Ethiopian
    "FZ": "FDB",  # flydubai
    "CZ": "CSN",  # China Southern
    "CA": "CCA",  # Air China
    "MU": "CES",  # China Eastern
    "HU": "CHH",  # Hainan Airlines
    "9C": "CQH",  # Spring Airlines
    "MS": "MSR",  # EgyptAir
    "AT": "RAM",  # Royal Air Maroc
    "IR": "IRA",  # Iran Air
    "W5": "IRM",  # Mahan Air
    "KC": "KZR",  # Air Astana
    "HY": "UZB",  # Uzbekistan Airways
    "T5": "TUA",  # Turkmenistan Airlines
    "J2": "AHY",  # Azerbaijan Airlines
    "A9": "TGZ",  # Georgian Airways
    "QN": "RLU",  # Royal Flight
}

# Two-character airline designator, optional whitespace, numeric flight number.
# The designator is pinned to exactly two characters (every key of
# AIRLINE_IATA_TO_ICAO is two characters long): a greedy 1-3 character group
# would swallow the first digit of an unspaced number such as 'SU1057'.
_FLIGHT_NUMBER_RE = re.compile(r"^([A-Z0-9]{2})\s*(\d+)$")


def _flight_number_to_callsign(flight_number: str) -> Optional[str]:
    """Convert 'SU 1057' (IATA) to 'AFL1057' (ICAO callsign) for RTL-SDR matching.

    Both the spaced ('SU 1057') and the compact ('SU1057') forms are accepted.

    Returns:
        The ICAO callsign, or None when the string does not look like
        "<2-char designator><digits>" or the designator is not present in
        AIRLINE_IATA_TO_ICAO.
    """
    m = _FLIGHT_NUMBER_RE.match(flight_number.strip())
    if not m:
        return None
    iata_code, num = m.group(1), m.group(2)
    icao_code = AIRLINE_IATA_TO_ICAO.get(iata_code)
    if icao_code:
        return f"{icao_code}{num}"
    return None


def _ft_to_m(ft: Optional[int]) -> Optional[int]:
    """Convert feet to whole metres (truncated toward zero); None passes through.

    The value is coerced through float() first so that a Decimal (as a
    database driver may return for numeric columns) does not raise TypeError
    when multiplied by the float conversion factor.
    """
    if ft is None:
        return None
    return int(float(ft) * FT_TO_M)


# ── source matchers ───────────────────────────────────────────

def find_rtlsdr_flight(conn, flight_number: str, flight_date: date) -> Optional[int]:
    """Return fr24.flights.flight_id for RTL-SDR data.

    Converts IATA flight_number (e.g. 'SU 1057') to ICAO callsign ('AFL1057')
    and returns the earliest-started flight with that callsign whose
    started_at falls on flight_date, or None when the flight number cannot be
    converted or no row matches.
    """
    callsign = _flight_number_to_callsign(flight_number)
    if not callsign:
        return None
    with conn.cursor() as cur:
        cur.execute(
            "SELECT f.flight_id FROM fr24.flights f "
            "WHERE f.callsign = %s AND f.started_at::date = %s "
            "ORDER BY f.started_at LIMIT 1",
            (callsign, flight_date),
        )
        row = cur.fetchone()
        return row[0] if row else None


def _extract_flight_num(flight_number: str) -> str:
    """Extract numeric part: 'FV 6807' → '6807', 'SU6807' → '6807'.

    Every non-digit character is dropped, so a designator that itself contains
    a digit contributes that digit ('S7 1234' → '71234'). The SQL fallbacks in
    find_fr24_track / find_fa_track apply the same normalisation on the
    database side, so both sides agree as long as they carry the same
    designator.
    NOTE(review): a track stored under a letters-only designator would not
    match a digit-bearing schedule designator through this path.
    """
    digits = re.sub(r"[^0-9]", "", flight_number)
    return digits


def _first_full_route_match(rows: List[Tuple],
                            origin_iata: Optional[str],
                            destination_iata: Optional[str]) -> Optional[Tuple]:
    """Return the first row whose ICAO origin and destination both map to the given IATA codes.

    Rows are (id, aircraft_type, origin_icao, destination_icao). Returns None
    when either IATA code is missing or no row matches.
    """
    if not (origin_iata and destination_iata):
        return None
    for row in rows:
        if (ICAO_TO_IATA.get(row[2]) == origin_iata
                and ICAO_TO_IATA.get(row[3]) == destination_iata):
            return row
    return None


def _first_origin_match(rows: List[Tuple],
                        origin_iata: Optional[str]) -> Optional[Tuple]:
    """Return the first row whose ICAO origin maps to origin_iata, or None."""
    if not origin_iata:
        return None
    for row in rows:
        if ICAO_TO_IATA.get(row[2]) == origin_iata:
            return row
    return None


def _pick_exact_match(rows: List[Tuple],
                      origin_iata: Optional[str],
                      destination_iata: Optional[str]) -> Tuple[int, str]:
    """Choose among rows that matched the flight identifier exactly (rows is non-empty).

    With several candidates the route is used to disambiguate; when that does
    not single one out, the first row wins (callers order by fetched_at DESC,
    so that is the most recently fetched one).
    """
    if len(rows) > 1:
        row = _first_full_route_match(rows, origin_iata, destination_iata)
        if row is not None:
            return (row[0], row[1])
    return (rows[0][0], rows[0][1])


def _pick_numeric_match(rows: List[Tuple],
                        origin_iata: Optional[str],
                        destination_iata: Optional[str]) -> Optional[Tuple[int, str]]:
    """Choose among rows that matched only on the numeric flight number.

    A numeric-only match must be confirmed by the route: full route first,
    then origin only. Without such confirmation the result is None.
    """
    row = _first_full_route_match(rows, origin_iata, destination_iata)
    if row is None:
        row = _first_origin_match(rows, origin_iata)
    if row is None:
        return None
    return (row[0], row[1])


def find_fr24_track(conn, flight_number: str, flight_date: date,
                    origin_iata: Optional[str] = None,
                    destination_iata: Optional[str] = None
                    ) -> Optional[Tuple[int, str]]:
    """Return (id, aircraft_type) from fr24_ext.flight_tracks_fr24.

    Matching order:
      1. exact flight_number (as given, or with spaces removed) on flight_date;
      2. numeric flight number on flight_date, confirmed by route (IATA→ICAO).

    Returns None when the flight number has no digits or nothing matches.
    """
    fnum = _extract_flight_num(flight_number)
    if not fnum:
        return None

    # The schedule uses 'FV 6807' while track tables store compact idents, so
    # try both spellings (find_fa_track strips spaces for the same reason).
    compact = flight_number.replace(" ", "")

    with conn.cursor() as cur:
        # First try exact match on flight_number
        cur.execute(
            """
            SELECT id, aircraft_type, origin_icao, destination_icao
            FROM fr24_ext.flight_tracks_fr24
            WHERE flight_number IN (%s, %s) AND flight_date = %s
            ORDER BY fetched_at DESC
            """,
            (flight_number, compact, flight_date),
        )
        rows = cur.fetchall()
        if rows:
            return _pick_exact_match(rows, origin_iata, destination_iata)

        # No exact match — try by numeric flight number
        # FR24 flight_number format: 'SU6807' (code + digits)
        # Schedule format: 'FV 6807' (IATA code + space + digits)
        # Match by the digits only
        cur.execute(
            """
            SELECT id, aircraft_type, origin_icao, destination_icao
            FROM fr24_ext.flight_tracks_fr24
            WHERE regexp_replace(flight_number, '[^0-9]', '', 'g') = %s
              AND flight_date = %s
            ORDER BY fetched_at DESC
            """,
            (fnum, flight_date),
        )
        rows = cur.fetchall()

    if not rows:
        return None
    return _pick_numeric_match(rows, origin_iata, destination_iata)


def find_fa_track(conn, flight_number: str, flight_date: date,
                  origin_iata: Optional[str] = None,
                  destination_iata: Optional[str] = None
                  ) -> Optional[Tuple[int, str]]:
    """Return (id, aircraft_type) from fr24_ext.flight_tracks_fa.

    Matching order:
      1. exact ident_iata (flight number with spaces removed) on flight_date;
      2. numeric flight number on flight_date, confirmed by route (IATA→ICAO).

    Returns None when the flight number has no digits or nothing matches.
    """
    fnum = _extract_flight_num(flight_number)
    if not fnum:
        return None

    ident = flight_number.replace(" ", "")

    with conn.cursor() as cur:
        # Exact match on ident_iata
        cur.execute(
            """
            SELECT id, aircraft_type, origin_icao, destination_icao
            FROM fr24_ext.flight_tracks_fa
            WHERE ident_iata = %s AND flight_date = %s
            ORDER BY fetched_at DESC
            """,
            (ident, flight_date),
        )
        rows = cur.fetchall()
        if rows:
            return _pick_exact_match(rows, origin_iata, destination_iata)

        # Try by numeric ident + route
        cur.execute(
            """
            SELECT id, aircraft_type, origin_icao, destination_icao
            FROM fr24_ext.flight_tracks_fa
            WHERE regexp_replace(ident_iata, '[^0-9]', '', 'g') = %s
              AND flight_date = %s
            ORDER BY fetched_at DESC
            """,
            (fnum, flight_date),
        )
        rows = cur.fetchall()

    if not rows:
        return None
    return _pick_numeric_match(rows, origin_iata, destination_iata)


# ── point fetchers ────────────────────────────────────────────

def _altitude_ft_to_m(rows: List[Dict]) -> List[Dict]:
    """Replace each row's 'altitude_ft' key with 'altitude_m' (in place) and return rows."""
    for r in rows:
        r["altitude_m"] = _ft_to_m(r.pop("altitude_ft", None))
    return rows


def get_rtlsdr_points(conn, flight_id: int) -> List[Dict]:
    """Return the RTL-SDR track points of a fr24.flights row, ordered by time.

    Each dict has the keys observed_at, lat, lon, altitude_m, speed_kt and
    heading; lat/lon are taken from the point geometry via ST_Y / ST_X.
    """
    with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
        cur.execute(
            """
            SELECT tp.observed_at,
                   ST_Y(tp.geom) AS lat, ST_X(tp.geom) AS lon,
                   tp.altitude_m, tp.ground_speed_kt AS speed_kt,
                   tp.heading_deg AS heading
            FROM fr24.track_points tp
            JOIN fr24.tracks t ON t.track_id = tp.track_id
            WHERE t.flight_id = %s
            ORDER BY tp.observed_at
            """,
            (flight_id,),
        )
        return [dict(r) for r in cur.fetchall()]


def get_fr24_points(conn, track_id: int) -> List[Dict]:
    """Return the FR24 track points of a track, ordered by time.

    Same dict keys as get_rtlsdr_points; the stored altitude_ft is converted
    to altitude_m.
    """
    with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
        cur.execute(
            """
            SELECT observed_at, lat, lon, altitude_ft, gspeed_kt AS speed_kt, heading
            FROM fr24_ext.track_points_fr24
            WHERE track_id = %s
            ORDER BY observed_at
            """,
            (track_id,),
        )
        rows = [dict(r) for r in cur.fetchall()]
    # convert ft → m
    return _altitude_ft_to_m(rows)


def get_fa_points(conn, track_id: int) -> List[Dict]:
    """Return the FlightAware track points of a track, ordered by time.

    Same dict keys as get_rtlsdr_points; the stored altitude_ft is converted
    to altitude_m.
    """
    with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
        cur.execute(
            """
            SELECT observed_at, lat, lon, altitude_ft, gspeed_kt AS speed_kt, heading
            FROM fr24_ext.track_points_fa
            WHERE track_id = %s
            ORDER BY observed_at
            """,
            (track_id,),
        )
        rows = [dict(r) for r in cur.fetchall()]
    return _altitude_ft_to_m(rows)


# ── mart writers ──────────────────────────────────────────────

def upsert_mart_flight(conn, sched: Dict, source_info: Dict) -> int:
    """Upsert into fr24_mart.flights, return mart flight id.

    Args:
        conn: open database connection; the caller owns commit/rollback.
        sched: schedule row; 'flight_date' is required, the other keys are
            read with .get(). 'callsign' falls back to 'flight_number'.
        source_info: per-source flags and ids as assembled by build().

    The row is keyed by (flight_date, callsign). On conflict every column
    except icao24 is overwritten; aircraft_type keeps the stored value when
    the new one is NULL.
    """
    with conn.cursor() as cur:
        cur.execute(
            """
            INSERT INTO fr24_mart.flights
                (flight_number, callsign, icao24, airline_iata,
                 origin_iata, destination_iata, aircraft_type,
                 flight_date, scheduled_dep,
                 has_schedule, has_rtlsdr, has_fr24, has_fa,
                 track_source, track_points,
                 schedule_id, fr24_track_id, fa_track_id, rtlsdr_flight_id,
                 updated_at)
            VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,now())
            ON CONFLICT (flight_date, callsign) DO UPDATE SET
                flight_number    = EXCLUDED.flight_number,
                airline_iata     = EXCLUDED.airline_iata,
                origin_iata      = EXCLUDED.origin_iata,
                destination_iata = EXCLUDED.destination_iata,
                aircraft_type    = COALESCE(EXCLUDED.aircraft_type, fr24_mart.flights.aircraft_type),
                scheduled_dep    = EXCLUDED.scheduled_dep,
                has_schedule     = EXCLUDED.has_schedule,
                has_rtlsdr       = EXCLUDED.has_rtlsdr,
                has_fr24         = EXCLUDED.has_fr24,
                has_fa           = EXCLUDED.has_fa,
                track_source     = EXCLUDED.track_source,
                track_points     = EXCLUDED.track_points,
                schedule_id      = EXCLUDED.schedule_id,
                fr24_track_id    = EXCLUDED.fr24_track_id,
                fa_track_id      = EXCLUDED.fa_track_id,
                rtlsdr_flight_id = EXCLUDED.rtlsdr_flight_id,
                updated_at       = now()
            RETURNING id
            """,
            (
                sched.get("flight_number"),
                sched.get("callsign") or sched.get("flight_number"),
                None,  # icao24 is not known at this stage
                sched.get("airline_iata"),
                sched.get("origin_iata"),
                sched.get("destination_iata"),
                source_info.get("aircraft_type"),
                sched["flight_date"],
                sched.get("scheduled_at"),
                True,  # has_schedule: every mart flight originates from the schedule
                source_info.get("has_rtlsdr", False),
                source_info.get("has_fr24", False),
                source_info.get("has_fa", False),
                source_info.get("track_source"),
                source_info.get("track_points", 0),
                sched.get("schedule_id"),
                source_info.get("fr24_track_id"),
                source_info.get("fa_track_id"),
                source_info.get("rtlsdr_flight_id"),
            ),
        )
        row = cur.fetchone()
        return row[0]


def insert_mart_points(conn, mart_flight_id: int, points: List[Dict],
                       source: str, aircraft_type: str):
    """Delete old mart points and insert new ones with noise_score.

    Args:
        conn: open database connection; the caller owns commit/rollback.
        mart_flight_id: fr24_mart.flights.id the points belong to.
        points: dicts with observed_at, lat, lon and optionally altitude_m,
            speed_kt, heading. An empty list only deletes the old points.
        source: label stored with every point.
        aircraft_type: passed to altitude_to_noise_db; a falsy value becomes
            "default".

    NOTE(review): a missing altitude is stored and scored as 0 m — confirm
    that this is intended rather than skipping or NULL-ing such points.
    """
    with conn.cursor() as cur:
        cur.execute("DELETE FROM fr24_mart.track_points WHERE flight_id = %s",
                    (mart_flight_id,))
        if not points:
            return

        args = []
        for p in points:
            alt_m = float(p.get("altitude_m") or 0)
            alt_ft = alt_m / FT_TO_M  # the noise model is called with feet
            noise = altitude_to_noise_db(alt_ft, aircraft_type or "default")
            args.append((
                mart_flight_id,
                p["observed_at"],
                p["lat"],
                p["lon"],
                alt_m,
                p.get("speed_kt"),
                p.get("heading"),
                source,
                round(noise, 2),
            ))

        psycopg2.extras.execute_values(
            cur,
            """
            INSERT INTO fr24_mart.track_points
                (flight_id, observed_at, lat, lon, altitude_m, speed_kt, heading, source, noise_score)
            VALUES %s
            """,
            args,
        )


def update_noise_grid(conn, flight_date: date):
    """Aggregate track_points into noise_grid by 0.01° cells.

    For every cell (lat/lon rounded to two decimals) touched by a flight of
    flight_date, the distinct flight count, mean noise_score and mean altitude
    are upserted under (grid_lat, grid_lon, period_date). Cells already stored
    for the date that receive no points in this run are left untouched.
    """
    with conn.cursor() as cur:
        cur.execute(
            """
            INSERT INTO fr24_mart.noise_grid
                (grid_lat, grid_lon, period_date, flight_count, noise_score, avg_altitude_m, updated_at)
            SELECT
                round(lat::numeric, 2)  AS grid_lat,
                round(lon::numeric, 2)  AS grid_lon,
                %s                      AS period_date,
                COUNT(DISTINCT flight_id) AS flight_count,
                AVG(tp.noise_score)     AS noise_score,
                AVG(altitude_m)         AS avg_altitude_m,
                now()
            FROM fr24_mart.track_points tp
            JOIN fr24_mart.flights f ON f.id = tp.flight_id
            WHERE f.flight_date = %s
            GROUP BY grid_lat, grid_lon
            ON CONFLICT (grid_lat, grid_lon, period_date) DO UPDATE SET
                flight_count   = EXCLUDED.flight_count,
                noise_score    = EXCLUDED.noise_score,
                avg_altitude_m = EXCLUDED.avg_altitude_m,
                updated_at     = now()
            """,
            (flight_date, flight_date),
        )


def update_source_coverage(conn, flight_date: date):
    """Recalculate source_coverage for the date.

    Logs the total / FR24 / RTL-SDR flight counts, then upserts one
    fr24_mart.source_coverage row keyed by coverage_date with per-source
    counts and percentages (NULL percentages when the date has no flights).
    """
    with conn.cursor() as cur:
        cur.execute(
            "SELECT count(*), count(*) FILTER(WHERE has_fr24), count(*) FILTER(WHERE has_rtlsdr)"
            " FROM fr24_mart.flights WHERE flight_date = %s",
            (flight_date,),
        )
        row = cur.fetchone()
        log.info("source_coverage debug: total=%s fr24=%s rtlsdr=%s", *row)
        cur.execute(
            """
            INSERT INTO fr24_mart.source_coverage
                (coverage_date, total_schedule,
                 with_rtlsdr, with_fr24, with_fa, schedule_only,
                 rtlsdr_pct, fr24_pct, fa_pct, updated_at)
            SELECT
                %s,
                COUNT(*)                                    AS total_schedule,
                COUNT(*) FILTER (WHERE has_rtlsdr)          AS with_rtlsdr,
                COUNT(*) FILTER (WHERE has_fr24)            AS with_fr24,
                COUNT(*) FILTER (WHERE has_fa)              AS with_fa,
                COUNT(*) FILTER (WHERE NOT has_rtlsdr AND NOT has_fr24 AND NOT has_fa) AS schedule_only,
                ROUND(100.0 * COUNT(*) FILTER (WHERE has_rtlsdr) / NULLIF(COUNT(*),0), 1),
                ROUND(100.0 * COUNT(*) FILTER (WHERE has_fr24)   / NULLIF(COUNT(*),0), 1),
                ROUND(100.0 * COUNT(*) FILTER (WHERE has_fa)     / NULLIF(COUNT(*),0), 1),
                now()
            FROM fr24_mart.flights
            WHERE flight_date = %s
            ON CONFLICT (coverage_date) DO UPDATE SET
                total_schedule = EXCLUDED.total_schedule,
                with_rtlsdr    = EXCLUDED.with_rtlsdr,
                with_fr24      = EXCLUDED.with_fr24,
                with_fa        = EXCLUDED.with_fa,
                schedule_only  = EXCLUDED.schedule_only,
                rtlsdr_pct     = EXCLUDED.rtlsdr_pct,
                fr24_pct       = EXCLUDED.fr24_pct,
                fa_pct         = EXCLUDED.fa_pct,
                updated_at     = now()
            """,
            (flight_date, flight_date),
        )


# ── main ──────────────────────────────────────────────────────

def _resolve_flight_sources(conn, sched: Dict, target_date: date
                            ) -> Tuple[Dict, List[Dict], Optional[str]]:
    """Look up one schedule row in every source and pick the best track.

    Sources are consulted in priority order RTL-SDR > FR24 > FlightAware; the
    first one that yields points provides the track. Lower-priority sources
    are still looked up so that their has_* flags and ids are recorded.

    Returns:
        (source_info, points, source_label) where source_label is 'rtlsdr',
        'fr24', 'fa' or None when no source has points.
    """
    flight_number = sched["flight_number"]
    origin_iata = sched.get("origin_iata")
    destination_iata = sched.get("destination_iata")

    source_info: Dict = {
        "has_rtlsdr": False,
        "has_fr24": False,
        "has_fa": False,
        "track_source": None,
        "track_points": 0,
        "aircraft_type": None,
        "fr24_track_id": None,
        "fa_track_id": None,
        "rtlsdr_flight_id": None,
    }
    points: List[Dict] = []
    source_label: Optional[str] = None
    chosen_type: Optional[str] = None  # aircraft type reported by the chosen source
    fr24_type: Optional[str] = None
    fa_type: Optional[str] = None

    # 1. Try RTL-SDR
    rtlsdr_id = find_rtlsdr_flight(conn, flight_number, target_date)
    if rtlsdr_id is not None:
        source_info["has_rtlsdr"] = True
        source_info["rtlsdr_flight_id"] = rtlsdr_id
        points = get_rtlsdr_points(conn, rtlsdr_id)
        if points:
            source_label = "rtlsdr"

    # 2. Try FR24
    fr24_result = find_fr24_track(
        conn, flight_number, target_date,
        origin_iata=origin_iata,
        destination_iata=destination_iata,
    )
    if fr24_result:
        fr24_id, fr24_type = fr24_result
        source_info["has_fr24"] = True
        source_info["fr24_track_id"] = fr24_id
        if not points:
            points = get_fr24_points(conn, fr24_id)
            if points:
                source_label = "fr24"
                chosen_type = fr24_type

    # 3. Try FlightAware
    fa_result = find_fa_track(
        conn, flight_number, target_date,
        origin_iata=origin_iata,
        destination_iata=destination_iata,
    )
    if fa_result:
        fa_id, fa_type = fa_result
        source_info["has_fa"] = True
        source_info["fa_track_id"] = fa_id
        if not points:
            points = get_fa_points(conn, fa_id)
            if points:
                source_label = "fa"
                chosen_type = fa_type

    # Prefer the type reported by the source that supplied the points, but
    # fall back to any matched FR24/FA track: the RTL-SDR lookup returns no
    # aircraft type, and scoring those points with the "default" type when a
    # matched track knows the real one loses information.
    source_info["aircraft_type"] = chosen_type or fr24_type or fa_type
    source_info["track_source"] = source_label
    source_info["track_points"] = len(points)
    return source_info, points, source_label


def build(target_date: date, conn) -> Dict:
    """Build the mart for one date and return run statistics.

    Every schedule flight of target_date is matched against the track sources
    and written to fr24_mart.flights / fr24_mart.track_points, then the noise
    grid and source coverage of the date are refreshed.

    Each flight is committed on its own. On a per-flight error only that
    flight is rolled back and counted in stats["errors"]; without the
    per-flight commit the rollback would also discard every flight processed
    before it, since rollback undoes everything since the last commit.

    Args:
        target_date: schedule date to process.
        conn: open psycopg2 connection; this function commits and rolls back
            on it.

    Returns:
        Dict with the keys date, schedule_flights, mart_flights, with_track
        and errors.

    NOTE(review): the schedule is de-duplicated per (flight_number, direction)
    while the mart row is keyed by (flight_date, callsign) and callsign is the
    flight number here, so two directions of one flight number end up in the
    same mart row — confirm this is intended.
    """
    log.info("Mart build: starting for %s", target_date)

    stats = {
        "date": str(target_date),
        "schedule_flights": 0,
        "mart_flights": 0,
        "with_track": 0,
        "errors": 0,
    }

    # Load schedule for the date
    with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
        cur.execute(
            """
            SELECT DISTINCT ON (flight_number, direction)
                   schedule_id, flight_number, airline_iata,
                   origin_iata, destination_iata, scheduled_at, aircraft_type, flight_date,
                   flight_number AS callsign
            FROM fr24_ext.schedule
            WHERE flight_date = %s
            ORDER BY flight_number, direction, scheduled_at
            """,
            (target_date,),
        )
        schedule = [dict(r) for r in cur.fetchall()]

    stats["schedule_flights"] = len(schedule)
    log.info("Mart build: %d schedule flights", len(schedule))

    for sched in schedule:
        flight_number = sched["flight_number"]
        try:
            source_info, points, source_label = _resolve_flight_sources(
                conn, sched, target_date
            )
            mart_id = upsert_mart_flight(conn, sched, source_info)
            if points:
                insert_mart_points(
                    conn, mart_id, points, source_label,
                    source_info.get("aircraft_type") or "default",
                )
            # Make this flight durable before moving on so that a later
            # failure cannot roll it back.
            conn.commit()
        except Exception as e:
            conn.rollback()
            stats["errors"] += 1
            log.exception("Mart: error processing %s: %s", flight_number, e)
            continue

        # Count only what was actually committed.
        stats["mart_flights"] += 1
        if points:
            stats["with_track"] += 1

    try:
        update_noise_grid(conn, target_date)
        update_source_coverage(conn, target_date)
        conn.commit()
    except Exception as e:
        conn.rollback()
        log.exception("Mart: error updating grid/coverage: %s", e)
        stats["errors"] += 1

    log.info("Mart build done: %s", stats)
    return stats