Files
wiki/tasks/flightradar24/ingest/tracks_fr24/fr24_worker.py
2026-04-26 02:10:01 +03:00

542 lines
21 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
FR24 tracks worker.
1. GET /api/flight-summary/full for each day → actual flight data (up to 20000)
2. Optionally: GET /api/flight-tracks?flight_id={fr24_id} → track points
3. Upsert into fr24_ext.flight_actual (actual data)
4. Upsert into fr24_ext.flight_tracks_fr24 + fr24_ext.track_points_fr24 (tracks)
5. Enrich fr24_ext.schedule with actual times + delays
"""
import logging
import re
import time
from datetime import date, datetime, timezone
from typing import Dict, Iterator, List, Optional
import psycopg2
import psycopg2.extras
import requests
from config import config
# Module logger; the name is a fixed string rather than derived from __name__.
log = logging.getLogger("fr24_worker")
# HTTP headers attached to every FR24 API request issued by _get().
# The bearer token is read from config once, at import time.
HEADERS = {
    "Authorization": f"Bearer {config.FR24_API_KEY}",
    "Accept": "application/json",
    "Accept-Version": "v1",
}
# time.monotonic() reading stored by _throttle() after its wait;
# stays 0.0 until the first throttled call.
_last_request_at: float = 0.0
def _throttle():
    """Sleep as needed so consecutive calls are spaced by config.RATE_LIMIT_SEC.

    The spacing is measured with time.monotonic() against the module-level
    ``_last_request_at`` stamp, which is refreshed on every call (after any
    sleep). The original note describes the intended budget as
    "max 10 req/min"; the effective rate is whatever RATE_LIMIT_SEC is set to.
    """
    global _last_request_at
    since_last = time.monotonic() - _last_request_at
    remaining = config.RATE_LIMIT_SEC - since_last
    if remaining > 0:
        time.sleep(remaining)
    # Stamp after sleeping so the next interval starts from the real send time.
    _last_request_at = time.monotonic()
def _get(path: str, params: Optional[dict] = None, *, max_retries: int = 5) -> dict:
    """GET ``config.FR24_API_BASE + path`` and return the decoded JSON body.

    Every attempt goes through _throttle() first. An HTTP 429 response is
    retried after sleeping for the server-provided ``Retry-After`` number of
    seconds (60 s when the header is absent or not an integer).

    Args:
        path: URL path appended to config.FR24_API_BASE.
        params: Optional query-string parameters.
        max_retries: Maximum number of retries after a 429 before giving up.

    Returns:
        The parsed JSON payload (whatever ``resp.json()`` yields).

    Raises:
        requests.HTTPError: For any 4xx/5xx response, including a 429 that
            persists after ``max_retries`` retries.
        requests.RequestException: For transport-level failures.
    """
    url = f"{config.FR24_API_BASE}{path}"
    retries_done = 0
    # Iterative retry: the previous recursive form had no upper bound and
    # grew the call stack by one frame per 429.
    while True:
        _throttle()
        resp = requests.get(url, headers=HEADERS, params=params, timeout=60)
        if resp.status_code != 429 or retries_done >= max_retries:
            break
        retries_done += 1
        try:
            # Retry-After may also be an HTTP-date; int() would raise on that.
            retry_after = max(0, int(resp.headers.get("Retry-After", 60)))
        except (TypeError, ValueError):
            retry_after = 60
        log.warning("Rate limited, sleeping %ds", retry_after)
        time.sleep(retry_after)
    resp.raise_for_status()
    return resp.json()
def _build_airports_param() -> str:
    """Return the ``airports`` query value built from config.

    config.AIRPORTS is split on commas, blank entries are dropped, and each
    remaining code is prefixed with config.AIRPORT_DIRECTION_PREFIX, e.g.
    ``both:SVO,both:DME,both:VKO,both:ZIA``.
    """
    prefix = config.AIRPORT_DIRECTION_PREFIX
    parts = []
    for raw in config.AIRPORTS.split(","):
        code = raw.strip()
        if code:
            parts.append(f"{prefix}{code}")
    return ",".join(parts)
def iter_flight_summary_pages(target_date: date) -> Iterator[List[Dict]]:
    """Yield flight-summary pages for ``target_date``, one list per API page.

    Pagination stops on a request error (logged), on an empty or non-list
    payload, after config.MAX_PAGES pages, or when a page comes back shorter
    than the page size.

    Flights are deduplicated by ``fr24_id`` both across pages and within a
    single page; the airports param 'both:SVO,both:DME,both:VKO,both:ZIA'
    can return the same flight several times. Items without an ``fr24_id``
    are passed through unchanged; items that are not dicts are dropped.

    Args:
        target_date: Day to query, from 00:00:00 to 23:59:59.

    Yields:
        The not-yet-seen items of each fetched page (possibly an empty list).
    """
    # NOTE(review): run() estimates credit usage with config.PAGE_SIZE, while
    # the request size here is this local constant; confirm they agree.
    PAGE = 20  # Explorer tier hard limit per request
    airports_param = _build_airports_param()
    dt_from = f"{target_date}T00:00:00"
    dt_to = f"{target_date}T23:59:59"
    offset = 0
    seen_fr24_ids: set = set()  # dedup across and within pages
    page_num = 0
    while True:
        try:
            data = _get("/api/flight-summary/full", params={
                "flight_datetime_from": dt_from,
                "flight_datetime_to": dt_to,
                "airports": airports_param,
                "limit": PAGE,
                "offset": offset,
            })
        except Exception as e:
            log.error("fetch page offset=%d failed: %s", offset, e)
            break
        # The payload is either {"data": [...]} or a bare list.
        items = data.get("data", data) if isinstance(data, dict) else data
        if not items or not isinstance(items, list):
            break
        unique: List[Dict] = []
        for entry in items:
            if not isinstance(entry, dict):
                continue  # malformed element; nothing to key on
            flight_id = entry.get("fr24_id")
            if flight_id:
                if flight_id in seen_fr24_ids:
                    continue
                # Register immediately so a repeat later in this same page
                # is filtered too.
                seen_fr24_ids.add(flight_id)
            unique.append(entry)
        log.debug(
            "iter_flight_summary_pages: page=%d offset=%d got=%d unique=%d total_seen=%d",
            page_num, offset, len(items), len(unique), len(seen_fr24_ids),
        )
        yield unique
        page_num += 1
        if page_num >= config.MAX_PAGES:
            log.warning("Reached MAX_PAGES=%d, stopping pagination", config.MAX_PAGES)
            break
        # The raw item count (not the deduplicated one) decides whether the
        # server has more rows.
        if len(items) < PAGE:
            break  # last page
        offset += PAGE
def fetch_track(fr24_id: str) -> Optional[List[Dict]]:
    """Fetch the track points of one flight from /api/flight-tracks.

    Args:
        fr24_id: FR24 flight identifier sent as the ``flight_id`` parameter.

    Returns:
        The ``tracks`` list from the response (taken from the first element
        when the payload is a list, or from the object itself when it is a
        dict), ``[]`` for any other payload shape, or ``None`` when the
        request failed.
    """
    try:
        data = _get("/api/flight-tracks", params={"flight_id": fr24_id})
    except (requests.RequestException, ValueError) as e:
        # RequestException covers HTTP errors as well as timeouts and
        # connection failures; ValueError covers an undecodable JSON body.
        # Letting these escape would hit the caller's generic handler, which
        # rolls back the DB transaction over a mere network hiccup.
        log.warning("Failed to fetch track for %s: %s", fr24_id, e)
        return None
    if isinstance(data, list) and data:
        first = data[0]
        return first.get("tracks", []) if isinstance(first, dict) else []
    if isinstance(data, dict):
        return data.get("tracks", [])
    return []
# ── DB upsert: flight_actual ─────────────────────────────────────────────────
def upsert_flight_actual(conn, item: Dict, target_date: date) -> Optional[int]:
    """Upsert one flight-summary item into fr24_ext.flight_actual.

    Rows are keyed by ``fr24_id``; on conflict every data column except
    ``flight_date`` is overwritten and ``fetched_at`` is set to now().

    Args:
        conn: DB connection exposing ``cursor()`` as a context manager.
        item: One flight-summary record.
        target_date: Stored as ``flight_date`` on insert.

    Returns:
        The row id reported by ``RETURNING id``, or ``None`` when the item
        has no ``fr24_id`` or no row came back.
    """
    fr24_id = item.get("fr24_id")
    if not fr24_id:
        return None

    def either(primary: str, fallback: str):
        # Prefer the primary key; fall back when it is missing or falsy.
        return item.get(primary) or item.get(fallback)

    sql = """
INSERT INTO fr24_ext.flight_actual
(fr24_id, flight, callsign, operated_as, origin_icao, dest_icao,
orig_iata, dest_iata,
datetime_takeoff, datetime_landed, flight_time,
runway_takeoff, runway_landed, actual_distance, category,
registration,
flight_ended, first_seen, last_seen, flight_date)
VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
ON CONFLICT (fr24_id) DO UPDATE SET
flight = EXCLUDED.flight,
callsign = EXCLUDED.callsign,
operated_as = EXCLUDED.operated_as,
origin_icao = EXCLUDED.origin_icao,
dest_icao = EXCLUDED.dest_icao,
orig_iata = EXCLUDED.orig_iata,
dest_iata = EXCLUDED.dest_iata,
datetime_takeoff = EXCLUDED.datetime_takeoff,
datetime_landed = EXCLUDED.datetime_landed,
flight_time = EXCLUDED.flight_time,
runway_takeoff = EXCLUDED.runway_takeoff,
runway_landed = EXCLUDED.runway_landed,
actual_distance = EXCLUDED.actual_distance,
category = EXCLUDED.category,
registration = EXCLUDED.registration,
flight_ended = EXCLUDED.flight_ended,
first_seen = EXCLUDED.first_seen,
last_seen = EXCLUDED.last_seen,
fetched_at = now()
RETURNING id
"""
    # Order must match the column list of the INSERT above.
    values = (
        fr24_id,
        item.get("flight"),
        item.get("callsign"),
        either("operating_as", "operated_as"),  # FR24 uses operating_as
        either("orig_icao", "origin_icao"),  # FR24 uses orig_icao
        item.get("dest_icao"),
        item.get("orig_iata"),
        item.get("dest_iata"),
        item.get("datetime_takeoff"),
        item.get("datetime_landed"),
        item.get("flight_time"),
        item.get("runway_takeoff"),
        item.get("runway_landed"),
        either("actual_distance", "distance"),
        item.get("category"),
        either("reg", "registration"),  # FR24 uses reg
        item.get("flight_ended", False),
        item.get("first_seen"),
        item.get("last_seen"),
        target_date,
    )
    with conn.cursor() as cur:
        cur.execute(sql, values)
        row = cur.fetchone()
        if row:
            return row[0]
        return None
# ── DB upsert: flight_tracks_fr24 (existing, kept for tracks) ────────────────
def upsert_flight(conn, summary: Dict, target_date: date) -> Optional[int]:
    """Upsert the track header row for a flight into fr24_ext.flight_tracks_fr24.

    Rows are keyed by ``fr24_id``; on conflict every data column except
    ``flight_date`` is overwritten and ``fetched_at`` is set to now().

    The airport and registration fields accept both spellings that appear in
    this module: the summary keys ``orig_icao`` / ``dest_icao`` / ``reg``
    (the ones upsert_flight_actual reads from the same item) and the longer
    ``origin_icao`` / ``destination_icao`` / ``registration``.

    Args:
        conn: DB connection exposing ``cursor()`` as a context manager.
        summary: One flight-summary record.
        target_date: Stored as ``flight_date`` on insert.

    Returns:
        The row id reported by ``RETURNING id``, or ``None`` when the summary
        has no ``fr24_id`` or no row came back.
    """
    fr24_id = summary.get("fr24_id")
    if not fr24_id:
        # Without the conflict key every call would insert a new orphan row.
        return None
    with conn.cursor() as cur:
        cur.execute(
            """
INSERT INTO fr24_ext.flight_tracks_fr24
(fr24_id, flight_number, callsign, aircraft_type, registration,
origin_icao, destination_icao, actual_takeoff, actual_landed, flight_date)
VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
ON CONFLICT (fr24_id) DO UPDATE SET
flight_number = EXCLUDED.flight_number,
callsign = EXCLUDED.callsign,
aircraft_type = EXCLUDED.aircraft_type,
registration = EXCLUDED.registration,
origin_icao = EXCLUDED.origin_icao,
destination_icao = EXCLUDED.destination_icao,
actual_takeoff = EXCLUDED.actual_takeoff,
actual_landed = EXCLUDED.actual_landed,
fetched_at = now()
RETURNING id
""",
            (
                fr24_id,
                summary.get("flight"),
                summary.get("callsign"),
                summary.get("type"),
                summary.get("reg") or summary.get("registration"),
                # Previously only origin_icao / destination_icao were read,
                # which left both columns NULL for summary items keyed as
                # orig_icao / dest_icao.
                summary.get("orig_icao") or summary.get("origin_icao"),
                summary.get("dest_icao") or summary.get("destination_icao"),
                summary.get("datetime_takeoff"),
                summary.get("datetime_landed"),
                target_date,
            ),
        )
        row = cur.fetchone()
        return row[0] if row else None
def upsert_track_points(conn, track_id: int, points: List[Dict]):
    """Replace all stored points of ``track_id`` with ``points``.

    Existing rows for the track are always deleted first, even when
    ``points`` is empty. Points lacking ``lat`` or ``lon`` are skipped; the
    rest are bulk-inserted with psycopg2.extras.execute_values.

    Args:
        conn: DB connection exposing ``cursor()`` as a context manager.
        track_id: Id of the parent row in fr24_ext.flight_tracks_fr24.
        points: Track point dicts as returned by the API.
    """
    insert_sql = """
INSERT INTO fr24_ext.track_points_fr24
(track_id, observed_at, lat, lon, altitude_ft, gspeed_kt,
vspeed_fpm, heading, squawk, source)
VALUES %s
"""
    with conn.cursor() as cur:
        cur.execute("DELETE FROM fr24_ext.track_points_fr24 WHERE track_id = %s", (track_id,))
        if not points:
            return
        rows = []
        for p in points:
            # A point without coordinates is unusable; drop it.
            if p.get("lat") is None or p.get("lon") is None:
                continue
            rows.append(
                (
                    track_id,
                    p.get("timestamp"),
                    p.get("lat"),
                    p.get("lon"),
                    p.get("alt"),
                    p.get("gspeed"),
                    p.get("vspeed"),
                    p.get("track"),
                    p.get("squawk"),
                    p.get("source"),
                )
            )
        psycopg2.extras.execute_values(cur, insert_sql, rows)
# ── Enrich schedule with actual times ────────────────────────────────────────
def _normalize_flight_number(fn: str) -> str:
"""
Normalize flight number for matching.
'SU 1234''SU1234', 'SU1234''SU1234'
"""
if not fn:
return ""
return re.sub(r"\s+", "", fn.strip().upper())
def enrich_schedule(conn, target_date: date) -> int:
    """Copy actual times and delays from flight_actual onto schedule rows.

    A schedule row for ``target_date`` is matched to a flight_actual row
    when their flight numbers are equal after removing spaces and
    upper-casing, their ``flight_date`` values are equal, and the actual
    flight's origin (for departures) or destination (for arrivals) is one of
    the four Moscow ICAO codes listed in the query. Matched rows receive
    actual_takeoff, actual_landed, fr24_id, flight_category, both delay
    columns and a fresh fetched_at.

    Args:
        conn: DB connection exposing ``cursor()`` as a context manager.
        target_date: Schedule date to enrich.

    Returns:
        ``cursor.rowcount`` after the UPDATE, i.e. the number of schedule
        rows updated.
    """
    # Match by normalized flight number + flight_date.
    # For departures the schedule airport is the origin, so fa.origin_icao
    # must be a Moscow ICAO; for arrivals it is the destination, so
    # fa.dest_icao must be a Moscow ICAO.
    # NOTE(review): both delay columns are computed from the same
    # direction-dependent difference against scheduled_at, so they hold the
    # same value whenever scheduled_at is set - confirm this is intended.
    # NOTE(review): the join does not compare s.airport_iata with the actual
    # airport, so one schedule row may match several flights - verify.
    query = """
WITH matches AS (
SELECT
s.schedule_id,
fa.fr24_id,
fa.datetime_takeoff AS actual_takeoff,
fa.datetime_landed AS actual_landed,
fa.category AS flight_category,
CASE
WHEN s.direction = 'departure' AND fa.datetime_takeoff IS NOT NULL
THEN EXTRACT(EPOCH FROM (fa.datetime_takeoff - s.scheduled_at))::int / 60
WHEN s.direction = 'arrival' AND fa.datetime_landed IS NOT NULL
THEN EXTRACT(EPOCH FROM (fa.datetime_landed - s.scheduled_at))::int / 60
END AS delay_takeoff_min,
CASE
WHEN s.direction = 'arrival' AND fa.datetime_landed IS NOT NULL AND s.scheduled_at IS NOT NULL
THEN EXTRACT(EPOCH FROM (fa.datetime_landed - s.scheduled_at))::int / 60
WHEN s.direction = 'departure' AND fa.datetime_takeoff IS NOT NULL AND s.scheduled_at IS NOT NULL
THEN EXTRACT(EPOCH FROM (fa.datetime_takeoff - s.scheduled_at))::int / 60
END AS delay_landed_min
FROM fr24_ext.schedule s
JOIN fr24_ext.flight_actual fa
ON UPPER(REPLACE(fa.flight, ' ', '')) = UPPER(REPLACE(s.flight_number, ' ', ''))
AND fa.flight_date = s.flight_date
WHERE s.flight_date = %s
AND (
(s.direction = 'departure' AND fa.origin_icao IN ('UUEE','UUDD','UUWW','UUBW'))
OR
(s.direction = 'arrival' AND fa.dest_icao IN ('UUEE','UUDD','UUWW','UUBW'))
)
)
UPDATE fr24_ext.schedule s
SET
actual_takeoff = m.actual_takeoff,
actual_landed = m.actual_landed,
fr24_id = m.fr24_id,
flight_category = m.flight_category,
delay_takeoff_min = m.delay_takeoff_min,
delay_landed_min = m.delay_landed_min,
fetched_at = now()
FROM matches m
WHERE s.schedule_id = m.schedule_id
"""
    with conn.cursor() as cur:
        cur.execute(query, (target_date,))
        return cur.rowcount
# ── Supplement schedule with FR24 flights not in Yandex ─────────────────────
# Moscow airports: ICAO → IATA (for supplement_schedule)
_MOSCOW_ICAO_TO_IATA: Dict[str, str] = {
"UUEE": "SVO",
"UUDD": "DME",
"UUWW": "VKO",
"UUBW": "ZIA",
}
_MOSCOW_ICAO_SET = set(_MOSCOW_ICAO_TO_IATA.keys())
def supplement_schedule(conn, target_date: date) -> int:
    """Insert schedule rows for FR24 flights that have no schedule record.

    Source: fr24_ext.flight_actual rows for ``target_date`` with a non-empty
    flight number and a Moscow origin or destination (UUEE/UUDD/UUWW/UUBW).
    A flight is skipped when any schedule row of the same date carries the
    same flight number (compared without spaces, case-insensitively).
    Target: fr24_ext.schedule, with source='fr24' and status='actual';
    conflicts on (flight_number, airport_iata, scheduled_at, direction) are
    ignored.

    ``airline_iata`` is the leading designator of the upper-cased,
    space-stripped flight number: two or three letters, or a letter/digit
    pair such as S7, U6 or 5N. The previous letters-only pattern returned
    NULL for designators containing a digit and for lower-case input.

    Args:
        conn: DB connection exposing ``cursor()`` as a context manager.
        target_date: Flight date to supplement.

    Returns:
        ``cursor.rowcount`` after the INSERT, i.e. the number of rows inserted.
    """
    with conn.cursor() as cur:
        cur.execute(
            """
INSERT INTO fr24_ext.schedule
(flight_date, airport_iata, direction, flight_number,
airline_iata, origin_iata, destination_iata,
aircraft_type, scheduled_at,
actual_takeoff, actual_landed,
status, source, fr24_id)
SELECT
fa.flight_date,
-- airport_iata: the Moscow airport involved in this flight
CASE
WHEN fa.origin_icao IN ('UUEE','UUDD','UUWW','UUBW')
THEN CASE fa.origin_icao
WHEN 'UUEE' THEN 'SVO'
WHEN 'UUDD' THEN 'DME'
WHEN 'UUWW' THEN 'VKO'
WHEN 'UUBW' THEN 'ZIA'
END
ELSE CASE fa.dest_icao
WHEN 'UUEE' THEN 'SVO'
WHEN 'UUDD' THEN 'DME'
WHEN 'UUWW' THEN 'VKO'
WHEN 'UUBW' THEN 'ZIA'
END
END AS airport_iata,
-- direction: departure if origin is Moscow, otherwise arrival
CASE
WHEN fa.origin_icao IN ('UUEE','UUDD','UUWW','UUBW')
THEN 'departure'
ELSE 'arrival'
END AS direction,
-- flight_number: normalised (strip spaces)
UPPER(REGEXP_REPLACE(fa.flight, '\\s+', '', 'g')) AS flight_number,
-- airline_iata: leading airline designator of the normalised flight number
SUBSTRING(UPPER(REGEXP_REPLACE(fa.flight, '\\s+', '', 'g')) FROM '^([A-Z]{2,3}|[A-Z][0-9]|[0-9][A-Z])') AS airline_iata,
-- origin_iata: map known Moscow ICAOs, others NULL
CASE fa.origin_icao
WHEN 'UUEE' THEN 'SVO'
WHEN 'UUDD' THEN 'DME'
WHEN 'UUWW' THEN 'VKO'
WHEN 'UUBW' THEN 'ZIA'
ELSE NULL
END AS origin_iata,
-- destination_iata: map known Moscow ICAOs, others NULL
CASE fa.dest_icao
WHEN 'UUEE' THEN 'SVO'
WHEN 'UUDD' THEN 'DME'
WHEN 'UUWW' THEN 'VKO'
WHEN 'UUBW' THEN 'ZIA'
ELSE NULL
END AS destination_iata,
NULL AS aircraft_type,
-- scheduled_at: takeoff time for departures, landed for arrivals; fallback to first_seen
COALESCE(
CASE
WHEN fa.origin_icao IN ('UUEE','UUDD','UUWW','UUBW')
THEN fa.datetime_takeoff
ELSE fa.datetime_landed
END,
fa.first_seen
) AS scheduled_at,
fa.datetime_takeoff AS actual_takeoff,
fa.datetime_landed AS actual_landed,
'actual' AS status,
'fr24' AS source,
fa.fr24_id
FROM fr24_ext.flight_actual fa
WHERE fa.flight_date = %(date)s
AND fa.flight IS NOT NULL
AND fa.flight != ''
-- Must involve at least one Moscow airport
AND (
fa.origin_icao IN ('UUEE','UUDD','UUWW','UUBW')
OR fa.dest_icao IN ('UUEE','UUDD','UUWW','UUBW')
)
-- Skip flights already present in schedule
AND NOT EXISTS (
SELECT 1 FROM fr24_ext.schedule s
WHERE UPPER(REPLACE(s.flight_number, ' ', ''))
= UPPER(REPLACE(fa.flight, ' ', ''))
AND s.flight_date = fa.flight_date
)
ON CONFLICT (flight_number, airport_iata, scheduled_at, direction) DO NOTHING
""",
            {"date": target_date},
        )
        return cur.rowcount
# ── Main entry ───────────────────────────────────────────────────────────────
def _process_flight(conn, item: Dict, fr24_id: str, target_date: date) -> Dict[str, int]:
    """Upsert one summary item (plus its track when enabled); return stat deltas."""
    delta = {"flights_upserted": 0, "tracks_loaded": 0, "errors": 0}
    if upsert_flight_actual(conn, item, target_date):
        delta["flights_upserted"] = 1
    # Optionally fetch tracks (costs extra credits)
    if config.FETCH_TRACKS:
        track_id = upsert_flight(conn, item, target_date)
        if track_id:
            points = fetch_track(fr24_id)
            if points is not None:
                upsert_track_points(conn, track_id, points)
                delta["tracks_loaded"] = 1
            else:
                # Track request failed; the header row is still kept.
                delta["errors"] = 1
    return delta


def run(target_date: date, conn) -> Dict:
    """Load FR24 flight summaries (and optionally tracks) for one day.

    Steps:
      1. Page through the flight summaries and upsert each flight, fetching
         its track when config.FETCH_TRACKS is set.
      2. Enrich fr24_ext.schedule with actual times and delays.
      3. Supplement fr24_ext.schedule with FR24 flights it lacks.

    Each flight is committed on its own, so a failure on one flight rolls
    back only that flight. Previously the commit happened once per page, and
    a mid-page rollback silently discarded the earlier flights of the page
    while they were still counted as upserted.

    Args:
        target_date: Day to process.
        conn: DB connection providing cursor(), commit() and rollback().

    Returns:
        Stats dict with keys ``date``, ``flights_found``,
        ``flights_upserted``, ``tracks_loaded``, ``schedule_enriched``,
        ``schedule_supplemented`` and ``errors``.
    """
    log.info("FR24 worker: starting for %s", target_date)
    estimated_max = config.MAX_PAGES * config.PAGE_SIZE
    log.info("FR24 worker: MAX_PAGES=%d, PAGE_SIZE=%d, estimated max flights=%d",
             config.MAX_PAGES, config.PAGE_SIZE, estimated_max)
    # NOTE(review): the guard only logs at CRITICAL and the run continues;
    # confirm whether it is meant to abort.
    if estimated_max > config.CREDIT_GUARD_MAX_FLIGHTS:
        log.critical("CREDIT GUARD: estimated max flights %d exceeds limit %d "
                     "(верифицируй MAX_PAGES=%d и PAGE_SIZE=%d перед запуском!)",
                     estimated_max, config.CREDIT_GUARD_MAX_FLIGHTS,
                     config.MAX_PAGES, config.PAGE_SIZE)
    stats = {
        "date": str(target_date),
        "flights_found": 0,
        "flights_upserted": 0,
        "tracks_loaded": 0,
        "schedule_enriched": 0,
        # Present from the start so the key exists even if step 3 fails.
        "schedule_supplemented": 0,
        "errors": 0,
    }
    # 1. Fetch flight summaries page by page, commit after each flight
    for page in iter_flight_summary_pages(target_date):
        stats["flights_found"] += len(page)
        for item in page:
            fr24_id = item.get("fr24_id")
            if not fr24_id:
                continue
            try:
                delta = _process_flight(conn, item, fr24_id, target_date)
                conn.commit()
            except Exception as e:
                # Only this flight's uncommitted work is discarded.
                conn.rollback()
                stats["errors"] += 1
                log.error("FR24: error processing %s: %s", fr24_id, e)
                continue
            # Count only after the commit succeeded, so the stats reflect
            # what is actually stored.
            for key, value in delta.items():
                stats[key] += value
            log.debug("FR24: %s upserted", fr24_id)
        log.debug("Committed page, total so far: %d", stats["flights_upserted"])
    log.info("FR24 worker: found %d flights, upserted %d",
             stats["flights_found"], stats["flights_upserted"])
    # 2. Enrich schedule with actual times
    try:
        enriched = enrich_schedule(conn, target_date)
        conn.commit()
        stats["schedule_enriched"] = enriched
        log.info("FR24 worker: enriched %d schedule rows", enriched)
    except Exception as e:
        conn.rollback()
        log.error("FR24 worker: schedule enrichment failed: %s", e)
        stats["errors"] += 1
    # 3. Supplement schedule with flights from FR24 not in Yandex
    try:
        supplemented = supplement_schedule(conn, target_date)
        conn.commit()
        stats["schedule_supplemented"] = supplemented
        log.info("FR24 worker: supplemented %d new schedule rows", supplemented)
    except Exception as e:
        conn.rollback()
        log.error("FR24 worker: supplement_schedule failed: %s", e)
        stats["errors"] += 1
    log.info("FR24 worker done: %s", stats)
    return stats