542 lines
21 KiB
Python
542 lines
21 KiB
Python
"""
|
||
FR24 tracks worker.

1. GET /api/flight-summary/full for each day → actual flight data (up to 20000)
2. Optionally: GET /api/flight-tracks?flight_id={fr24_id} → track points
3. Upsert into fr24_ext.flight_actual (actual data)
4. Upsert into fr24_ext.flight_tracks_fr24 + fr24_ext.track_points_fr24 (tracks)
5. Enrich fr24_ext.schedule with actual times + delays
6. Supplement fr24_ext.schedule with FR24 flights that have no schedule row
|
||
"""
|
||
import logging
|
||
import re
|
||
import time
|
||
from datetime import date, datetime, timezone
|
||
from typing import Dict, Iterator, List, Optional
|
||
|
||
import psycopg2
|
||
import psycopg2.extras
|
||
import requests
|
||
|
||
from config import config
|
||
|
||
# Module-wide logger for the FR24 worker.
log = logging.getLogger("fr24_worker")

# Headers attached to every FR24 API request: bearer token taken from the
# project config, JSON responses, API version v1.
HEADERS = {
    "Authorization": f"Bearer {config.FR24_API_KEY}",
    "Accept": "application/json",
    "Accept-Version": "v1",
}

# time.monotonic() reading recorded by _throttle() after its last wait;
# stays at 0.0 until the first request goes out.
_last_request_at: float = 0.0
|
||
|
||
|
||
def _throttle():
    """Pace outgoing API calls.

    Sleeps for whatever is left of ``config.RATE_LIMIT_SEC`` since the
    previous call, then stores the current monotonic clock reading in the
    module-level ``_last_request_at`` as the new reference point.
    """
    global _last_request_at
    since_last = time.monotonic() - _last_request_at
    remaining = config.RATE_LIMIT_SEC - since_last
    if remaining > 0:
        time.sleep(remaining)
    _last_request_at = time.monotonic()
|
||
|
||
|
||
def _get(path: str, params: Optional[dict] = None, max_retries: int = 5) -> dict:
    """GET ``path`` from the FR24 API and return the decoded JSON body.

    Every attempt goes through ``_throttle()`` first. An HTTP 429 response
    is retried after sleeping for the number of seconds given in the
    ``Retry-After`` header (60 s when the header is missing or is not a
    plain integer, e.g. the HTTP-date form).

    Args:
        path: Path appended to ``config.FR24_API_BASE``.
        params: Optional query-string parameters.
        max_retries: How many times a 429 response is retried before the
            429 is surfaced to the caller as an error.

    Returns:
        Whatever ``resp.json()`` decodes (callers in this module handle
        both dict and list payloads).

    Raises:
        requests.HTTPError: On a non-2xx final response, including a 429
            that persisted through all retries.
        requests.RequestException: On connection errors or timeouts.
    """
    url = f"{config.FR24_API_BASE}{path}"
    retries_done = 0
    while True:
        _throttle()
        resp = requests.get(url, headers=HEADERS, params=params, timeout=60)
        if resp.status_code != 429 or retries_done >= max_retries:
            break
        retries_done += 1
        try:
            # Clamp at zero: time.sleep() rejects negative values.
            retry_after = max(0, int(resp.headers.get("Retry-After", 60)))
        except (TypeError, ValueError):
            # Retry-After may legally be an HTTP-date; fall back to the default.
            retry_after = 60
        log.warning("Rate limited, sleeping %ds", retry_after)
        time.sleep(retry_after)
    # A loop instead of recursion keeps the retry count bounded and the
    # stack flat when the API keeps answering 429.
    resp.raise_for_status()
    return resp.json()
|
||
|
||
|
||
def _build_airports_param() -> str:
    """Return the ``airports`` query value for the flight-summary endpoint.

    Splits ``config.AIRPORTS`` on commas, drops blank entries, prepends
    ``config.AIRPORT_DIRECTION_PREFIX`` to every code and joins the result
    with commas (the original docstring gives
    ``both:SVO,both:DME,both:VKO,both:ZIA`` as the expected shape).
    """
    prefix = config.AIRPORT_DIRECTION_PREFIX
    parts = []
    for raw_code in config.AIRPORTS.split(","):
        code = raw_code.strip()
        if code:
            parts.append(f"{prefix}{code}")
    return ",".join(parts)
|
||
|
||
|
||
def iter_flight_summary_pages(target_date: date) -> Iterator[List[Dict]]:
    """Yield flight-summary results for ``target_date`` one page at a time.

    Pagination stops when a request fails (the error is logged, not
    raised), when a response is empty or not a list, when a short page is
    received, or after ``config.MAX_PAGES`` pages.

    Flights are deduplicated by ``fr24_id`` both across pages and within a
    single page, because the multi-airport ``airports`` filter can return
    the same flight more than once. Entries without an ``fr24_id`` are
    passed through untouched; entries that are not dicts are skipped.

    Args:
        target_date: Day to query, from 00:00:00 to 23:59:59.

    Yields:
        The not-yet-seen flight dicts of each page (possibly an empty list).
    """
    PAGE = 20  # Explorer tier hard limit per request
    base_query = {
        "flight_datetime_from": f"{target_date}T00:00:00",
        "flight_datetime_to": f"{target_date}T23:59:59",
        "airports": _build_airports_param(),
        "limit": PAGE,
    }
    seen_fr24_ids: set = set()  # dedup across and within pages
    offset = 0
    page_num = 0

    while True:
        try:
            data = _get(
                "/api/flight-summary/full",
                params={**base_query, "offset": offset},
            )
        except Exception as e:
            # Best effort: keep what was already yielded, stop paginating.
            log.error("fetch page offset=%d failed: %s", offset, e)
            break

        # The payload is either {"data": [...]} or a bare list.
        items = data.get("data", data) if isinstance(data, dict) else data
        if not items or not isinstance(items, list):
            break

        unique: List[Dict] = []
        for item in items:
            if not isinstance(item, dict):
                continue
            fr24_id = item.get("fr24_id")
            if fr24_id:
                if fr24_id in seen_fr24_ids:
                    continue
                seen_fr24_ids.add(fr24_id)
            unique.append(item)

        log.debug(
            "iter_flight_summary_pages: page=%d offset=%d got=%d unique=%d total_seen=%d",
            page_num, offset, len(items), len(unique), len(seen_fr24_ids),
        )

        yield unique

        page_num += 1
        if page_num >= config.MAX_PAGES:
            log.warning("Reached MAX_PAGES=%d, stopping pagination", config.MAX_PAGES)
            break

        # The raw page size, not the deduplicated one, tells us whether
        # the server has more rows.
        if len(items) < PAGE:
            break  # last page
        offset += PAGE
|
||
|
||
|
||
def fetch_track(fr24_id: str) -> Optional[List[Dict]]:
    """Fetch the track points of a single flight.

    Args:
        fr24_id: FR24 flight identifier sent as the ``flight_id`` parameter.

    Returns:
        The list under the ``tracks`` key of the response (taken from the
        first element when the response is a list), ``[]`` when the
        response carries no usable track data, or ``None`` when the request
        itself failed (HTTP error, timeout, connection error, or an
        undecodable body). The failure is logged as a warning.
    """
    try:
        data = _get("/api/flight-tracks", params={"flight_id": fr24_id})
    except (requests.RequestException, ValueError) as e:
        # RequestException covers HTTPError as well as timeouts and
        # connection failures; ValueError covers a non-JSON body.
        log.warning("Failed to fetch track for %s: %s", fr24_id, e)
        return None

    if isinstance(data, list):
        data = data[0] if data else None
    if isinstance(data, dict):
        return data.get("tracks") or []
    return []
|
||
|
||
|
||
# ── DB upsert: flight_actual ─────────────────────────────────────────────────
|
||
|
||
def upsert_flight_actual(conn, item: Dict, target_date: date) -> Optional[int]:
    """Insert or refresh one row of ``fr24_ext.flight_actual``.

    Args:
        conn: Open DB connection; the caller owns commit/rollback.
        item: One flight-summary dict from the FR24 API.
        target_date: Stored as ``flight_date`` when the row is first
            inserted (it is not touched on conflict).

    Returns:
        The ``id`` returned by the statement, or ``None`` when ``item``
        has no ``fr24_id`` (nothing is executed) or no row comes back.
    """
    fr24_id = item.get("fr24_id")
    if not fr24_id:
        return None

    sql = """
        INSERT INTO fr24_ext.flight_actual
            (fr24_id, flight, callsign, operated_as, origin_icao, dest_icao,
             orig_iata, dest_iata,
             datetime_takeoff, datetime_landed, flight_time,
             runway_takeoff, runway_landed, actual_distance, category,
             registration,
             flight_ended, first_seen, last_seen, flight_date)
        VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
        ON CONFLICT (fr24_id) DO UPDATE SET
            flight = EXCLUDED.flight,
            callsign = EXCLUDED.callsign,
            operated_as = EXCLUDED.operated_as,
            origin_icao = EXCLUDED.origin_icao,
            dest_icao = EXCLUDED.dest_icao,
            orig_iata = EXCLUDED.orig_iata,
            dest_iata = EXCLUDED.dest_iata,
            datetime_takeoff = EXCLUDED.datetime_takeoff,
            datetime_landed = EXCLUDED.datetime_landed,
            flight_time = EXCLUDED.flight_time,
            runway_takeoff = EXCLUDED.runway_takeoff,
            runway_landed = EXCLUDED.runway_landed,
            actual_distance = EXCLUDED.actual_distance,
            category = EXCLUDED.category,
            registration = EXCLUDED.registration,
            flight_ended = EXCLUDED.flight_ended,
            first_seen = EXCLUDED.first_seen,
            last_seen = EXCLUDED.last_seen,
            fetched_at = now()
        RETURNING id
    """

    # Fields that may arrive under either of two key names; the first name
    # is the one the FR24 API uses.
    operated_as = item.get("operating_as") or item.get("operated_as")
    origin_icao = item.get("orig_icao") or item.get("origin_icao")
    distance = item.get("actual_distance") or item.get("distance")
    registration = item.get("reg") or item.get("registration")

    values = (
        fr24_id,
        item.get("flight"),
        item.get("callsign"),
        operated_as,
        origin_icao,
        item.get("dest_icao"),
        item.get("orig_iata"),
        item.get("dest_iata"),
        item.get("datetime_takeoff"),
        item.get("datetime_landed"),
        item.get("flight_time"),
        item.get("runway_takeoff"),
        item.get("runway_landed"),
        distance,
        item.get("category"),
        registration,
        item.get("flight_ended", False),
        item.get("first_seen"),
        item.get("last_seen"),
        target_date,
    )

    with conn.cursor() as cur:
        cur.execute(sql, values)
        returned = cur.fetchone()
        if returned:
            return returned[0]
        return None
|
||
|
||
|
||
# ── DB upsert: flight_tracks_fr24 (existing, kept for tracks) ────────────────
|
||
|
||
def upsert_flight(conn, summary: Dict, target_date: date) -> Optional[int]:
    """Insert or refresh the track header row in ``fr24_ext.flight_tracks_fr24``.

    Airport and registration fields are read under the FR24 key names
    (``orig_icao``, ``dest_icao``, ``reg``) first, then under the longer
    aliases (``origin_icao``, ``destination_icao``, ``registration``), so
    the same summary dict that feeds ``upsert_flight_actual`` fills this
    table too.

    Args:
        conn: Open DB connection; the caller owns commit/rollback.
        summary: One flight-summary dict from the FR24 API.
        target_date: Stored as ``flight_date`` when the row is first
            inserted (it is not touched on conflict).

    Returns:
        The ``id`` returned by the statement, or ``None`` when ``summary``
        has no ``fr24_id`` (nothing is executed) or no row comes back.
    """
    fr24_id = summary.get("fr24_id")
    if not fr24_id:
        return None

    values = (
        fr24_id,
        summary.get("flight"),
        summary.get("callsign"),
        summary.get("type"),
        summary.get("reg") or summary.get("registration"),
        summary.get("orig_icao") or summary.get("origin_icao"),
        summary.get("dest_icao") or summary.get("destination_icao"),
        summary.get("datetime_takeoff"),
        summary.get("datetime_landed"),
        target_date,
    )

    with conn.cursor() as cur:
        cur.execute(
            """
            INSERT INTO fr24_ext.flight_tracks_fr24
                (fr24_id, flight_number, callsign, aircraft_type, registration,
                 origin_icao, destination_icao, actual_takeoff, actual_landed, flight_date)
            VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
            ON CONFLICT (fr24_id) DO UPDATE SET
                flight_number = EXCLUDED.flight_number,
                callsign = EXCLUDED.callsign,
                aircraft_type = EXCLUDED.aircraft_type,
                registration = EXCLUDED.registration,
                origin_icao = EXCLUDED.origin_icao,
                destination_icao = EXCLUDED.destination_icao,
                actual_takeoff = EXCLUDED.actual_takeoff,
                actual_landed = EXCLUDED.actual_landed,
                fetched_at = now()
            RETURNING id
            """,
            values,
        )
        row = cur.fetchone()
        return row[0] if row else None
|
||
|
||
|
||
def upsert_track_points(conn, track_id: int, points: List[Dict]):
    """Replace all stored points of a track with ``points``.

    Existing rows for ``track_id`` are always deleted first, even when
    ``points`` is empty. Points lacking ``lat`` or ``lon`` are not
    inserted.

    Args:
        conn: Open DB connection; the caller owns commit/rollback.
        track_id: ``id`` of the parent ``flight_tracks_fr24`` row.
        points: Track point dicts as returned by the FR24 tracks endpoint.
    """
    insert_sql = """
        INSERT INTO fr24_ext.track_points_fr24
            (track_id, observed_at, lat, lon, altitude_ft, gspeed_kt,
             vspeed_fpm, heading, squawk, source)
        VALUES %s
    """
    # API field names, in the column order of the INSERT above
    # (track_id is prepended separately).
    point_keys = (
        "timestamp", "lat", "lon", "alt", "gspeed",
        "vspeed", "track", "squawk", "source",
    )

    with conn.cursor() as cur:
        cur.execute("DELETE FROM fr24_ext.track_points_fr24 WHERE track_id = %s", (track_id,))
        if points:
            rows = []
            for point in points:
                if point.get("lat") is None or point.get("lon") is None:
                    continue
                rows.append((track_id,) + tuple(point.get(key) for key in point_keys))
            psycopg2.extras.execute_values(cur, insert_sql, rows)
|
||
|
||
|
||
# ── Enrich schedule with actual times ────────────────────────────────────────
|
||
|
||
def _normalize_flight_number(fn: str) -> str:
|
||
"""
|
||
Normalize flight number for matching.
|
||
'SU 1234' → 'SU1234', 'SU1234' → 'SU1234'
|
||
"""
|
||
if not fn:
|
||
return ""
|
||
return re.sub(r"\s+", "", fn.strip().upper())
|
||
|
||
|
||
def enrich_schedule(conn, target_date: date) -> int:
    """Copy actual flight data onto the matching schedule rows of one day.

    A schedule row matches a ``flight_actual`` row when their flight
    numbers are equal after upper-casing and removing spaces, their
    ``flight_date`` is equal, and the actual flight touches a Moscow
    airport on the side implied by the schedule direction (origin for
    departures, destination for arrivals). Matched rows receive the actual
    takeoff/landing times, ``fr24_id``, category and both delay columns;
    the delay is minutes between the actual takeoff (departures) or
    landing (arrivals) and ``scheduled_at``.

    Args:
        conn: Open DB connection; the caller owns commit/rollback.
        target_date: Schedule day to enrich.

    Returns:
        Number of schedule rows updated (the cursor's ``rowcount``).
    """
    sql = """
        WITH matches AS (
            SELECT
                s.schedule_id,
                fa.fr24_id,
                fa.datetime_takeoff AS actual_takeoff,
                fa.datetime_landed AS actual_landed,
                fa.category AS flight_category,
                CASE
                    WHEN s.direction = 'departure' AND fa.datetime_takeoff IS NOT NULL
                        THEN EXTRACT(EPOCH FROM (fa.datetime_takeoff - s.scheduled_at))::int / 60
                    WHEN s.direction = 'arrival' AND fa.datetime_landed IS NOT NULL
                        THEN EXTRACT(EPOCH FROM (fa.datetime_landed - s.scheduled_at))::int / 60
                END AS delay_takeoff_min,
                CASE
                    WHEN s.direction = 'arrival' AND fa.datetime_landed IS NOT NULL AND s.scheduled_at IS NOT NULL
                        THEN EXTRACT(EPOCH FROM (fa.datetime_landed - s.scheduled_at))::int / 60
                    WHEN s.direction = 'departure' AND fa.datetime_takeoff IS NOT NULL AND s.scheduled_at IS NOT NULL
                        THEN EXTRACT(EPOCH FROM (fa.datetime_takeoff - s.scheduled_at))::int / 60
                END AS delay_landed_min
            FROM fr24_ext.schedule s
            JOIN fr24_ext.flight_actual fa
                ON UPPER(REPLACE(fa.flight, ' ', '')) = UPPER(REPLACE(s.flight_number, ' ', ''))
                AND fa.flight_date = s.flight_date
            WHERE s.flight_date = %s
                AND (
                    (s.direction = 'departure' AND fa.origin_icao IN ('UUEE','UUDD','UUWW','UUBW'))
                    OR
                    (s.direction = 'arrival' AND fa.dest_icao IN ('UUEE','UUDD','UUWW','UUBW'))
                )
        )
        UPDATE fr24_ext.schedule s
        SET
            actual_takeoff = m.actual_takeoff,
            actual_landed = m.actual_landed,
            fr24_id = m.fr24_id,
            flight_category = m.flight_category,
            delay_takeoff_min = m.delay_takeoff_min,
            delay_landed_min = m.delay_landed_min,
            fetched_at = now()
        FROM matches m
        WHERE s.schedule_id = m.schedule_id
    """
    with conn.cursor() as cur:
        cur.execute(sql, (target_date,))
        return cur.rowcount
|
||
|
||
|
||
# ── Supplement schedule with FR24 flights not in Yandex ─────────────────────
|
||
|
||
# Moscow airports: ICAO → IATA (for supplement_schedule)
|
||
_MOSCOW_ICAO_TO_IATA: Dict[str, str] = {
|
||
"UUEE": "SVO",
|
||
"UUDD": "DME",
|
||
"UUWW": "VKO",
|
||
"UUBW": "ZIA",
|
||
}
|
||
_MOSCOW_ICAO_SET = set(_MOSCOW_ICAO_TO_IATA.keys())
|
||
|
||
|
||
def supplement_schedule(conn, target_date: date) -> int:
    """Add schedule rows for FR24 flights that the schedule does not contain.

    Source: ``fr24_ext.flight_actual`` rows of ``target_date`` that have a
    non-empty flight number and a Moscow airport (UUEE, UUDD, UUWW, UUBW)
    as origin or destination.
    Target: ``fr24_ext.schedule`` with ``source='fr24'`` and
    ``status='actual'``.

    A flight is skipped when the schedule already holds a row with the
    same normalised flight number and date, or when the insert would hit
    the (flight_number, airport_iata, scheduled_at, direction) conflict
    target.

    ``airline_iata`` is the leading designator of the upper-cased flight
    number: three letters, or two characters of which at least one is a
    letter, so designators such as ``S7``, ``U6`` or ``5N`` are extracted
    too.

    Args:
        conn: Open DB connection; the caller owns commit/rollback.
        target_date: Day whose actual flights are considered.

    Returns:
        Number of rows inserted (the cursor's ``rowcount``).
    """
    with conn.cursor() as cur:
        cur.execute(
            """
            INSERT INTO fr24_ext.schedule
                (flight_date, airport_iata, direction, flight_number,
                 airline_iata, origin_iata, destination_iata,
                 aircraft_type, scheduled_at,
                 actual_takeoff, actual_landed,
                 status, source, fr24_id)
            SELECT
                fa.flight_date,
                -- airport_iata: the Moscow airport involved in this flight
                CASE
                    WHEN fa.origin_icao IN ('UUEE','UUDD','UUWW','UUBW')
                        THEN CASE fa.origin_icao
                            WHEN 'UUEE' THEN 'SVO'
                            WHEN 'UUDD' THEN 'DME'
                            WHEN 'UUWW' THEN 'VKO'
                            WHEN 'UUBW' THEN 'ZIA'
                        END
                    ELSE CASE fa.dest_icao
                        WHEN 'UUEE' THEN 'SVO'
                        WHEN 'UUDD' THEN 'DME'
                        WHEN 'UUWW' THEN 'VKO'
                        WHEN 'UUBW' THEN 'ZIA'
                    END
                END AS airport_iata,
                -- direction: departure if origin is Moscow, otherwise arrival
                CASE
                    WHEN fa.origin_icao IN ('UUEE','UUDD','UUWW','UUBW')
                        THEN 'departure'
                    ELSE 'arrival'
                END AS direction,
                -- flight_number: normalised (strip whitespace, upper-case)
                UPPER(REGEXP_REPLACE(fa.flight, '\\s+', '', 'g')) AS flight_number,
                -- airline_iata: leading airline designator; upper-case before
                -- matching and allow letter+digit / digit+letter codes (S7, U6, 5N)
                SUBSTRING(
                    UPPER(REGEXP_REPLACE(fa.flight, '\\s+', '', 'g'))
                    FROM '^([A-Z]{3}|[A-Z][A-Z0-9]|[0-9][A-Z])'
                ) AS airline_iata,
                -- origin_iata: map known Moscow ICAOs, others NULL
                CASE fa.origin_icao
                    WHEN 'UUEE' THEN 'SVO'
                    WHEN 'UUDD' THEN 'DME'
                    WHEN 'UUWW' THEN 'VKO'
                    WHEN 'UUBW' THEN 'ZIA'
                    ELSE NULL
                END AS origin_iata,
                -- destination_iata: map known Moscow ICAOs, others NULL
                CASE fa.dest_icao
                    WHEN 'UUEE' THEN 'SVO'
                    WHEN 'UUDD' THEN 'DME'
                    WHEN 'UUWW' THEN 'VKO'
                    WHEN 'UUBW' THEN 'ZIA'
                    ELSE NULL
                END AS destination_iata,
                NULL AS aircraft_type,
                -- scheduled_at: takeoff time for departures, landed for arrivals; fallback to first_seen
                COALESCE(
                    CASE
                        WHEN fa.origin_icao IN ('UUEE','UUDD','UUWW','UUBW')
                            THEN fa.datetime_takeoff
                        ELSE fa.datetime_landed
                    END,
                    fa.first_seen
                ) AS scheduled_at,
                fa.datetime_takeoff AS actual_takeoff,
                fa.datetime_landed AS actual_landed,
                'actual' AS status,
                'fr24' AS source,
                fa.fr24_id
            FROM fr24_ext.flight_actual fa
            WHERE fa.flight_date = %(date)s
                AND fa.flight IS NOT NULL
                AND fa.flight != ''
                -- Must involve at least one Moscow airport
                AND (
                    fa.origin_icao IN ('UUEE','UUDD','UUWW','UUBW')
                    OR fa.dest_icao IN ('UUEE','UUDD','UUWW','UUBW')
                )
                -- Skip flights already present in schedule
                AND NOT EXISTS (
                    SELECT 1 FROM fr24_ext.schedule s
                    WHERE UPPER(REPLACE(s.flight_number, ' ', ''))
                        = UPPER(REPLACE(fa.flight, ' ', ''))
                    AND s.flight_date = fa.flight_date
                )
            ON CONFLICT (flight_number, airport_iata, scheduled_at, direction) DO NOTHING
            """,
            {"date": target_date},
        )
        return cur.rowcount
|
||
|
||
|
||
# ── Main entry ───────────────────────────────────────────────────────────────
|
||
|
||
def _exec(conn, statement: str) -> None:
    """Execute one parameterless SQL statement on a short-lived cursor."""
    with conn.cursor() as cur:
        cur.execute(statement)


def _load_flight(conn, item: Dict, fr24_id: str, target_date: date) -> Dict[str, int]:
    """Upsert one flight (plus its track when enabled); return stat increments."""
    delta = {"flights_upserted": 0, "tracks_loaded": 0, "errors": 0}
    if upsert_flight_actual(conn, item, target_date):
        delta["flights_upserted"] = 1

    # Optionally fetch tracks (costs extra credits)
    if config.FETCH_TRACKS:
        track_id = upsert_flight(conn, item, target_date)
        if track_id:
            points = fetch_track(fr24_id)
            if points is not None:
                upsert_track_points(conn, track_id, points)
                delta["tracks_loaded"] = 1
            else:
                delta["errors"] = 1
    return delta


def _rollback_item(conn, use_savepoints: bool) -> None:
    """Undo the failed item only; fall back to a full rollback if that fails."""
    if use_savepoints:
        try:
            _exec(conn, "ROLLBACK TO SAVEPOINT fr24_item")
            return
        except Exception as e:
            log.warning("FR24: savepoint rollback failed, rolling back page: %s", e)
    conn.rollback()


def run(target_date: date, conn) -> Dict:
    """Load one day of FR24 data and reconcile it with the schedule.

    Steps:
      1. Page through the flight summaries, upserting every flight (and,
         when ``config.FETCH_TRACKS`` is set, its track). Each flight runs
         inside its own savepoint so a failing flight does not discard the
         other flights of the same page; the page is committed as a whole.
      2. Enrich existing schedule rows with actual times and delays.
      3. Insert schedule rows for FR24 flights missing from the schedule.

    Failures are logged and counted in ``errors``; they do not abort the
    remaining steps.

    Args:
        target_date: Day to process.
        conn: Open DB connection; this function commits and rolls back on it.

    Returns:
        Stats dict with keys ``date``, ``flights_found``,
        ``flights_upserted``, ``tracks_loaded``, ``schedule_enriched``,
        ``schedule_supplemented`` and ``errors``.
    """
    log.info("FR24 worker: starting for %s", target_date)
    estimated_max = config.MAX_PAGES * config.PAGE_SIZE
    log.info("FR24 worker: MAX_PAGES=%d, PAGE_SIZE=%d, estimated max flights=%d",
             config.MAX_PAGES, config.PAGE_SIZE, estimated_max)
    if estimated_max > config.CREDIT_GUARD_MAX_FLIGHTS:
        # NOTE(review): the guard only logs; the run continues as before.
        # Confirm whether it is meant to abort.
        log.critical("CREDIT GUARD: estimated max flights %d exceeds limit %d "
                     "(верифицируй MAX_PAGES=%d и PAGE_SIZE=%d перед запуском!)",
                     estimated_max, config.CREDIT_GUARD_MAX_FLIGHTS,
                     config.MAX_PAGES, config.PAGE_SIZE)
    stats = {
        "date": str(target_date),
        "flights_found": 0,
        "flights_upserted": 0,
        "tracks_loaded": 0,
        "schedule_enriched": 0,
        "schedule_supplemented": 0,
        "errors": 0,
    }

    # Savepoints need a transaction block; in autocommit mode every
    # statement commits on its own and there is nothing to protect.
    use_savepoints = not getattr(conn, "autocommit", False)

    # 1. Fetch flight summaries page by page, commit after each page
    for page in iter_flight_summary_pages(target_date):
        stats["flights_found"] += len(page)
        for item in page:
            fr24_id = item.get("fr24_id")
            if not fr24_id:
                continue
            try:
                if use_savepoints:
                    _exec(conn, "SAVEPOINT fr24_item")
                delta = _load_flight(conn, item, fr24_id, target_date)
                if use_savepoints:
                    _exec(conn, "RELEASE SAVEPOINT fr24_item")
            except Exception as e:
                _rollback_item(conn, use_savepoints)
                stats["errors"] += 1
                log.error("FR24: error processing %s: %s", fr24_id, e)
                continue

            # Count only work that survived; a rolled-back item adds nothing.
            for key, increment in delta.items():
                stats[key] += increment
            log.debug("FR24: %s upserted", fr24_id)

        # Commit after each page — partial progress survives errors on later pages
        try:
            conn.commit()
            log.debug("Committed page, total so far: %d", stats["flights_upserted"])
        except Exception as e:
            conn.rollback()
            log.error("Commit failed: %s", e)
            stats["errors"] += 1

    log.info("FR24 worker: found %d flights, upserted %d",
             stats["flights_found"], stats["flights_upserted"])

    # 2. Enrich schedule with actual times
    try:
        enriched = enrich_schedule(conn, target_date)
        conn.commit()
        stats["schedule_enriched"] = enriched
        log.info("FR24 worker: enriched %d schedule rows", enriched)
    except Exception as e:
        conn.rollback()
        log.error("FR24 worker: schedule enrichment failed: %s", e)
        stats["errors"] += 1

    # 3. Supplement schedule with flights from FR24 not in Yandex
    try:
        supplemented = supplement_schedule(conn, target_date)
        conn.commit()
        stats["schedule_supplemented"] = supplemented
        log.info("FR24 worker: supplemented %d new schedule rows", supplemented)
    except Exception as e:
        conn.rollback()
        log.error("FR24 worker: supplement_schedule failed: %s", e)
        stats["errors"] += 1

    log.info("FR24 worker done: %s", stats)
    return stats
|