122 lines
3.8 KiB
Python
122 lines
3.8 KiB
Python
"""
|
|
Backfill CLI — loads historical schedule data for a date range.
|
|
Saves progress to fr24_ext.load_state so it can resume after interruption.
|
|
|
|
Usage:
|
|
python backfill.py --start-date 2026-04-01 --end-date 2026-04-19
|
|
python backfill.py --start-date 2026-04-01 --end-date 2026-04-19 --skip-opensky
python backfill.py --start-date 2026-04-01 --end-date 2026-04-19 --resume
|
|
"""
|
|
import argparse
|
|
import json
|
|
import logging
|
|
import sys
|
|
from datetime import date, timedelta
|
|
|
|
import psycopg2
|
|
|
|
from config import config
|
|
from yandex_worker import fetch_day as yandex_fetch_day
|
|
from opensky_worker import enrich_day as opensky_enrich_day
|
|
|
|
# Module-level logger; handlers and format are set up in the __main__ guard.
log = logging.getLogger(__name__)

# state_key of the fr24_ext.load_state row that records backfill progress.
STATE_KEY = "backfill_last_date"
|
|
|
|
|
|
def load_state(conn, key: str):
    """Fetch the state value saved under *key*.

    Looks up a single row in ``fr24_ext.load_state`` by ``state_key`` using
    a cursor obtained from *conn*.

    Returns:
        The ``state_value`` column of the matching row, or ``None`` when no
        row with that key exists.
    """
    query = "SELECT state_value FROM fr24_ext.load_state WHERE state_key = %s"
    with conn.cursor() as cursor:
        cursor.execute(query, (key,))
        record = cursor.fetchone()

    # fetchone() yields None when the key has never been stored.
    if not record:
        return None
    return record[0]
|
|
|
|
|
|
def save_state(conn, key: str, value: dict):
    """Store *value* under *key* in ``fr24_ext.load_state`` and commit.

    The value is serialized with ``json.dumps`` and cast to ``jsonb`` by the
    statement. An existing row with the same ``state_key`` is overwritten
    and its ``updated_at`` is set to ``now()``.
    """
    upsert_sql = """
        INSERT INTO fr24_ext.load_state (state_key, state_value)
        VALUES (%s, %s::jsonb)
        ON CONFLICT (state_key) DO UPDATE
        SET state_value = EXCLUDED.state_value,
            updated_at = now()
    """
    with conn.cursor() as cursor:
        params = (key, json.dumps(value))
        cursor.execute(upsert_sql, params)

    # Commit right away so the saved state is not left in an open transaction.
    conn.commit()
|
|
|
|
|
|
def _iso_date(text: str) -> date:
    """Parse a ``YYYY-MM-DD`` CLI value, reporting bad input through argparse."""
    try:
        return date.fromisoformat(text)
    except ValueError as exc:
        raise argparse.ArgumentTypeError(
            f"invalid date {text!r} (expected YYYY-MM-DD)"
        ) from exc


def main():
    """Run the backfill CLI over an inclusive date range.

    For every day from ``--start-date`` to ``--end-date`` this calls
    ``yandex_fetch_day`` and, unless ``--skip-opensky`` is given,
    ``opensky_enrich_day``. The day is recorded under ``STATE_KEY`` via
    ``save_state`` only after both steps succeeded. With ``--resume`` the
    start is moved to the day after the saved ``last_date`` when that is
    later than ``--start-date``.

    Processing stops at the first failing day or on Ctrl-C. The database
    connection is always closed, and a summary line is always logged.

    Exit status:
        0 on success or keyboard interrupt; 1 when ``--start-date`` is after
        ``--end-date`` or when a day failed; 2 (argparse) on malformed
        arguments.
    """
    parser = argparse.ArgumentParser(description="Backfill fr24_ext.schedule")
    parser.add_argument("--start-date", required=True, type=_iso_date,
                        help="YYYY-MM-DD")
    parser.add_argument("--end-date", required=True, type=_iso_date,
                        help="YYYY-MM-DD")
    parser.add_argument("--skip-opensky", action="store_true",
                        help="Skip OpenSky enrichment (faster, no icao24)")
    parser.add_argument("--resume", action="store_true",
                        help="Resume from last saved state (ignores --start-date if state exists)")
    args = parser.parse_args()

    start = args.start_date
    end = args.end_date

    if start > end:
        log.error("start-date must be <= end-date")
        sys.exit(1)

    total_flights = 0
    total_enriched = 0
    failed = False

    conn = psycopg2.connect(config.DB_DSN)
    try:
        # Resume from saved state if requested
        if args.resume:
            state = load_state(conn, STATE_KEY)
            if state and state.get("last_date"):
                last = date.fromisoformat(state["last_date"])
                resume_from = last + timedelta(days=1)
                # Never move the start backwards: an explicit --start-date
                # later than the saved progress wins.
                if resume_from > start:
                    log.info("Resuming from %s (last completed: %s)", resume_from, last)
                    start = resume_from

        if start > end:
            # Only reachable through --resume: saved progress already
            # covers the whole requested range.
            log.info("Nothing to backfill: saved state already covers up to %s", end)
        else:
            log.info("Backfill: %s → %s (%d days)", start, end, (end - start).days + 1)

        current = start
        while current <= end:
            log.info("── Processing %s ──", current)

            try:
                yandex_count = yandex_fetch_day(current, conn)
                total_flights += yandex_count
                log.info("Yandex: %d flights", yandex_count)

                if not args.skip_opensky:
                    opensky_count = opensky_enrich_day(current, conn)
                    total_enriched += opensky_count
                    log.info("OpenSky: %d enriched", opensky_count)

                # Mark the day complete only after every step succeeded, so
                # a resumed run redoes a partially processed day.
                save_state(conn, STATE_KEY, {"last_date": current.isoformat()})

            except KeyboardInterrupt:
                log.info("Interrupted. Progress saved up to %s", current - timedelta(days=1))
                break
            except Exception as e:
                # Top-level boundary: keep the traceback in the log and
                # remember the failure for the exit status.
                log.error("Failed on %s: %s — stopping", current, e, exc_info=True)
                failed = True
                break

            current += timedelta(days=1)
    finally:
        # Close even when resume handling or an unexpected error aborts the
        # run before or outside the per-day loop.
        conn.close()

    log.info("Backfill done. Flights: %d, Enriched: %d", total_flights, total_enriched)

    if failed:
        # Let cron jobs and wrapper scripts detect an incomplete backfill.
        sys.exit(1)
|
|
|
|
|
|
if __name__ == "__main__":
    # Send log records to stdout rather than logging's default stderr.
    stdout_handler = logging.StreamHandler(sys.stdout)

    # Configure the root logger once, before any work starts.
    logging.basicConfig(
        handlers=[stdout_handler],
        datefmt="%Y-%m-%dT%H:%M:%S",
        format="%(asctime)s [backfill] %(levelname)s %(message)s",
        level=logging.INFO,
    )

    main()
|