"""Backfill CLI — loads historical schedule data for a date range.

Saves progress to fr24_ext.load_state so it can resume after interruption.

Usage:
    python backfill.py --start-date 2026-04-01 --end-date 2026-04-19
    python backfill.py --start-date 2026-04-01 --end-date 2026-04-19 --resume

Exit status:
    0    every day in the range was processed (or there was nothing to do)
    1    invalid date range, or a day failed and the run stopped early
    130  the run was interrupted with Ctrl-C
"""

import argparse
import json
import logging
import sys
from datetime import date, timedelta

import psycopg2

from config import config
from yandex_worker import fetch_day as yandex_fetch_day

log = logging.getLogger(__name__)

STATE_KEY = "backfill_last_date"

# Process exit codes returned by main().
_EXIT_OK = 0
_EXIT_FAILED = 1
_EXIT_INTERRUPTED = 130  # conventional 128 + SIGINT


def load_state(conn, key: str):
    """Return the stored state_value for *key*, or None when no row exists.

    Args:
        conn: open database connection (psycopg2-style, cursor usable as a
            context manager).
        key: value matched against fr24_ext.load_state.state_key.

    Returns:
        The first column of the matching row, or None if there is no row.
        NOTE(review): save_state() writes the value with a ``::jsonb`` cast,
        so this is presumably a jsonb column that the driver decodes into a
        Python object — confirm against the table definition.
    """
    with conn.cursor() as cur:
        cur.execute(
            "SELECT state_value FROM fr24_ext.load_state WHERE state_key = %s",
            (key,),
        )
        row = cur.fetchone()
    return row[0] if row else None


def save_state(conn, key: str, value: dict) -> None:
    """Upsert *value* (serialized as JSON) under *key* and commit.

    Args:
        conn: open database connection.
        key: state_key of the row to insert or update.
        value: JSON-serializable mapping stored as state_value.
    """
    with conn.cursor() as cur:
        cur.execute(
            """
            INSERT INTO fr24_ext.load_state (state_key, state_value)
            VALUES (%s, %s::jsonb)
            ON CONFLICT (state_key) DO UPDATE
                SET state_value = EXCLUDED.state_value,
                    updated_at = now()
            """,
            (key, json.dumps(value)),
        )
    # Commit right away so the checkpoint survives a later crash or Ctrl-C.
    conn.commit()


def _resolve_start(conn, start: date) -> date:
    """Return the first date to process, honouring the saved checkpoint."""
    state = load_state(conn, STATE_KEY)
    if state and state.get("last_date"):
        last = date.fromisoformat(state["last_date"])
        resume_from = last + timedelta(days=1)
        # Only ever move the start forward; a checkpoint older than the
        # requested start date is ignored.
        if resume_from > start:
            log.info("Resuming from %s (last completed: %s)", resume_from, last)
            return resume_from
    return start


def _run_backfill(conn, start: date, end: date) -> int:
    """Process every day in [start, end] in order; return an exit code."""
    current = start
    total_flights = 0
    status = _EXIT_OK

    log.info("Backfill: %s → %s (%d days)", start, end, (end - start).days + 1)

    while current <= end:
        log.info("── Processing %s ──", current)
        try:
            yandex_count = yandex_fetch_day(current, conn)
            total_flights += yandex_count
            log.info("Yandex: %d flights", yandex_count)
            # Checkpoint only after the day has been fetched successfully.
            save_state(conn, STATE_KEY, {"last_date": current.isoformat()})
        except KeyboardInterrupt:
            if current > start:
                log.info(
                    "Interrupted. Progress saved up to %s",
                    current - timedelta(days=1),
                )
            else:
                # Nothing finished in this run, so no checkpoint was written.
                log.info(
                    "Interrupted before %s completed; no new progress saved",
                    current,
                )
            status = _EXIT_INTERRUPTED
            break
        except Exception as e:
            # Top-level boundary: record the traceback and stop, so the
            # checkpoint never advances past a day that failed.
            log.exception("Failed on %s: %s — stopping", current, e)
            status = _EXIT_FAILED
            break
        current += timedelta(days=1)

    if status == _EXIT_OK:
        log.info("Backfill done. Flights: %d", total_flights)
    else:
        log.info(
            "Backfill stopped early at %s. Flights: %d", current, total_flights
        )
    return status


def main() -> int:
    """Parse CLI arguments and run the backfill.

    Returns:
        Process exit code: 0 on success, 1 if a day failed, 130 if the run
        was interrupted.

    Raises:
        SystemExit: on invalid arguments (raised by argparse) or when
            --start-date is later than --end-date.
    """
    parser = argparse.ArgumentParser(description="Backfill fr24_ext.schedule")
    # Parsing dates through argparse turns a malformed value into a normal
    # usage error instead of an unhandled ValueError traceback.
    parser.add_argument(
        "--start-date", required=True, type=date.fromisoformat, help="YYYY-MM-DD"
    )
    parser.add_argument(
        "--end-date", required=True, type=date.fromisoformat, help="YYYY-MM-DD"
    )
    parser.add_argument(
        "--resume", action="store_true", help="Resume from last saved state"
    )
    args = parser.parse_args()

    start = args.start_date
    end = args.end_date

    if start > end:
        log.error("start-date must be <= end-date")
        sys.exit(1)

    conn = psycopg2.connect(config.DB_DSN)
    try:
        if args.resume:
            start = _resolve_start(conn, start)
            if start > end:
                log.info(
                    "Nothing to do: saved state already covers the range up to %s",
                    end,
                )
                return _EXIT_OK
        return _run_backfill(conn, start, end)
    finally:
        # Always release the connection, including when reading the saved
        # state fails or an unexpected error escapes the loop.
        conn.close()


if __name__ == "__main__":
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [backfill] %(levelname)s %(message)s",
        datefmt="%Y-%m-%dT%H:%M:%S",
        handlers=[logging.StreamHandler(sys.stdout)],
    )
    sys.exit(main())