auto-sync: 2026-04-19 18:30:01
This commit is contained in:
24
tasks/flightradar24/backup/backup.sh
Executable file
24
tasks/flightradar24/backup/backup.sh
Executable file
@@ -0,0 +1,24 @@
|
|||||||
|
#!/bin/bash
#
# Dump the FR24 PostgreSQL database to a gzip-compressed SQL file under
# BACKUP_DIR, then prune old dumps so only the newest KEEP_LAST remain.
#
# Connection settings are taken from PGHOST, PGUSER and PGDATABASE (defaults
# below). pg_dump is run with --no-password, so it never prompts; credentials
# have to be supplied non-interactively (e.g. PGPASSWORD).
set -euo pipefail

BACKUP_DIR="/backup"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
DUMP_FILE="${BACKUP_DIR}/fr24_${TIMESTAMP}.sql.gz"
# The dump is streamed here first and renamed only after pg_dump and gzip both
# succeeded, so a failed run never leaves a truncated file that looks like a
# valid backup (and never takes part in the rotation below).
TMP_FILE="${DUMP_FILE}.partial"
KEEP_LAST=7

# Remove an unfinished dump on every exit path (no-op after the rename).
cleanup() {
  rm -f -- "${TMP_FILE}"
}
trap cleanup EXIT

echo "[backup] starting dump at ${TIMESTAMP}"

# With pipefail, a pg_dump failure fails the whole pipeline and set -e aborts
# the script before the rename.
pg_dump \
  -h "${PGHOST:-postgres}" \
  -U "${PGUSER:-fr24}" \
  -d "${PGDATABASE:-fr24}" \
  --no-password \
  | gzip > "${TMP_FILE}"

mv -- "${TMP_FILE}" "${DUMP_FILE}"

echo "[backup] dump written: ${DUMP_FILE} ($(du -sh "${DUMP_FILE}" | cut -f1))"

# Keep only last N dumps
cd "${BACKUP_DIR}"

# Glob instead of parsing `ls`: pathname expansion is sorted, and the
# YYYYmmdd_HHMMSS stamp in the name sorts chronologically, so the oldest dumps
# come first. nullglob makes the array empty when nothing matches.
shopt -s nullglob
dumps=(fr24_*.sql.gz)
excess=$(( ${#dumps[@]} - KEEP_LAST ))
if (( excess > 0 )); then
  rm -v -- "${dumps[@]:0:excess}"
fi

echo "[backup] done. kept last ${KEEP_LAST} dumps."
|
||||||
@@ -129,19 +129,17 @@ services:
|
|||||||
- fr24-net
|
- fr24-net
|
||||||
|
|
||||||
monitoring:
|
monitoring:
|
||||||
image: alpine:3.20
|
build:
|
||||||
|
context: ../monitoring
|
||||||
|
dockerfile: Dockerfile
|
||||||
|
image: fr24-monitoring
|
||||||
container_name: fr24-monitoring
|
container_name: fr24-monitoring
|
||||||
command: ["sh", "-c", "echo 'monitoring placeholder: disk, db, capture status, alerts'; tail -f /dev/null"]
|
|
||||||
environment:
|
environment:
|
||||||
<<: *common-env
|
<<: *common-env
|
||||||
SERVICE_ROLE: monitoring
|
SERVICE_ROLE: monitoring
|
||||||
MONITORING_INTERVAL_SECONDS: ${MONITORING_INTERVAL_SECONDS:-30}
|
MONITORING_INTERVAL_SECONDS: ${MONITORING_INTERVAL_SECONDS:-60}
|
||||||
ports:
|
|
||||||
- "${MONITORING_PUBLISHED_PORT:-9090}:9090"
|
|
||||||
volumes:
|
volumes:
|
||||||
- ../logs/monitoring:/var/log/fr24
|
- ../logs/monitoring:/var/log/fr24
|
||||||
- ../backup:/backup
|
|
||||||
- /var/run/docker.sock:/var/run/docker.sock:ro
|
|
||||||
depends_on:
|
depends_on:
|
||||||
postgres:
|
postgres:
|
||||||
condition: service_healthy
|
condition: service_healthy
|
||||||
@@ -152,11 +150,36 @@ services:
|
|||||||
api:
|
api:
|
||||||
condition: service_started
|
condition: service_started
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD-SHELL", "test -f /tmp/monitoring-ready || exit 1"]
|
test: ["CMD-SHELL", "test -f /tmp/monitoring-ready && pgrep -f main.py > /dev/null || exit 1"]
|
||||||
interval: 30s
|
interval: 30s
|
||||||
timeout: 5s
|
timeout: 5s
|
||||||
retries: 3
|
retries: 3
|
||||||
start_period: 10s
|
start_period: 15s
|
||||||
|
restart: unless-stopped
|
||||||
|
networks:
|
||||||
|
- fr24-net
|
||||||
|
|
||||||
|
backup:
|
||||||
|
image: postgres:16-alpine
|
||||||
|
container_name: fr24-backup
|
||||||
|
environment:
|
||||||
|
PGPASSWORD: ${POSTGRES_PASSWORD:-change-me}
|
||||||
|
PGHOST: ${POSTGRES_HOST:-postgres}
|
||||||
|
PGUSER: ${POSTGRES_USER:-fr24}
|
||||||
|
PGDATABASE: ${POSTGRES_DB:-fr24}
|
||||||
|
TZ: ${TZ:-UTC}
|
||||||
|
volumes:
|
||||||
|
- ../backup:/backup
|
||||||
|
- ../backup/backup.sh:/usr/local/bin/backup.sh:ro
|
||||||
|
depends_on:
|
||||||
|
postgres:
|
||||||
|
condition: service_healthy
|
||||||
|
command: >
|
||||||
|
sh -c 'echo "[backup] service started, interval=6h" &&
|
||||||
|
while true; do
|
||||||
|
/usr/local/bin/backup.sh;
|
||||||
|
sleep 21600;
|
||||||
|
done'
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
networks:
|
networks:
|
||||||
- fr24-net
|
- fr24-net
|
||||||
|
|||||||
13
tasks/flightradar24/monitoring/Dockerfile
Normal file
13
tasks/flightradar24/monitoring/Dockerfile
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
# Image for the FR24 monitoring service: installs the Python dependencies from
# requirements.txt and runs main.py.
FROM python:3.11-slim

# libpq-dev, gcc, python3-dev: PostgreSQL client headers and a C toolchain
#   (presumably kept so psycopg2 can be built from source if needed).
# procps: provides pgrep, which the compose healthcheck for this service runs
#   ("pgrep -f main.py"); the slim base image does not include it, so without
#   this package the healthcheck can never succeed.
RUN apt-get update && apt-get install -y --no-install-recommends \
        libpq-dev gcc python3-dev procps \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy the dependency list on its own first so the pip layer is reused when
# only main.py changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY main.py .

# -u: unbuffered stdout/stderr, so output reaches the container log immediately.
CMD ["python", "-u", "main.py"]
|
||||||
114
tasks/flightradar24/monitoring/main.py
Normal file
114
tasks/flightradar24/monitoring/main.py
Normal file
@@ -0,0 +1,114 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""FR24 monitoring service — checks disk, DB size, capture lag, throughput every 60s."""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
import logging
|
||||||
|
import subprocess
|
||||||
|
import psycopg2
|
||||||
|
|
||||||
|
# Root logging setup: INFO level, ISO-8601-style timestamps without timezone.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s %(message)s",
    datefmt="%Y-%m-%dT%H:%M:%S",
)
log = logging.getLogger("monitor")


def quote_dsn_value(value: str) -> str:
    """Quote *value* for use in a libpq keyword/value connection string.

    The value is wrapped in single quotes, and backslashes and single quotes
    inside it are backslash-escaped, so values containing spaces or quotes
    (typically passwords) cannot break or alter the connection string.
    """
    # Escape backslashes first; otherwise the backslash added in front of a
    # quote would itself be doubled.
    escaped = value.replace("\\", "\\\\").replace("'", "\\'")
    return f"'{escaped}'"


# libpq connection string assembled from POSTGRES_* environment variables,
# each with a fallback default. Every value is quoted (see quote_dsn_value).
DB_DSN = " ".join(
    f"{keyword}={quote_dsn_value(os.environ.get(env_name, default))}"
    for keyword, env_name, default in (
        ("host", "POSTGRES_HOST", "postgres"),
        ("port", "POSTGRES_PORT", "5432"),
        ("dbname", "POSTGRES_DB", "fr24"),
        ("user", "POSTGRES_USER", "fr24"),
        ("password", "POSTGRES_PASSWORD", "change-me"),
    )
)
# Seconds to sleep between two check cycles.
INTERVAL = int(os.environ.get("MONITORING_INTERVAL_SECONDS", "60"))
# Disk usage (percent) above which a warning is logged.
DISK_WARN_PCT = 80
# Capture lag (seconds) above which a warning is logged.
LAG_WARN_SEC = 300  # 5 minutes
|
||||||
|
|
||||||
|
|
||||||
|
def get_disk_usage() -> str:
    """Return disk usage percent for / as a digit-only string, e.g. '45'.

    Runs ``df -P /`` and reads the Capacity column of the last output line.
    Returns '?' (after logging a warning) when df fails, times out, or prints
    something that is not a plain percentage, so callers can rely on the
    result being either '?' or safe to pass to ``int()``.
    """
    try:
        result = subprocess.run(
            ["df", "-P", "/"],
            capture_output=True, text=True, timeout=5,
            check=True,  # a non-zero exit becomes an exception instead of garbage output
        )
        # last line: Filesystem 1024-blocks Used Available Capacity Mounted
        fields = result.stdout.strip().splitlines()[-1].split()
        pct = fields[4].rstrip("%")
        if not pct.isdigit():
            # e.g. "-" when df cannot compute a capacity
            raise ValueError(f"unexpected capacity field {fields[4]!r}")
        return pct
    except Exception as e:
        log.warning("disk check failed: %s", e)
        return "?"
|
||||||
|
|
||||||
|
|
||||||
|
def run_checks():
    """Run one monitoring cycle: collect metrics, print them, log warnings.

    Collects the disk usage of / (via get_disk_usage) and, from PostgreSQL,
    the database size, the capture lag (seconds since the newest
    fr24.raw_packets.observed_at) and the number of packets observed in the
    last 5 minutes. One ``[monitor] ...`` line is printed to stdout.

    Any database error is logged and reported as ``ERR`` in the metrics line
    rather than raised. The connection is always closed, including when a
    query fails part-way through.
    """
    disk_pct_str = get_disk_usage()

    conn = None
    try:
        conn = psycopg2.connect(DB_DSN, connect_timeout=5)
        conn.autocommit = True
        cur = conn.cursor()

        # DB size: shown in GB from 1 GB upwards, otherwise in MB
        cur.execute("SELECT pg_database_size(current_database())")
        db_bytes = cur.fetchone()[0]
        db_size_gb = db_bytes / (1024 ** 3)
        db_size_str = f"{db_size_gb:.2f}GB" if db_size_gb >= 1 else f"{db_bytes / (1024**2):.1f}MB"

        # Capture lag: MAX() is NULL on an empty table, reported as N/A
        cur.execute("SELECT EXTRACT(EPOCH FROM (now() - MAX(observed_at))) FROM fr24.raw_packets")
        row = cur.fetchone()
        lag_sec = int(row[0]) if row and row[0] is not None else None
        lag_str = f"{lag_sec}s" if lag_sec is not None else "N/A"

        # Throughput: packets in last 5 minutes
        cur.execute(
            "SELECT COUNT(*) FROM fr24.raw_packets "
            "WHERE observed_at >= now() - INTERVAL '5 minutes'"
        )
        throughput = cur.fetchone()[0]

        cur.close()

        db_ok = True
    except Exception as e:
        log.warning("db check failed: %s", e)
        db_size_str = "ERR"
        lag_sec = None
        lag_str = "ERR"
        throughput = "ERR"
        db_ok = False
    finally:
        # Close on every path; previously a failing query skipped the close.
        if conn is not None:
            try:
                conn.close()
            except Exception as e:
                log.warning("db close failed: %s", e)

    # Only a digit-only value is a usable percentage; anything else is shown
    # as unknown and skips the threshold check instead of crashing in int().
    disk_known = disk_pct_str.isdigit()

    # Emit metrics line
    disk_display = f"{disk_pct_str}%" if disk_known else "?"
    print(
        f"[monitor] disk={disk_display} db_size={db_size_str} "
        f"capture_lag={lag_str} throughput={throughput}pkt/5min",
        flush=True,
    )

    # Warnings
    if disk_known and int(disk_pct_str) > DISK_WARN_PCT:
        log.warning("DISK USAGE HIGH: %s%%", disk_pct_str)

    if db_ok and lag_sec is not None and lag_sec > LAG_WARN_SEC:
        log.warning("CAPTURE LAG HIGH: %ds (threshold %ds)", lag_sec, LAG_WARN_SEC)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run the monitoring loop: one run_checks() cycle every INTERVAL seconds.

    Creates /tmp/monitoring-ready once at startup as the readiness marker the
    container healthcheck tests for. An exception escaping run_checks() is
    logged and the loop continues. The loop ends only when the process is
    terminated; SIGTERM leads to a clean SystemExit(0).
    """
    # Function-scope import keeps this block self-contained.
    import signal

    def _terminate(signum, frame):
        # SystemExit is not an Exception subclass, so neither the handler
        # below nor the one inside run_checks() swallows it.
        raise SystemExit(0)

    # The image starts this script directly (exec-form CMD), which makes it
    # PID 1 in the container. Without an installed handler a PID 1 process
    # does not react to SIGTERM, so `docker stop` would only end it after the
    # stop timeout, via SIGKILL.
    signal.signal(signal.SIGTERM, _terminate)

    log.info("FR24 monitoring started (interval=%ds)", INTERVAL)
    # Signal readiness
    with open("/tmp/monitoring-ready", "w"):
        pass

    while True:
        try:
            run_checks()
        except Exception as e:
            log.error("unexpected error in run_checks: %s", e)
        time.sleep(INTERVAL)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
1
tasks/flightradar24/monitoring/requirements.txt
Normal file
1
tasks/flightradar24/monitoring/requirements.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
psycopg2-binary==2.9.9
|
||||||
Reference in New Issue
Block a user