feat(launcher): prune old run logs (L-2)
This commit is contained in:
@@ -16,6 +16,62 @@ from ..plane_sync import notify_stage_change as plane_notify_stage, add_comment
|
||||
logger = logging.getLogger("orchestrator.launcher")
|
||||
|
||||
|
||||
def prune_run_logs(runs_dir, keep_days=30, keep_max=500, active_paths=None):
    """Delete stale per-run log files from ``runs_dir`` (L-2, best-effort).

    Candidates are the ``*.log`` files located directly inside ``runs_dir``.
    Entries with another suffix, subdirectories, and any file whose resolved
    path matches an entry of ``active_paths`` are left untouched.

    Candidates are ranked newest-first by modification time. A candidate is
    deleted when its mtime lies more than ``keep_days`` days in the past, or
    when its rank falls outside the first ``keep_max`` positions. Protected
    files are filtered out before ranking, so they do not consume
    ``keep_max`` slots.

    Returns the number of files actually deleted (0 when ``runs_dir`` is not
    a directory). Any ``Exception`` is logged as a warning and swallowed, so
    a rotation problem cannot take the application down.
    """
    removed = 0
    try:
        # Resolve the protected paths once; if a value cannot be resolved,
        # fall back to the raw value so it still takes part in the comparison.
        protected = set()
        for raw in (active_paths or []):
            try:
                resolved = os.path.realpath(raw)
            except Exception:
                resolved = raw
            protected.add(resolved)

        if not os.path.isdir(runs_dir):
            return 0

        # Gather (path, mtime) for every eligible log directly in runs_dir.
        candidates = []
        for entry in os.listdir(runs_dir):
            if entry.endswith(".log"):
                path = os.path.join(runs_dir, entry)
                if os.path.isfile(path) and os.path.realpath(path) not in protected:
                    try:
                        modified = os.path.getmtime(path)
                    except OSError:
                        # The file could not be stat'ed (e.g. it vanished
                        # after being listed); leave it out of the ranking.
                        continue
                    candidates.append((path, modified))

        # Newest first, so the rank can be compared against keep_max directly.
        ranked = sorted(candidates, key=lambda pair: pair[1], reverse=True)

        cutoff = time.time() - keep_days * 86400
        for rank, (path, modified) in enumerate(ranked):
            expired = modified < cutoff
            surplus = rank >= keep_max
            if not (expired or surplus):
                continue
            try:
                os.remove(path)
            except OSError as e:
                logger.warning(f"prune_run_logs: failed to remove {path}: {e}")
            else:
                removed += 1
    except Exception as e:
        logger.warning(f"prune_run_logs failed for {runs_dir}: {e}")
    return removed
|
||||
|
||||
|
||||
class AgentLauncher:
|
||||
"""Launch Claude CLI agents directly (binary mounted into container)."""
|
||||
|
||||
|
||||
@@ -66,6 +66,15 @@ class Settings(BaseSettings):
|
||||
agent_kill_grace_seconds: int = 20
|
||||
agent_timeout_overrides_json: str = ""
|
||||
|
||||
# L-2: run-log rotation. Old per-run logs in <data>/runs/*.log are pruned at
|
||||
# app startup (best-effort). A *.log is removed if it is older than
|
||||
# log_keep_days OR not within the log_keep_max most-recent logs (whichever
|
||||
# hits first). Only *.log files are touched; the active run log is skipped.
|
||||
# log_keep_days -> max age in days (env ORCH_LOG_KEEP_DAYS).
|
||||
# log_keep_max -> max number of newest logs to retain (env ORCH_LOG_KEEP_MAX).
|
||||
log_keep_days: int = 30
|
||||
log_keep_max: int = 500
|
||||
|
||||
|
||||
# Telegram notifications
|
||||
telegram_bot_token: str = ""
|
||||
|
||||
16
src/main.py
16
src/main.py
@@ -60,6 +60,22 @@ async def lifespan(app: FastAPI):
|
||||
if requeued:
|
||||
log.warning(f"Queue-recovery: requeued {requeued} running job(s) after restart")
|
||||
|
||||
# L-2: rotate old per-run logs at startup (best-effort; never fatal).
|
||||
try:
|
||||
import os as _os
|
||||
from .config import settings as _settings
|
||||
from .agents.launcher import prune_run_logs
|
||||
_runs_dir = _os.path.join(_os.path.dirname(_settings.db_path), "runs")
|
||||
_removed = prune_run_logs(
|
||||
_runs_dir,
|
||||
keep_days=_settings.log_keep_days,
|
||||
keep_max=_settings.log_keep_max,
|
||||
)
|
||||
if _removed:
|
||||
log.info(f"Log rotation: pruned {_removed} old run log(s) from {_runs_dir}")
|
||||
except Exception as e:
|
||||
log.warning(f"Log rotation skipped: {e}")
|
||||
|
||||
# Start the background job-queue worker (ORCH-1).
|
||||
from .queue_worker import worker
|
||||
worker.start()
|
||||
|
||||
Reference in New Issue
Block a user