ORCH cleanup L-1/L-2/L-3: stages comment, prune run logs, emoji constants #7
@@ -16,6 +16,62 @@ from ..plane_sync import notify_stage_change as plane_notify_stage, add_comment
|
||||
logger = logging.getLogger("orchestrator.launcher")
|
||||
|
||||
|
||||
def prune_run_logs(runs_dir, keep_days=30, keep_max=500, active_paths=None):
|
||||
"""L-2: best-effort rotation of per-run logs (<runs_dir>/*.log).
|
||||
|
||||
A log file is removed if it is older than keep_days OR it is not within the
|
||||
keep_max most-recent logs (whichever condition is met first). Only *.log
|
||||
files directly inside runs_dir are considered; non-.log files and
|
||||
subdirectories are never touched. Files whose path is in active_paths (the
|
||||
currently running log) are always kept.
|
||||
|
||||
Returns the number of files removed. Never raises: any error is logged and
|
||||
swallowed so log rotation can never bring the app down.
|
||||
"""
|
||||
removed = 0
|
||||
try:
|
||||
active = set()
|
||||
for ap in (active_paths or []):
|
||||
try:
|
||||
active.add(os.path.realpath(ap))
|
||||
except Exception:
|
||||
active.add(ap)
|
||||
|
||||
if not os.path.isdir(runs_dir):
|
||||
return 0
|
||||
|
||||
logs = []
|
||||
for name in os.listdir(runs_dir):
|
||||
if not name.endswith(".log"):
|
||||
continue
|
||||
path = os.path.join(runs_dir, name)
|
||||
if not os.path.isfile(path):
|
||||
continue
|
||||
if os.path.realpath(path) in active:
|
||||
continue
|
||||
try:
|
||||
mtime = os.path.getmtime(path)
|
||||
except OSError:
|
||||
continue
|
||||
logs.append((path, mtime))
|
||||
|
||||
logs.sort(key=lambda t: t[1], reverse=True)
|
||||
|
||||
cutoff = time.time() - keep_days * 86400
|
||||
for idx, (path, mtime) in enumerate(logs):
|
||||
too_old = mtime < cutoff
|
||||
over_max = idx >= keep_max
|
||||
if too_old or over_max:
|
||||
try:
|
||||
os.remove(path)
|
||||
removed += 1
|
||||
except OSError as e:
|
||||
logger.warning(f"prune_run_logs: failed to remove {path}: {e}")
|
||||
except Exception as e:
|
||||
logger.warning(f"prune_run_logs failed for {runs_dir}: {e}")
|
||||
return removed
|
||||
|
||||
|
||||
class AgentLauncher:
|
||||
"""Launch Claude CLI agents directly (binary mounted into container)."""
|
||||
|
||||
|
||||
@@ -66,6 +66,15 @@ class Settings(BaseSettings):
|
||||
agent_kill_grace_seconds: int = 20
|
||||
agent_timeout_overrides_json: str = ""
|
||||
|
||||
# L-2: run-log rotation. Old per-run logs in <data>/runs/*.log are pruned at
|
||||
# app startup (best-effort). A *.log is removed if it is older than
|
||||
# log_keep_days OR not within the log_keep_max most-recent logs (whichever
|
||||
# hits first). Only *.log files are touched; the active run log is skipped.
|
||||
# log_keep_days -> max age in days (env ORCH_LOG_KEEP_DAYS).
|
||||
# log_keep_max -> max number of newest logs to retain (env ORCH_LOG_KEEP_MAX).
|
||||
log_keep_days: int = 30
|
||||
log_keep_max: int = 500
|
||||
|
||||
|
||||
# Telegram notifications
|
||||
telegram_bot_token: str = ""
|
||||
|
||||
16
src/main.py
16
src/main.py
@@ -60,6 +60,22 @@ async def lifespan(app: FastAPI):
|
||||
if requeued:
|
||||
log.warning(f"Queue-recovery: requeued {requeued} running job(s) after restart")
|
||||
|
||||
# L-2: rotate old per-run logs at startup (best-effort; never fatal).
|
||||
try:
|
||||
import os as _os
|
||||
from .config import settings as _settings
|
||||
from .agents.launcher import prune_run_logs
|
||||
_runs_dir = _os.path.join(_os.path.dirname(_settings.db_path), "runs")
|
||||
_removed = prune_run_logs(
|
||||
_runs_dir,
|
||||
keep_days=_settings.log_keep_days,
|
||||
keep_max=_settings.log_keep_max,
|
||||
)
|
||||
if _removed:
|
||||
log.info(f"Log rotation: pruned {_removed} old run log(s) from {_runs_dir}")
|
||||
except Exception as e:
|
||||
log.warning(f"Log rotation skipped: {e}")
|
||||
|
||||
# Start the background job-queue worker (ORCH-1).
|
||||
from .queue_worker import worker
|
||||
worker.start()
|
||||
|
||||
@@ -6,6 +6,12 @@ from .config import settings
|
||||
|
||||
logger = logging.getLogger("orchestrator.plane_sync")
|
||||
|
||||
# L-3: emoji literals used in Plane comment bodies, named for readability.
|
||||
# Message text stays byte-for-byte identical to the previous output.
|
||||
EMOJI_STAGE = "\U0001F504" # stage transition
|
||||
EMOJI_QG_FAIL = "\u26A0\uFE0F" # quality-gate failure
|
||||
EMOJI_DONE = "\u2705" # task completed
|
||||
|
||||
PLANE_BASE = f"{settings.plane_api_url}/api/v1"
|
||||
PLANE_HEADERS = {"X-API-Key": settings.plane_api_token}
|
||||
WORKSPACE = settings.plane_workspace_slug
|
||||
@@ -194,7 +200,7 @@ def notify_stage_change(work_item_id: str, old_stage: str, new_stage: str, agent
|
||||
project_id = _resolve_project_id(work_item_id, project_id)
|
||||
update_issue_state(work_item_id, new_stage, project_id)
|
||||
|
||||
msg = f"🔄 Stage: {old_stage} → {new_stage}"
|
||||
msg = f"{EMOJI_STAGE} Stage: {old_stage} → {new_stage}"
|
||||
if agent:
|
||||
msg += f" (launching {agent})"
|
||||
|
||||
@@ -232,11 +238,11 @@ def notify_stage_change(work_item_id: str, old_stage: str, new_stage: str, agent
|
||||
|
||||
def notify_qg_failure(work_item_id: str, stage: str, check: str, reason: str, project_id: str = None):
|
||||
"""Notify Plane about QG failure."""
|
||||
add_comment(work_item_id, f"⚠️ QG failed at {stage}: {check} — {reason}", project_id)
|
||||
add_comment(work_item_id, f"{EMOJI_QG_FAIL} QG failed at {stage}: {check} — {reason}", project_id)
|
||||
|
||||
|
||||
def notify_done(work_item_id: str, project_id: str = None):
|
||||
"""Mark issue as Done in Plane."""
|
||||
project_id = _resolve_project_id(work_item_id, project_id)
|
||||
update_issue_state(work_item_id, "done", project_id)
|
||||
add_comment(work_item_id, "✅ Task completed! PR merged and deployed.", project_id)
|
||||
add_comment(work_item_id, f"{EMOJI_DONE} Task completed! PR merged and deployed.", project_id)
|
||||
|
||||
@@ -5,7 +5,7 @@ Stages:
|
||||
|
||||
Each stage defines:
|
||||
- next: the stage to advance to
|
||||
- agent: the agent to launch when entering the NEXT stage
|
||||
- agent: the agent to launch when advancing FROM this stage (NOT the next stage's agent)
|
||||
- qg: the quality gate check required to leave this stage
|
||||
"""
|
||||
|
||||
|
||||
92
tests/test_log_rotation.py
Normal file
92
tests/test_log_rotation.py
Normal file
@@ -0,0 +1,92 @@
|
||||
"""L-2: tests for prune_run_logs (run-log rotation).
|
||||
|
||||
Verifies that old / surplus *.log files are removed while fresh logs, non-.log
|
||||
files, the active log, and subdirectories are left intact. Function is
|
||||
best-effort and must never raise.
|
||||
"""
|
||||
import os
|
||||
import time
|
||||
|
||||
from src.agents.launcher import prune_run_logs
|
||||
|
||||
|
||||
def _touch(path, age_days=0):
|
||||
with open(path, "w") as f:
|
||||
f.write("x")
|
||||
mtime = time.time() - age_days * 86400
|
||||
os.utime(path, (mtime, mtime))
|
||||
return path
|
||||
|
||||
|
||||
def test_old_logs_removed_fresh_kept(tmp_path):
|
||||
runs = tmp_path
|
||||
fresh = _touch(str(runs / "1.log"), age_days=1)
|
||||
old = _touch(str(runs / "2.log"), age_days=40)
|
||||
|
||||
removed = prune_run_logs(str(runs), keep_days=30, keep_max=500)
|
||||
|
||||
assert removed == 1
|
||||
assert os.path.exists(fresh)
|
||||
assert not os.path.exists(old)
|
||||
|
||||
|
||||
def test_non_log_files_untouched(tmp_path):
|
||||
runs = tmp_path
|
||||
old_log = _touch(str(runs / "stale.log"), age_days=99)
|
||||
keep_txt = _touch(str(runs / "notes.txt"), age_days=99)
|
||||
keep_db = _touch(str(runs / "orchestrator.db"), age_days=99)
|
||||
|
||||
prune_run_logs(str(runs), keep_days=30, keep_max=500)
|
||||
|
||||
assert not os.path.exists(old_log)
|
||||
assert os.path.exists(keep_txt)
|
||||
assert os.path.exists(keep_db)
|
||||
|
||||
|
||||
def test_keep_max_retains_newest(tmp_path):
|
||||
runs = tmp_path
|
||||
# 5 logs, all recent (within keep_days), increasing age 0..4 days.
|
||||
paths = []
|
||||
for i in range(5):
|
||||
paths.append(_touch(str(runs / f"{i}.log"), age_days=i))
|
||||
|
||||
removed = prune_run_logs(str(runs), keep_days=365, keep_max=2)
|
||||
|
||||
# Only the 2 newest (age 0, 1) survive.
|
||||
assert removed == 3
|
||||
assert os.path.exists(paths[0])
|
||||
assert os.path.exists(paths[1])
|
||||
for p in paths[2:]:
|
||||
assert not os.path.exists(p)
|
||||
|
||||
|
||||
def test_active_log_never_removed(tmp_path):
|
||||
runs = tmp_path
|
||||
active = _touch(str(runs / "active.log"), age_days=99)
|
||||
other = _touch(str(runs / "other.log"), age_days=99)
|
||||
|
||||
removed = prune_run_logs(
|
||||
str(runs), keep_days=30, keep_max=500, active_paths=[active]
|
||||
)
|
||||
|
||||
assert removed == 1
|
||||
assert os.path.exists(active)
|
||||
assert not os.path.exists(other)
|
||||
|
||||
|
||||
def test_subdirs_untouched(tmp_path):
|
||||
runs = tmp_path
|
||||
sub = runs / "sub.log"
|
||||
sub.mkdir() # a directory that happens to end in .log
|
||||
old_log = _touch(str(runs / "old.log"), age_days=99)
|
||||
|
||||
prune_run_logs(str(runs), keep_days=30, keep_max=500)
|
||||
|
||||
assert sub.is_dir()
|
||||
assert not os.path.exists(old_log)
|
||||
|
||||
|
||||
def test_missing_dir_is_noop(tmp_path):
|
||||
missing = tmp_path / "does-not-exist"
|
||||
# Must not raise.
|
||||
assert prune_run_logs(str(missing)) == 0
|
||||
Reference in New Issue
Block a user