"""ORCH-044 (P3): empty run log / no result-JSON at exit 0 == failure. claude can exit 0 yet leave an empty (or JSON-less) run log — e.g. it died fast because the session was logged out, or a flag silenced stdout. Before ORCH-044 that looked identical to success: job -> done, stage auto-advanced. Now the launcher validates the result; only (exit 0 AND valid result-JSON) is a success. No real claude/Popen is spawned. The git/usage/notify side effects of _monitor_agent are stubbed; DB is a fresh per-test sqlite. """ import os import tempfile import pytest _test_db = os.path.join(tempfile.gettempdir(), "test_orchestrator_empty_log.db") os.environ["ORCH_DB_PATH"] = _test_db os.environ["ORCH_REPOS_DIR"] = tempfile.gettempdir() os.environ["ORCH_GITEA_TOKEN"] = "test-token" os.environ["ORCH_PLANE_API_TOKEN"] = "test-token" import src.db as db from src.db import init_db, enqueue_job, claim_next_job, get_job from src import preflight from src.agents.launcher import AgentLauncher VALID_RESULT_LOG = ( "some preamble text from the agent run...\n" '{"type":"result","subtype":"success","usage":' '{"input_tokens":120,"output_tokens":45},"total_cost_usd":0.12}\n' ) @pytest.fixture(autouse=True) def fresh_db(tmp_path, monkeypatch): monkeypatch.setattr(db.settings, "db_path", str(tmp_path / "res.db")) init_db() preflight.reset_cache() yield # =========================================================================== # _validate_result — the result-JSON contract (TR-3.1) # =========================================================================== class TestValidateResult: def test_missing_path(self): ok, reason = AgentLauncher._validate_result(None) assert ok is False def test_missing_file(self, tmp_path): ok, reason = AgentLauncher._validate_result(str(tmp_path / "nope.log")) assert ok is False assert "missing" in reason.lower() def test_empty_file(self, tmp_path): p = tmp_path / "empty.log" p.write_text("") ok, reason = AgentLauncher._validate_result(str(p)) assert ok is False assert "empty" in reason.lower() def test_whitespace_only(self, tmp_path): p = tmp_path / "ws.log" p.write_text(" \n\t\n") ok, _ = AgentLauncher._validate_result(str(p)) assert ok is False def test_no_json(self, tmp_path): p = tmp_path / "garbage.log" p.write_text("this is not json at all, just noise\n") ok, reason = AgentLauncher._validate_result(str(p)) assert ok is False assert "json" in reason.lower() def test_valid_result_json(self, tmp_path): p = tmp_path / "good.log" p.write_text(VALID_RESULT_LOG) ok, _ = AgentLauncher._validate_result(str(p)) assert ok is True # =========================================================================== # _finalize_job — job state under result_ok (TC-12/13/15/16/17) # =========================================================================== class TestFinalizeJobResultOk: def _spy_telegram(self, monkeypatch): sent = [] monkeypatch.setattr("src.notifications.send_telegram", lambda *a, **k: sent.append(a[0] if a else "")) return sent # TC-15 / AC-13: valid result -> done (no regression). def test_valid_result_done(self, tmp_path, monkeypatch): self._spy_telegram(monkeypatch) log = tmp_path / "1.log" log.write_text(VALID_RESULT_LOG) jid = enqueue_job("developer", "r") claim_next_job() AgentLauncher()._finalize_job(jid, "developer", run_id=1, exit_code=0, output_path=str(log), result_ok=True) assert get_job(jid)["status"] == "done" # TC-12 / AC-10: exit 0 + empty log -> NOT done; terminal failed + alert. def test_empty_log_exit0_terminal_failed_alerts(self, tmp_path, monkeypatch): sent = self._spy_telegram(monkeypatch) log = tmp_path / "2.log" log.write_text("") # 0 bytes # max_attempts=1 -> after the claim (attempts=1) the budget is spent -> # the permanent path goes straight to 'failed' and alerts. jid = enqueue_job("developer", "r", max_attempts=1) claim_next_job() AgentLauncher()._finalize_job(jid, "developer", run_id=2, exit_code=0, output_path=str(log), result_ok=False) job = get_job(jid) assert job["status"] == "failed" assert job["status"] != "done" assert "empty run log" in (job["error"] or "") assert sent, "a Telegram alert must be sent on terminal failure" # TC-13 / AC-11: exit 0 + JSON-less log -> failure (here: requeue). def test_garbage_log_exit0_not_done(self, tmp_path, monkeypatch): self._spy_telegram(monkeypatch) log = tmp_path / "3.log" log.write_text("noise, no json here\n") jid = enqueue_job("developer", "r", max_attempts=2) claim_next_job() AgentLauncher()._finalize_job(jid, "developer", run_id=3, exit_code=0, output_path=str(log), result_ok=False) job = get_job(jid) assert job["status"] != "done" assert job["status"] == "queued" # retry budget remained assert "no result JSON" in (job["error"] or "") # TC-16 / AC-14: exit 0 + empty log never leaves the job 'running'. def test_never_running_after_empty_result(self, tmp_path, monkeypatch): self._spy_telegram(monkeypatch) log = tmp_path / "4.log" log.write_text("") jid = enqueue_job("developer", "r", max_attempts=2) claim_next_job() assert get_job(jid)["status"] == "running" # claimed AgentLauncher()._finalize_job(jid, "developer", run_id=4, exit_code=0, output_path=str(log), result_ok=False) assert get_job(jid)["status"] in ("failed", "queued") # TC-17 / TR-3.3: empty result defaults to permanent (no backoff, no # transient budget burn). def test_empty_result_defaults_permanent(self, tmp_path, monkeypatch): self._spy_telegram(monkeypatch) log = tmp_path / "5.log" log.write_text("") # no transient marker jid = enqueue_job("developer", "r", max_attempts=2) claim_next_job() AgentLauncher()._finalize_job(jid, "developer", run_id=5, exit_code=0, output_path=str(log), result_ok=False) job = get_job(jid) assert job["status"] == "queued" assert job["transient_attempts"] == 0 # NOT transient assert job["available_at"] is None # no backoff gate # TC-17 / TR-3.3: a transient marker in the log routes to the transient path. def test_empty_result_with_transient_marker_goes_transient(self, tmp_path, monkeypatch): self._spy_telegram(monkeypatch) log = tmp_path / "6.log" log.write_text("overloaded_error: 429 rate limit. Retry-After: 12\n") jid = enqueue_job("developer", "r", max_attempts=2) claim_next_job() AgentLauncher()._finalize_job(jid, "developer", run_id=6, exit_code=0, output_path=str(log), result_ok=False) job = get_job(jid) assert job["status"] == "queued" assert job["transient_attempts"] == 1 # transient path taken assert job["available_at"] is not None # backoff gate set # =========================================================================== # _monitor_agent — success gating (TC-14/15) + auth-marker reset (P1b) # =========================================================================== class _FakeProc: def __init__(self, exit_code): self._ec = exit_code self.pid = 4242 def wait(self): return self._ec def _seed_task_and_run(repo, branch, agent="developer", work_item_id="ORCH-001"): conn = db.get_db() conn.execute( "INSERT INTO tasks (work_item_id, repo, branch, stage) VALUES (?,?,?,?)", (work_item_id, repo, branch, "development"), ) cur = conn.execute( "INSERT INTO agent_runs (task_id, agent) VALUES ((SELECT id FROM tasks " "WHERE repo=? AND branch=?), ?)", (repo, branch, agent), ) run_id = cur.lastrowid conn.commit() conn.close() return run_id class TestMonitorAgentGating: def _patch_monitor_env(self, monkeypatch, tmp_path): """Stub the heavy side effects of _monitor_agent (git/usage/notify).""" monkeypatch.setattr("src.agents.launcher.notify_agent_finished", lambda *a, **k: None) monkeypatch.setattr("src.agents.launcher.get_worktree_path", lambda repo, branch: str(tmp_path)) class _R: returncode = 0 stdout = "" # "no changes to commit" -> skips git add/commit/push stderr = "" monkeypatch.setattr("src.agents.launcher.subprocess.run", lambda *a, **k: _R()) def test_success_advances_and_comments(self, tmp_path, monkeypatch): self._patch_monitor_env(monkeypatch, tmp_path) run_id = _seed_task_and_run("r", "feature/x") log = tmp_path / f"{run_id}.log" log.write_text(VALID_RESULT_LOG) spy = {"post": 0, "advance": 0, "finalize": None, "alert": 0} monkeypatch.setattr("src.notifications.send_telegram", lambda *a, **k: spy.__setitem__("alert", spy["alert"] + 1)) lr = AgentLauncher() monkeypatch.setattr(lr, "_post_usage_comments", lambda *a, **k: spy.__setitem__("post", spy["post"] + 1)) monkeypatch.setattr(lr, "_try_advance_stage", lambda *a, **k: spy.__setitem__("advance", spy["advance"] + 1)) monkeypatch.setattr(lr, "_finalize_job", lambda *a, **k: spy.__setitem__("finalize", k.get("result_ok"))) lr._monitor_agent(_FakeProc(0), run_id, "developer", "r", "feature/x", output_path=str(log), log_fh=None, job_id=99) assert spy["post"] == 1 assert spy["advance"] == 1 assert spy["finalize"] is True assert spy["alert"] == 0 # no empty-result alert on a valid run # TC-14 / AC-12: empty result -> no advance, no success comment, alert sent. def test_empty_result_suppresses_advance_and_comment(self, tmp_path, monkeypatch): self._patch_monitor_env(monkeypatch, tmp_path) run_id = _seed_task_and_run("r", "feature/y") log = tmp_path / f"{run_id}.log" log.write_text("") # empty -> invalid result spy = {"post": 0, "advance": 0, "finalize": None, "alert": 0} monkeypatch.setattr("src.notifications.send_telegram", lambda *a, **k: spy.__setitem__("alert", spy["alert"] + 1)) lr = AgentLauncher() monkeypatch.setattr(lr, "_post_usage_comments", lambda *a, **k: spy.__setitem__("post", spy["post"] + 1)) monkeypatch.setattr(lr, "_try_advance_stage", lambda *a, **k: spy.__setitem__("advance", spy["advance"] + 1)) monkeypatch.setattr(lr, "_finalize_job", lambda *a, **k: spy.__setitem__("finalize", k.get("result_ok"))) lr._monitor_agent(_FakeProc(0), run_id, "developer", "r", "feature/y", output_path=str(log), log_fh=None, job_id=99) assert spy["post"] == 0 # no success comment assert spy["advance"] == 0 # stage NOT advanced assert spy["finalize"] is False # finalize told the result was invalid assert spy["alert"] == 1 # empty-result alert fired # =========================================================================== # _handle_auth_marker — post-factum auth detection resets preflight cache (P1b) # =========================================================================== class TestAuthMarkerHandling: def test_auth_marker_resets_preflight_cache(self, tmp_path, monkeypatch): log = tmp_path / "auth.log" log.write_text("Error: Not logged in. Please run /login\n") reset = {"n": 0} monkeypatch.setattr(preflight, "reset_cache", lambda: reset.__setitem__("n", reset["n"] + 1)) found = AgentLauncher()._handle_auth_marker(str(log)) assert found is True assert reset["n"] == 1 def test_no_auth_marker_no_reset(self, tmp_path, monkeypatch): log = tmp_path / "plain.log" log.write_text("Traceback: ValueError somewhere\n") reset = {"n": 0} monkeypatch.setattr(preflight, "reset_cache", lambda: reset.__setitem__("n", reset["n"] + 1)) found = AgentLauncher()._handle_auth_marker(str(log)) assert found is False assert reset["n"] == 0