orchestrator/tests/test_empty_log_failure.py

"""ORCH-044 (P3): empty run log / no result-JSON at exit 0 == failure.

claude can exit 0 yet leave an empty (or JSON-less) run log — e.g. it died fast
because the session was logged out, or a flag silenced stdout. Before ORCH-044
that looked identical to success: job -> done, stage auto-advanced. Now the
launcher validates the result; only (exit 0 AND valid result-JSON) is a success.

No real claude/Popen is spawned. The git/usage/notify side effects of
_monitor_agent are stubbed; DB is a fresh per-test sqlite.
"""
import os
import tempfile

import pytest

_test_db = os.path.join(tempfile.gettempdir(), "test_orchestrator_empty_log.db")
os.environ["ORCH_DB_PATH"] = _test_db
os.environ["ORCH_REPOS_DIR"] = tempfile.gettempdir()
os.environ["ORCH_GITEA_TOKEN"] = "test-token"
os.environ["ORCH_PLANE_API_TOKEN"] = "test-token"

import src.db as db
from src.db import init_db, enqueue_job, claim_next_job, get_job
from src import preflight
from src.agents.launcher import AgentLauncher


VALID_RESULT_LOG = (
    "some preamble text from the agent run...\n"
    '{"type":"result","subtype":"success","usage":'
    '{"input_tokens":120,"output_tokens":45},"total_cost_usd":0.12}\n'
)


@pytest.fixture(autouse=True)
def fresh_db(tmp_path, monkeypatch):
    monkeypatch.setattr(db.settings, "db_path", str(tmp_path / "res.db"))
    init_db()
    preflight.reset_cache()
    yield


# ===========================================================================
# _validate_result — the result-JSON contract (TR-3.1)
# ===========================================================================
class TestValidateResult:
    def test_missing_path(self):
        ok, reason = AgentLauncher._validate_result(None)
        assert ok is False

    def test_missing_file(self, tmp_path):
        ok, reason = AgentLauncher._validate_result(str(tmp_path / "nope.log"))
        assert ok is False
        assert "missing" in reason.lower()

    def test_empty_file(self, tmp_path):
        p = tmp_path / "empty.log"
        p.write_text("")
        ok, reason = AgentLauncher._validate_result(str(p))
        assert ok is False
        assert "empty" in reason.lower()

    def test_whitespace_only(self, tmp_path):
        p = tmp_path / "ws.log"
        p.write_text("   \n\t\n")
        ok, _ = AgentLauncher._validate_result(str(p))
        assert ok is False

    def test_no_json(self, tmp_path):
        p = tmp_path / "garbage.log"
        p.write_text("this is not json at all, just noise\n")
        ok, reason = AgentLauncher._validate_result(str(p))
        assert ok is False
        assert "json" in reason.lower()

    def test_valid_result_json(self, tmp_path):
        p = tmp_path / "good.log"
        p.write_text(VALID_RESULT_LOG)
        ok, _ = AgentLauncher._validate_result(str(p))
        assert ok is True


# ===========================================================================
# _finalize_job — job state under result_ok (TC-12/13/15/16/17)
# ===========================================================================
class TestFinalizeJobResultOk:
    def _spy_telegram(self, monkeypatch):
        sent = []
        monkeypatch.setattr("src.notifications.send_telegram",
                            lambda *a, **k: sent.append(a[0] if a else ""))
        return sent

    # TC-15 / AC-13: valid result -> done (no regression).
    def test_valid_result_done(self, tmp_path, monkeypatch):
        self._spy_telegram(monkeypatch)
        log = tmp_path / "1.log"
        log.write_text(VALID_RESULT_LOG)
        jid = enqueue_job("developer", "r")
        claim_next_job()
        AgentLauncher()._finalize_job(jid, "developer", run_id=1, exit_code=0,
                                      output_path=str(log), result_ok=True)
        assert get_job(jid)["status"] == "done"

    # TC-12 / AC-10: exit 0 + empty log -> NOT done; terminal failed + alert.
    def test_empty_log_exit0_terminal_failed_alerts(self, tmp_path, monkeypatch):
        sent = self._spy_telegram(monkeypatch)
        log = tmp_path / "2.log"
        log.write_text("")  # 0 bytes
        # max_attempts=1 -> after the claim (attempts=1) the budget is spent ->
        # the permanent path goes straight to 'failed' and alerts.
        jid = enqueue_job("developer", "r", max_attempts=1)
        claim_next_job()
        AgentLauncher()._finalize_job(jid, "developer", run_id=2, exit_code=0,
                                      output_path=str(log), result_ok=False)
        job = get_job(jid)
        assert job["status"] == "failed"
        assert job["status"] != "done"
        assert "empty run log" in (job["error"] or "")
        assert sent, "a Telegram alert must be sent on terminal failure"

    # TC-13 / AC-11: exit 0 + JSON-less log -> failure (here: requeue).
    def test_garbage_log_exit0_not_done(self, tmp_path, monkeypatch):
        self._spy_telegram(monkeypatch)
        log = tmp_path / "3.log"
        log.write_text("noise, no json here\n")
        jid = enqueue_job("developer", "r", max_attempts=2)
        claim_next_job()
        AgentLauncher()._finalize_job(jid, "developer", run_id=3, exit_code=0,
                                      output_path=str(log), result_ok=False)
        job = get_job(jid)
        assert job["status"] != "done"
        assert job["status"] == "queued"          # retry budget remained
        assert "no result JSON" in (job["error"] or "")

    # TC-16 / AC-14: exit 0 + empty log never leaves the job 'running'.
    def test_never_running_after_empty_result(self, tmp_path, monkeypatch):
        self._spy_telegram(monkeypatch)
        log = tmp_path / "4.log"
        log.write_text("")
        jid = enqueue_job("developer", "r", max_attempts=2)
        claim_next_job()
        assert get_job(jid)["status"] == "running"   # claimed
        AgentLauncher()._finalize_job(jid, "developer", run_id=4, exit_code=0,
                                      output_path=str(log), result_ok=False)
        assert get_job(jid)["status"] in ("failed", "queued")

    # TC-17 / TR-3.3: empty result defaults to permanent (no backoff, no
    # transient budget burn).
    def test_empty_result_defaults_permanent(self, tmp_path, monkeypatch):
        self._spy_telegram(monkeypatch)
        log = tmp_path / "5.log"
        log.write_text("")  # no transient marker
        jid = enqueue_job("developer", "r", max_attempts=2)
        claim_next_job()
        AgentLauncher()._finalize_job(jid, "developer", run_id=5, exit_code=0,
                                      output_path=str(log), result_ok=False)
        job = get_job(jid)
        assert job["status"] == "queued"
        assert job["transient_attempts"] == 0     # NOT transient
        assert job["available_at"] is None        # no backoff gate

    # TC-17 / TR-3.3: a transient marker in the log routes to the transient path.
    def test_empty_result_with_transient_marker_goes_transient(self, tmp_path, monkeypatch):
        self._spy_telegram(monkeypatch)
        log = tmp_path / "6.log"
        log.write_text("overloaded_error: 429 rate limit. Retry-After: 12\n")
        jid = enqueue_job("developer", "r", max_attempts=2)
        claim_next_job()
        AgentLauncher()._finalize_job(jid, "developer", run_id=6, exit_code=0,
                                      output_path=str(log), result_ok=False)
        job = get_job(jid)
        assert job["status"] == "queued"
        assert job["transient_attempts"] == 1     # transient path taken
        assert job["available_at"] is not None     # backoff gate set


# ===========================================================================
# _monitor_agent — success gating (TC-14/15) + auth-marker reset (P1b)
# ===========================================================================
class _FakeProc:
    def __init__(self, exit_code):
        self._ec = exit_code
        self.pid = 4242

    def wait(self):
        return self._ec


def _seed_task_and_run(repo, branch, agent="developer", work_item_id="ORCH-001"):
    conn = db.get_db()
    conn.execute(
        "INSERT INTO tasks (work_item_id, repo, branch, stage) VALUES (?,?,?,?)",
        (work_item_id, repo, branch, "development"),
    )
    cur = conn.execute(
        "INSERT INTO agent_runs (task_id, agent) VALUES ((SELECT id FROM tasks "
        "WHERE repo=? AND branch=?), ?)",
        (repo, branch, agent),
    )
    run_id = cur.lastrowid
    conn.commit()
    conn.close()
    return run_id


class TestMonitorAgentGating:
    def _patch_monitor_env(self, monkeypatch, tmp_path):
        """Stub the heavy side effects of _monitor_agent (git/usage/notify)."""
        monkeypatch.setattr("src.agents.launcher.notify_agent_finished",
                            lambda *a, **k: None)
        monkeypatch.setattr("src.agents.launcher.get_worktree_path",
                            lambda repo, branch: str(tmp_path))

        class _R:
            returncode = 0
            stdout = ""       # "no changes to commit" -> skips git add/commit/push
            stderr = ""

        monkeypatch.setattr("src.agents.launcher.subprocess.run",
                            lambda *a, **k: _R())

    def test_success_advances_and_comments(self, tmp_path, monkeypatch):
        self._patch_monitor_env(monkeypatch, tmp_path)
        run_id = _seed_task_and_run("r", "feature/x")
        log = tmp_path / f"{run_id}.log"
        log.write_text(VALID_RESULT_LOG)

        spy = {"post": 0, "advance": 0, "finalize": None, "alert": 0}
        monkeypatch.setattr("src.notifications.send_telegram",
                            lambda *a, **k: spy.__setitem__("alert", spy["alert"] + 1))

        lr = AgentLauncher()
        monkeypatch.setattr(lr, "_post_usage_comments",
                            lambda *a, **k: spy.__setitem__("post", spy["post"] + 1))
        monkeypatch.setattr(lr, "_try_advance_stage",
                            lambda *a, **k: spy.__setitem__("advance", spy["advance"] + 1))
        monkeypatch.setattr(lr, "_finalize_job",
                            lambda *a, **k: spy.__setitem__("finalize", k.get("result_ok")))

        lr._monitor_agent(_FakeProc(0), run_id, "developer", "r", "feature/x",
                          output_path=str(log), log_fh=None, job_id=99)

        assert spy["post"] == 1
        assert spy["advance"] == 1
        assert spy["finalize"] is True
        assert spy["alert"] == 0   # no empty-result alert on a valid run

    # TC-14 / AC-12: empty result -> no advance, no success comment, alert sent.
    def test_empty_result_suppresses_advance_and_comment(self, tmp_path, monkeypatch):
        self._patch_monitor_env(monkeypatch, tmp_path)
        run_id = _seed_task_and_run("r", "feature/y")
        log = tmp_path / f"{run_id}.log"
        log.write_text("")  # empty -> invalid result

        spy = {"post": 0, "advance": 0, "finalize": None, "alert": 0}
        monkeypatch.setattr("src.notifications.send_telegram",
                            lambda *a, **k: spy.__setitem__("alert", spy["alert"] + 1))

        lr = AgentLauncher()
        monkeypatch.setattr(lr, "_post_usage_comments",
                            lambda *a, **k: spy.__setitem__("post", spy["post"] + 1))
        monkeypatch.setattr(lr, "_try_advance_stage",
                            lambda *a, **k: spy.__setitem__("advance", spy["advance"] + 1))
        monkeypatch.setattr(lr, "_finalize_job",
                            lambda *a, **k: spy.__setitem__("finalize", k.get("result_ok")))

        lr._monitor_agent(_FakeProc(0), run_id, "developer", "r", "feature/y",
                          output_path=str(log), log_fh=None, job_id=99)

        assert spy["post"] == 0          # no success comment
        assert spy["advance"] == 0       # stage NOT advanced
        assert spy["finalize"] is False  # finalize told the result was invalid
        assert spy["alert"] == 1         # empty-result alert fired


# ===========================================================================
# _handle_auth_marker — post-factum auth detection resets preflight cache (P1b)
# ===========================================================================
class TestAuthMarkerHandling:
    def test_auth_marker_resets_preflight_cache(self, tmp_path, monkeypatch):
        log = tmp_path / "auth.log"
        log.write_text("Error: Not logged in. Please run /login\n")
        reset = {"n": 0}
        monkeypatch.setattr(preflight, "reset_cache",
                            lambda: reset.__setitem__("n", reset["n"] + 1))
        found = AgentLauncher()._handle_auth_marker(str(log))
        assert found is True
        assert reset["n"] == 1

    def test_no_auth_marker_no_reset(self, tmp_path, monkeypatch):
        log = tmp_path / "plain.log"
        log.write_text("Traceback: ValueError somewhere\n")
        reset = {"n": 0}
        monkeypatch.setattr(preflight, "reset_cache",
                            lambda: reset.__setitem__("n", reset["n"] + 1))
        found = AgentLauncher()._handle_auth_marker(str(log))
        assert found is False
        assert reset["n"] == 0