From c167c6930d6a09c2475b98cbb1c4a0a3bad7283f Mon Sep 17 00:00:00 2001 From: Dev Agent Date: Wed, 3 Jun 2026 08:28:09 +0300 Subject: [PATCH] test(launcher): watchdog graceful kill ordering + timeout config + M-4 removal Cover M-2: SIGTERM-before-SIGKILL ordering, graceful exit within grace skips SIGKILL, ProcessLookupError before SIGTERM is tolerated (no _record_kill), and _resolve_timeout per-agent override / default / malformed-JSON fallback. Cover M-4: _auto_merge_pr removed, _ensure_pr retained. --- tests/test_launcher.py | 140 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) diff --git a/tests/test_launcher.py b/tests/test_launcher.py index 8f05dda..e2ec215 100644 --- a/tests/test_launcher.py +++ b/tests/test_launcher.py @@ -7,6 +7,7 @@ Covers the audit-2026-06-02 fixes: the YAML frontmatter only (no fragile substring matching). """ import os +import signal import tempfile import pytest @@ -20,6 +21,7 @@ os.environ["ORCH_PLANE_API_TOKEN"] = "test-token" from src.agents.launcher import AgentLauncher from src.qg.checks import check_reviewer_verdict +from src.config import settings # --------------------------------------------------------------------------- @@ -138,3 +140,141 @@ class TestCheckReviewerVerdict: passed, reason = check_reviewer_verdict("enduro-trails", "ET-999") assert passed is False assert "not found" in reason.lower() + + +# --------------------------------------------------------------------------- +# ORCH-7 (M-4): dead code removed +# --------------------------------------------------------------------------- +class TestDeadCodeRemoved: + """M-4: _auto_merge_pr was never called (merge is the deployer's job) and is + removed. _ensure_pr (used by the auto-advance path) must stay.""" + + def test_auto_merge_pr_is_gone(self): + assert not hasattr(AgentLauncher, "_auto_merge_pr") + + def test_ensure_pr_still_present(self): + assert hasattr(AgentLauncher, "_ensure_pr") + + +# --------------------------------------------------------------------------- +# ORCH-7 (M-2): configurable timeout + per-agent override +# --------------------------------------------------------------------------- +class TestResolveTimeout: + """M-2: _resolve_timeout honours a per-agent JSON override, else the default.""" + + def test_default_when_no_override(self, monkeypatch): + monkeypatch.setattr(settings, "agent_timeout_seconds", 1800) + monkeypatch.setattr(settings, "agent_timeout_overrides_json", "") + assert AgentLauncher._resolve_timeout("developer") == 1800 + assert AgentLauncher._resolve_timeout(None) == 1800 + + def test_override_for_specific_agent(self, monkeypatch): + monkeypatch.setattr(settings, "agent_timeout_seconds", 1800) + monkeypatch.setattr( + settings, "agent_timeout_overrides_json", '{"reviewer": 3600, "architect": 2700}' + ) + assert AgentLauncher._resolve_timeout("reviewer") == 3600 + assert AgentLauncher._resolve_timeout("architect") == 2700 + # an agent not in the override map falls back to the default + assert AgentLauncher._resolve_timeout("developer") == 1800 + + def test_malformed_override_falls_back_to_default(self, monkeypatch): + monkeypatch.setattr(settings, "agent_timeout_seconds", 1800) + monkeypatch.setattr(settings, "agent_timeout_overrides_json", "{not-json") + # must not raise, must return the default + assert AgentLauncher._resolve_timeout("reviewer") == 1800 + + +class TestWatchdogGracefulKill: + """M-2: SIGTERM -> grace -> SIGKILL ordering, with graceful-exit short-circuit + and ProcessLookupError tolerance. The OS process is fully faked: we record the + signals sent and decide liveness from a script, so no real process is touched.""" + + def _patch_db(self, monkeypatch): + """Stub get_db so _record_kill does not need a real DB.""" + class _Conn: + def execute(self, *a, **k): + return self + def commit(self): + pass + def close(self): + pass + monkeypatch.setattr("src.agents.launcher.get_db", lambda: _Conn()) + + def test_sigterm_then_sigkill_after_grace(self, monkeypatch): + """Process stays alive through the whole grace window -> SIGTERM then SIGKILL.""" + self._patch_db(monkeypatch) + monkeypatch.setattr(settings, "agent_kill_grace_seconds", 1) + monkeypatch.setattr("src.agents.launcher.time.sleep", lambda s: None) + + sent = [] + + def fake_kill(pid, sig): + sent.append(sig) + # signal 0 (liveness probe) -> always alive; never raise + return None + + monkeypatch.setattr("src.agents.launcher.os.kill", fake_kill) + + launcher = AgentLauncher() + launcher._watchdog(pid=4242, run_id=1, timeout=0, agent="developer") + + assert signal.SIGTERM in sent + assert signal.SIGKILL in sent + # SIGTERM must come before SIGKILL + assert sent.index(signal.SIGTERM) < sent.index(signal.SIGKILL) + + def test_graceful_exit_in_grace_skips_sigkill(self, monkeypatch): + """Process dies during the grace window -> SIGKILL is NOT sent.""" + self._patch_db(monkeypatch) + monkeypatch.setattr(settings, "agent_kill_grace_seconds", 5) + monkeypatch.setattr("src.agents.launcher.time.sleep", lambda s: None) + + sent = [] + state = {"alive": True, "probes": 0} + + def fake_kill(pid, sig): + if sig == 0: + state["probes"] += 1 + # die on the 2nd liveness probe (within grace) + if state["probes"] >= 2: + raise ProcessLookupError + return None + sent.append(sig) + return None + + monkeypatch.setattr("src.agents.launcher.os.kill", fake_kill) + + launcher = AgentLauncher() + launcher._watchdog(pid=4242, run_id=2, timeout=0, agent="developer") + + assert signal.SIGTERM in sent + assert signal.SIGKILL not in sent + + def test_already_dead_before_sigterm(self, monkeypatch): + """Process already gone at SIGTERM -> ProcessLookupError tolerated, no SIGKILL, + and _record_kill is NOT called (the monitor's proc.wait owns the exit).""" + self._patch_db(monkeypatch) + monkeypatch.setattr("src.agents.launcher.time.sleep", lambda s: None) + + sent = [] + + def fake_kill(pid, sig): + if sig == signal.SIGTERM: + raise ProcessLookupError + sent.append(sig) + return None + + recorded = {"called": False} + monkeypatch.setattr( + AgentLauncher, "_record_kill", + staticmethod(lambda rid: recorded.__setitem__("called", True)), + ) + monkeypatch.setattr("src.agents.launcher.os.kill", fake_kill) + + launcher = AgentLauncher() + # must not raise + launcher._watchdog(pid=4242, run_id=3, timeout=0, agent="developer") + + assert signal.SIGKILL not in sent + assert recorded["called"] is False