"""ORCH-110 TC-12: observability of the infra-timeout path (D6 / FR-6 / AC-9).

The infra-timeout state must be (a) reflected in read-only counters, (b) surfaced
in the additive ``merge_gate`` block of ``GET /queue``, and (c) distinguishable
from a code-fault rollback — with the exhaustion alert carrying the CLICKABLE
issue number and an explicitly infrastructural (NOT "developer must fix") wording.

No dedup/overlap with ORCH-111 (which only OBSERVES surviving processes; ORCH-110
prevents/tolerates).
"""
import asyncio
import os
import tempfile

# The environment must be configured BEFORE any ``src`` module is imported,
# which is why the project imports below carry ``noqa: E402``.
_test_db = os.path.join(tempfile.gettempdir(), "test_orch110_observability.db")
os.environ["ORCH_DB_PATH"] = _test_db
os.environ["ORCH_REPOS_DIR"] = tempfile.gettempdir()
os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")
os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")

from unittest.mock import MagicMock  # noqa: E402

import pytest  # noqa: E402

import src.db as _db  # noqa: E402
from src.db import init_db, get_db  # noqa: E402
from src import merge_gate  # noqa: E402
from src import stage_engine  # noqa: E402
from src.stage_engine import AdvanceResult  # noqa: E402


@pytest.fixture(autouse=True)
def fresh_db(monkeypatch):
    """Give every test an empty database and zeroed merge-gate counters."""
    monkeypatch.setattr(_db.settings, "db_path", _test_db)
    if os.path.exists(_test_db):
        os.unlink(_test_db)
    init_db()
    # Reset the in-process counters so each test starts from a known baseline.
    merge_gate._MERGE_GATE_COUNTERS.update(
        retest_timeout_total=0,
        retest_infra_retry_total=0,
        retest_infra_exhausted_total=0,
        retest_skipped_current_total=0,
        last_infra_timeout_wi=None,
    )
    yield


# ---------------------------------------------------------------------------
# merge_gate_status() snapshot + counter increments.
# ---------------------------------------------------------------------------
def test_tc12_status_exposes_flags_and_counters():
    """The status snapshot contains every expected flag and counter key."""
    snap = merge_gate.merge_gate_status()
    for key in (
        "infra_tolerance_enabled",
        "infra_max_retries",
        "infra_retry_delay_s",
        "skip_when_current_enabled",
        "tree_kill_enabled",
        "retest_timeout_s",
        "retest_timeout_total",
        "retest_infra_retry_total",
        "retest_infra_exhausted_total",
        "retest_skipped_current_total",
        "last_infra_timeout_wi",
    ):
        assert key in snap, f"missing /queue merge_gate key: {key}"


def test_tc12_counters_track_infra_timeout_distinctly():
    """Each ``note_*`` helper bumps exactly its own counter by one."""
    merge_gate.note_retest_timeout("ORCH-110")
    merge_gate.note_retest_infra_retry()
    merge_gate.note_retest_infra_exhausted()
    merge_gate.note_retest_skipped_current()
    snap = merge_gate.merge_gate_status()
    assert snap["retest_timeout_total"] == 1
    assert snap["retest_infra_retry_total"] == 1
    assert snap["retest_infra_exhausted_total"] == 1
    assert snap["retest_skipped_current_total"] == 1
    # Distinguishable from a code-fault: the last infra-timeout WI is tracked here,
    # NOT in the merge-verify (code/merge) counters.
    assert snap["last_infra_timeout_wi"] == "ORCH-110"


def test_tc12_status_never_raises(monkeypatch):
    """The snapshot is still a dict when a settings attribute is missing."""
    # A broken settings attribute -> the snapshot degrades, never raises.
    monkeypatch.delattr(
        merge_gate.settings, "merge_retest_infra_tolerance_enabled", raising=False
    )
    snap = merge_gate.merge_gate_status()
    assert isinstance(snap, dict)


# ---------------------------------------------------------------------------
# GET /queue carries the additive merge_gate block (and the legacy keys stay).
# ---------------------------------------------------------------------------
def test_tc12_queue_endpoint_includes_merge_gate_block():
    """``queue()`` returns the ``merge_gate`` block alongside the legacy keys."""
    # Imported lazily, as in the original test, so src.main is only loaded here.
    from src.main import queue

    out = asyncio.run(queue())
    assert "merge_gate" in out
    assert "infra_tolerance_enabled" in out["merge_gate"]
    # The pre-existing observability keys are untouched (additive only).
    assert "merge_verify" in out and "coverage" in out


# ---------------------------------------------------------------------------
# The exhaustion alert is INFRA-specific + carries the clickable issue number,
# distinct from the code-fault "Merge-gate still failing after N developer retries".
# ---------------------------------------------------------------------------
def test_tc12_exhaustion_alert_is_infra_specific_and_clickable(monkeypatch):
    """An exhausted infra-retry budget yields an infra-worded alert naming the issue."""
    sent = {}

    def _tg(msg):
        # Capture the outgoing Telegram text instead of sending it.
        sent["msg"] = msg

    monkeypatch.setattr(stage_engine, "send_telegram", _tg)
    monkeypatch.setattr(stage_engine, "set_issue_blocked", MagicMock())
    monkeypatch.setattr(stage_engine, "plane_add_comment", MagicMock())
    # link_for builds the clickable number; use a recognisable sentinel.
    monkeypatch.setattr(stage_engine, "link_for", lambda wi, **k: f"{wi}")
    monkeypatch.setattr(stage_engine.settings, "merge_retest_infra_max_retries", 2)

    # Seed an exhausted budget for the task: two prior infra-retry jobs against
    # a max of 2. The connection is closed in ``finally`` so a failed INSERT
    # cannot leave it open against the shared temp DB file.
    conn = get_db()
    try:
        cur = conn.execute(
            "INSERT INTO tasks (plane_id, work_item_id, repo, branch, stage) VALUES (?,?,?,?,?)",
            ("plane-ORCH-110", "ORCH-110", "orchestrator", "feature/ORCH-110-x", "deploy-staging"),
        )
        task_id = cur.lastrowid
        for _ in range(2):
            conn.execute(
                "INSERT INTO jobs (agent, repo, task_id, task_content) "
                "VALUES ('deployer','orchestrator',?, 'Note: merge-gate infra-timeout retry')",
                (task_id,),
            )
        conn.commit()
    finally:
        conn.close()

    res = AdvanceResult()
    stage_engine._handle_merge_gate_infra_retry(
        task_id,
        "deploy-staging",
        "orchestrator",
        "ORCH-110",
        "feature/ORCH-110-x",
        "re-test timeout after 900s",
        res,
    )

    assert res.note == "merge-gate-infra-retry-exhausted"
    # Fail with a readable message (not a bare KeyError) if no alert went out.
    assert "msg" in sent, "exhaustion path sent no Telegram alert"
    assert "ORCH-110" in sent["msg"]  # clickable issue number present
    assert "developer retries" not in sent["msg"]  # NOT the code-fault wording
    assert "НЕ дефект кода" in sent["msg"]  # explicitly infrastructural