"""ORCH-110 TC-04 / TC-05 / TC-06 / TC-09: merge-gate infra-timeout routing.

Drives the engine (``stage_engine.advance_stage``) on the
deploy-staging -> deploy edge with ``check_branch_mergeable`` monkeypatched,
exactly like the existing ``test_stage_engine.TestMergeGate`` suite, and
asserts the NEW routing (D3):

* TC-04 — an INFRA re-test timeout -> bounded infra-retry (re-queue the
  staging-deployer with a delay, task STAYS on deploy-staging) — NOT a
  rollback to development and NOT a developer-retry.
* TC-05 — a deterministically RED re-test STILL rolls back to development +
  developer retry (BR-6 / AC-3 anti-over-tolerance).
* TC-06 — the infra-retry is bounded (anti-loop): after the budget it blocks
  with ONE infra-alert, no infinite bounce, no new job, task NOT in
  development.
* TC-09 — never-raise: an error in the transient path is swallowed
  (a WARNING) and never escapes into advance_stage.

Offline: isolated sqlite DB; Plane/Telegram/notifications mocked at
stage_engine.
"""
import os
import tempfile

import pytest

# The environment must be configured BEFORE the ``src`` imports below, which is
# why those imports carry ``noqa: E402``.
_test_db = os.path.join(tempfile.gettempdir(), "test_orch110_routing.db")
os.environ["ORCH_DB_PATH"] = _test_db
os.environ["ORCH_REPOS_DIR"] = tempfile.gettempdir()
os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")
os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")

from unittest.mock import MagicMock  # noqa: E402

import src.db as _db  # noqa: E402
from src.db import init_db, get_db  # noqa: E402
from src import stage_engine  # noqa: E402
from src.stage_engine import advance_stage  # noqa: E402

_REPO = "orchestrator"
_WI = "ORCH-110"
_BRANCH = "feature/ORCH-110-x"
_TIMEOUT_REASON = "re-test timeout after 900s"


@pytest.fixture(autouse=True)
def fresh_db(monkeypatch):
    """Point the DB layer at the test file and recreate it for every test."""
    monkeypatch.setattr(_db.settings, "db_path", _test_db)
    if os.path.exists(_test_db):
        os.unlink(_test_db)
    init_db()
    yield


@pytest.fixture(autouse=True)
def silence_side_effects(monkeypatch):
    """Replace the notification / Plane hooks on ``stage_engine`` with mocks."""
    for name in (
        "notify_stage_change",
        "notify_qg_failure",
        "send_telegram",
        "plane_notify_stage",
        "plane_notify_qg",
        "plane_add_comment",
        "set_issue_in_progress",
        "set_issue_blocked",
    ):
        monkeypatch.setattr(stage_engine, name, MagicMock())
    # The merge-gate sub-gate runs only AFTER the stage QG + security + coverage pass;
    # the self-deploy Phase A interception is irrelevant (merge-gate intervenes first).
    monkeypatch.setattr(stage_engine.settings, "deploy_require_manual_approve", False)


@pytest.fixture(autouse=True)
def tolerance_on(monkeypatch):
    """Enable infra tolerance with a budget of 2 retries and a 120 s delay."""
    monkeypatch.setattr(stage_engine.settings, "merge_retest_infra_tolerance_enabled", True)
    monkeypatch.setattr(stage_engine.settings, "merge_retest_infra_max_retries", 2)
    monkeypatch.setattr(stage_engine.settings, "merge_retest_infra_retry_delay_s", 120)


def _pass(*a, **k):
    """Gate stub that always passes."""
    return (True, "ok")


def _fail(reason):
    """Build a gate stub that always fails with ``reason``."""
    def _f(*a, **k):
        return (False, reason)
    return _f


def _patch_gates(monkeypatch, merge_reason):
    """Make every gate pass except ``check_branch_mergeable``.

    The merge gate fails with ``merge_reason`` so each test can choose which
    failure flavour (timeout, red re-test, conflict) the engine sees.
    """
    monkeypatch.setattr(
        stage_engine,
        "QG_CHECKS",
        {
            **stage_engine.QG_CHECKS,
            "check_staging_status": _pass,
            "check_security_gate": _pass,
            "check_coverage_gate": _pass,
            "check_branch_mergeable": _fail(merge_reason),
            "check_staging_image_fresh": _pass,
        },
    )


def _make_task(stage="deploy-staging"):
    """Insert one task row at ``stage`` and return its row id."""
    conn = get_db()
    try:
        cur = conn.execute(
            "INSERT INTO tasks (plane_id, work_item_id, repo, branch, stage) VALUES (?,?,?,?,?)",
            (f"plane-{_WI}", _WI, _REPO, _BRANCH, stage),
        )
        tid = cur.lastrowid
        conn.commit()
        return tid
    finally:
        # Close even when the INSERT/commit raises, so a failing test does not
        # leave a handle open on the shared DB file.
        conn.close()


def _stage(task_id):
    """Return the current ``stage`` column of the given task."""
    conn = get_db()
    try:
        row = conn.execute("SELECT stage FROM tasks WHERE id=?", (task_id,)).fetchone()
        return row[0]
    finally:
        conn.close()


def _jobs():
    """Return all job rows (agent, task_content, available_at) ordered by id.

    Rows are converted with ``dict(r)`` — presumably ``get_db`` installs a
    mapping-style row factory; verify against ``src.db``.
    """
    conn = get_db()
    try:
        rows = conn.execute(
            "SELECT agent, task_content, available_at FROM jobs ORDER BY id"
        ).fetchall()
        return [dict(r) for r in rows]
    finally:
        conn.close()


def _seed_infra_retry_jobs(task_id, n):
    """Insert ``n`` deployer jobs carrying the infra-timeout retry note."""
    conn = get_db()
    try:
        for _ in range(n):
            conn.execute(
                "INSERT INTO jobs (agent, repo, task_id, task_content) "
                "VALUES ('deployer', ?, ?, 'Note: merge-gate infra-timeout retry')",
                (_REPO, task_id),
            )
        conn.commit()
    finally:
        conn.close()


def _advance(task_id):
    """Run ``advance_stage`` for the task as if the deployer just finished."""
    return advance_stage(
        task_id, "deploy-staging", _REPO, _WI, _BRANCH, finished_agent="deployer"
    )


# ---------------------------------------------------------------------------
# TC-04 — infra-timeout -> bounded infra-retry, NOT a rollback to development.
# ---------------------------------------------------------------------------
def test_tc04_infra_timeout_reschedules_not_rollback(monkeypatch):
    _patch_gates(monkeypatch, _TIMEOUT_REASON)
    task_id = _make_task()

    res = _advance(task_id)

    assert res.advanced is False
    assert res.rolled_back_to is None  # NOT a code-fault rollback
    assert res.note == "merge-gate-infra-retry"
    assert _stage(task_id) == "deploy-staging"  # stays put
    jobs = _jobs()
    assert len(jobs) == 1
    assert jobs[0]["agent"] == "deployer"  # re-queued staging-deployer, NOT developer
    assert "merge-gate infra-timeout retry" in jobs[0]["task_content"]
    assert jobs[0]["available_at"] is not None  # delayed re-pickup
    assert stage_engine.set_issue_blocked.called is False
    # No developer-retry semantics: the rollback comment / in-progress is never set.
    assert stage_engine.set_issue_in_progress.called is False


def test_tc04_killswitch_within_routing_is_observed(monkeypatch):
    """The infra-timeout always bumps the timeout counter (observability),
    even when routed to the retry path."""
    _patch_gates(monkeypatch, _TIMEOUT_REASON)
    before = stage_engine.merge_gate.merge_gate_status()["retest_timeout_total"]
    task_id = _make_task()

    _advance(task_id)

    after = stage_engine.merge_gate.merge_gate_status()["retest_timeout_total"]
    assert after == before + 1


# ---------------------------------------------------------------------------
# TC-05 — a deterministically RED re-test STILL rolls back (BR-6 / AC-3).
# ---------------------------------------------------------------------------
def test_tc05_red_retest_still_rolls_back(monkeypatch):
    _patch_gates(monkeypatch, "re-test failed after rebase: 1 failed, 5 passed")
    task_id = _make_task()

    res = _advance(task_id)

    assert res.advanced is False
    assert res.rolled_back_to == "development"
    assert _stage(task_id) == "development"
    jobs = _jobs()
    assert len(jobs) == 1
    assert jobs[0]["agent"] == "developer"  # developer re-queued (retry)


def test_tc05_conflict_still_rolls_back(monkeypatch):
    _patch_gates(monkeypatch, "rebase conflict: src/db.py")
    task_id = _make_task()

    res = _advance(task_id)

    assert res.rolled_back_to == "development"
    assert _stage(task_id) == "development"
    assert _jobs()[0]["agent"] == "developer"


# ---------------------------------------------------------------------------
# TC-06 — anti-loop: infra-retry is bounded; exhaustion -> ONE infra-alert.
# ---------------------------------------------------------------------------
def test_tc06_infra_retry_bounded_then_infra_alert(monkeypatch):
    monkeypatch.setattr(stage_engine.settings, "merge_retest_infra_max_retries", 2)
    _patch_gates(monkeypatch, _TIMEOUT_REASON)
    task_id = _make_task()
    _seed_infra_retry_jobs(task_id, 2)  # budget already spent

    res = _advance(task_id)

    assert res.advanced is False
    assert res.rolled_back_to is None  # NOT a rollback even at exhaustion
    assert res.note == "merge-gate-infra-retry-exhausted"
    assert res.alerted is True
    assert _stage(task_id) == "deploy-staging"  # NOT moved to development
    assert stage_engine.set_issue_blocked.called
    assert stage_engine.send_telegram.called
    # No NEW retry job past the cap (still only the 2 we seeded).
    assert len(_jobs()) == 2
    # The alert is INFRA-specific, not "developer must fix".
    msg = stage_engine.send_telegram.call_args[0][0]
    assert "infra" in msg.lower() or "ресурс" in msg.lower()
    assert "НЕ дефект кода" in msg


def test_tc06_below_budget_keeps_retrying(monkeypatch):
    monkeypatch.setattr(stage_engine.settings, "merge_retest_infra_max_retries", 3)
    _patch_gates(monkeypatch, _TIMEOUT_REASON)
    task_id = _make_task()
    _seed_infra_retry_jobs(task_id, 1)  # one retry already done, budget 3

    res = _advance(task_id)

    assert res.note == "merge-gate-infra-retry"
    assert res.alerted is not True
    # The seeded job + the new retry job.
    assert len(_jobs()) == 2


# ---------------------------------------------------------------------------
# TC-09 — never-raise: an error in the transient path is swallowed.
# ---------------------------------------------------------------------------
def test_tc09_infra_retry_never_raises(monkeypatch):
    _patch_gates(monkeypatch, _TIMEOUT_REASON)

    def _boom(*a, **k):
        raise RuntimeError("enqueue exploded")

    monkeypatch.setattr(stage_engine, "enqueue_job", _boom)
    task_id = _make_task()

    # Must NOT raise into advance_stage.
    res = _advance(task_id)

    assert res.note == "merge-gate-infra-retry-error"
    assert _stage(task_id) == "deploy-staging"  # left for the reconciler/reaper


def test_tc09_killswitch_off_falls_back_to_rollback(monkeypatch):
    """tolerance off -> a timeout takes the prior rollback path byte-for-byte (NFR-2)."""
    monkeypatch.setattr(stage_engine.settings, "merge_retest_infra_tolerance_enabled", False)
    _patch_gates(monkeypatch, _TIMEOUT_REASON)
    task_id = _make_task()

    res = _advance(task_id)

    assert res.rolled_back_to == "development"
    assert _stage(task_id) == "development"
    assert _jobs()[0]["agent"] == "developer"