"""ORCH-027 / TC-01..TC-15: the coverage-gate leaf module (src/coverage_gate.py). These exercise the DETERMINISTIC core: the pure verdict / delta / frontmatter helpers (no binaries needed), the ratchet baseline against a real tmp SQLite DB, the conditionality / kill-switch / fail-open behaviour with the measurer mocked, never-raise, and the gate's integration into advance_stage / GET /queue. Contract under test (ADR-001 §7): * the verdict is a deterministic pure function of (measured, baseline, floor, policy, epsilon) — no LLM, all border / epsilon cases covered; * the ratchet baseline only moves UP and bootstraps on the first merge; * conditionality: empty scope -> self-hosting only; out-of-scope -> no-op N/A; kill-switch off -> inert; * a coverage-tool error degrades fail-open + WARNING by default, fail-closed only when configured; * the machine verdict lives ONLY in the YAML frontmatter (read-back == written); * never-raise: any internal error -> a (bool, reason) pair, no exception escapes; * self-hosting safety: the gate never deploys / restarts prod / pushes main. 
""" import os import tempfile os.environ["ORCH_DB_PATH"] = os.path.join(tempfile.gettempdir(), "test_coverage_gate.db") os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token") os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token") import pytest # noqa: E402 import src.db as db # noqa: E402 from src import config as cfg # noqa: E402 from src import coverage_gate as cg # noqa: E402 _REPO = "orchestrator" _BRANCH = "feature/ORCH-027-code-coverage" _WI = "ORCH-027" @pytest.fixture(autouse=True) def fresh_db(tmp_path, monkeypatch): """Isolated tmp SQLite DB + gate ON / empty scope (self-hosting) by default.""" dbfile = tmp_path / "cov.db" monkeypatch.setattr(db.settings, "db_path", str(dbfile)) monkeypatch.setattr(cfg.settings, "coverage_gate_enabled", True, raising=False) monkeypatch.setattr(cfg.settings, "coverage_gate_repos", "", raising=False) monkeypatch.setattr(cfg.settings, "coverage_min_percent", 80.0, raising=False) monkeypatch.setattr(cfg.settings, "coverage_policy", "both", raising=False) monkeypatch.setattr(cfg.settings, "coverage_epsilon", 0.5, raising=False) monkeypatch.setattr(cfg.settings, "coverage_tool_fail_closed", False, raising=False) monkeypatch.setattr(cfg.settings, "coverage_run_timeout_s", 900, raising=False) db.init_db() yield # =========================================================================== # TC-01 — policy=absolute # =========================================================================== def test_tc01_policy_absolute(): # measured >= floor -> PASS ok, _ = cg.compute_coverage_verdict(85.0, None, 80.0, "absolute", 0.0) assert ok is True # exactly on the floor -> PASS (>=) ok, _ = cg.compute_coverage_verdict(80.0, None, 80.0, "absolute", 0.0) assert ok is True # below floor-epsilon -> FAIL ok, _ = cg.compute_coverage_verdict(78.0, None, 80.0, "absolute", 0.5) assert ok is False # baseline is IGNORED under absolute (even a high baseline cannot fail it) ok, _ = cg.compute_coverage_verdict(85.0, 99.0, 80.0, "absolute", 0.0) 
assert ok is True # =========================================================================== # TC-02 — policy=baseline (no-regression / ratchet) # =========================================================================== def test_tc02_policy_baseline(): # measured >= baseline -> PASS ok, _ = cg.compute_coverage_verdict(90.0, 85.0, 0.0, "baseline", 0.0) assert ok is True # exactly on baseline -> PASS ok, _ = cg.compute_coverage_verdict(85.0, 85.0, 0.0, "baseline", 0.0) assert ok is True # below baseline-epsilon -> FAIL ok, _ = cg.compute_coverage_verdict(83.0, 85.0, 0.0, "baseline", 0.5) assert ok is False # floor is IGNORED under baseline (low measured vs floor but >= baseline -> PASS) ok, _ = cg.compute_coverage_verdict(40.0, 30.0, 80.0, "baseline", 0.0) assert ok is True # bootstrap: baseline None under baseline policy -> PASS (cannot regress vs nothing) ok, reason = cg.compute_coverage_verdict(10.0, None, 80.0, "baseline", 0.0) assert ok is True assert "bootstrap" in reason.lower() # =========================================================================== # TC-03 — policy=both (PASS only if BOTH hold) # =========================================================================== def test_tc03_policy_both(): # both hold -> PASS ok, _ = cg.compute_coverage_verdict(90.0, 85.0, 80.0, "both", 0.0) assert ok is True # absolute fails (below floor) -> FAIL even though >= baseline ok, _ = cg.compute_coverage_verdict(82.0, 80.0, 85.0, "both", 0.0) assert ok is False # baseline fails (below baseline) -> FAIL even though >= floor ok, _ = cg.compute_coverage_verdict(84.0, 90.0, 80.0, "both", 0.0) assert ok is False # bootstrap under both: baseline None -> only absolute decides ok, _ = cg.compute_coverage_verdict(85.0, None, 80.0, "both", 0.0) assert ok is True ok, _ = cg.compute_coverage_verdict(70.0, None, 80.0, "both", 0.0) assert ok is False # =========================================================================== # TC-04 — epsilon tolerance (anti-flap, NFR-4) 
# ===========================================================================


def test_tc04_epsilon_tolerance():
    """Shortfalls within epsilon still pass; a negative epsilon grants no slack."""
    # (measured, baseline, floor, policy, epsilon, expected verdict)
    scenarios = (
        # measured 0.3% under baseline, epsilon 0.5 -> still PASS (within noise)
        (84.7, 85.0, 80.0, "both", 0.5, True),
        # measured 0.3% under floor, epsilon 0.5 -> still PASS
        (79.7, 80.0, 0.0, "absolute", 0.5, True),
        # just beyond epsilon -> FAIL
        (84.4, 85.0, 80.0, "baseline", 0.5, False),
        # negative epsilon is clamped to 0 (no negative tolerance)
        (84.9, 85.0, 0.0, "baseline", -5.0, False),
    )
    for measured, baseline, floor, policy, epsilon, expected in scenarios:
        passed, _ = cg.compute_coverage_verdict(measured, baseline, floor, policy, epsilon)
        assert passed is expected


# ===========================================================================
# TC-05 — ratchet baseline (up only; never lowers)
# ===========================================================================


def test_tc05_ratchet_up_only():
    """The stored baseline is seeded, raised, re-stamped, but never lowered."""
    # bootstrap seeds the baseline
    assert db.get_coverage_baseline(_REPO) is None
    seeded = db.ratchet_coverage_baseline(_REPO, 80.0, "sha1")
    assert seeded is True
    assert db.get_coverage_baseline(_REPO) == pytest.approx(80.0)

    # higher value raises it
    raised = db.ratchet_coverage_baseline(_REPO, 85.0, "sha2")
    assert raised is True
    assert db.get_coverage_baseline(_REPO) == pytest.approx(85.0)

    # equal value re-stamps (idempotent, no harm) — baseline unchanged
    db.ratchet_coverage_baseline(_REPO, 85.0, "sha3")
    assert db.get_coverage_baseline(_REPO) == pytest.approx(85.0)

    # LOWER value does NOT lower the baseline
    lowered = db.ratchet_coverage_baseline(_REPO, 70.0, "sha4")
    assert lowered is False
    assert db.get_coverage_baseline(_REPO) == pytest.approx(85.0)


def test_tc05_ratchet_per_repo_isolated():
    """Baselines written for two repos are read back independently."""
    other_repo = "enduro-trails"
    db.ratchet_coverage_baseline(_REPO, 85.0, "s")
    db.ratchet_coverage_baseline(other_repo, 42.0, "s")

    own_baseline = db.get_coverage_baseline(_REPO)
    assert own_baseline == pytest.approx(85.0)
    other_baseline = db.get_coverage_baseline(other_repo)
    assert other_baseline == pytest.approx(42.0)


# ===========================================================================
# TC-06 —
# bootstrap baseline (first init from main measurement)
# ===========================================================================


def test_tc06_bootstrap(monkeypatch, tmp_path):
    """With no stored baseline, the merge ratchet seeds it from the report file."""
    # No baseline yet -> ratchet_baseline_on_merge seeds it from the artefact value.
    report = (
        "---\ncoverage_status: PASS\nwork_item: ORCH-027\n"
        "measured_coverage: 77.50\nbaseline: \nfloor: 0.00\npolicy: both\n"
        "epsilon: 0.50\ndelta: 0.00\n---\n# body\n"
    )
    report_file = tmp_path / "18.md"
    monkeypatch.setattr(cg, "_report_path", lambda *a, **k: str(report_file))
    report_file.write_text(report, encoding="utf-8")

    assert db.get_coverage_baseline(_REPO) is None
    assert cg.ratchet_baseline_on_merge(_REPO, _WI, _BRANCH, "sha") is True
    assert db.get_coverage_baseline(_REPO) == pytest.approx(77.5)


# ===========================================================================
# TC-07 — conditionality applies(repo) (empty scope -> self-hosting only)
# ===========================================================================


def test_tc07_applies_self_hosting_only(monkeypatch):
    """An empty scope applies the gate to "orchestrator" and not to "enduro-trails"."""
    monkeypatch.setattr(cfg.settings, "coverage_gate_repos", "", raising=False)
    assert cg.coverage_gate_applies("orchestrator") is True
    assert cg.coverage_gate_applies("enduro-trails") is False


def test_tc07_applies_csv_scope(monkeypatch):
    """A CSV scope (with a space after the comma) selects exactly the listed repos."""
    monkeypatch.setattr(cfg.settings, "coverage_gate_repos", "foo, enduro-trails", raising=False)
    assert cg.coverage_gate_applies("enduro-trails") is True
    assert cg.coverage_gate_applies("orchestrator") is False


def test_tc07_out_of_scope_noop_no_measure(monkeypatch):
    """An out-of-scope repo passes as N/A without invoking the measurer."""
    # Out-of-scope repo -> (True, "...N/A") and the expensive measurer is NOT called.
    measure_calls = []

    def _measure_spy(*args, **kwargs):
        measure_calls.append((args, kwargs))
        return 99.0

    monkeypatch.setattr(cg, "measure_coverage", _measure_spy)
    ok, reason = cg.check_coverage_gate("enduro-trails", "ET-1", "feature/x")
    assert ok is True
    assert "N/A" in reason
    assert len(measure_calls) == 0


# ===========================================================================
# TC-08 — kill-switch off -> inert (1:1 as before ORCH-027)
# ===========================================================================


def test_tc08_kill_switch_off(monkeypatch):
    """With the gate disabled the check passes, never measures, and applies nowhere."""
    monkeypatch.setattr(cfg.settings, "coverage_gate_enabled", False, raising=False)
    measure_calls = []

    def _measure_spy(*args, **kwargs):
        measure_calls.append((args, kwargs))
        return 10.0

    monkeypatch.setattr(cg, "measure_coverage", _measure_spy)
    ok, reason = cg.check_coverage_gate(_REPO, _WI, _BRANCH)
    assert ok is True
    assert "disabled" in reason
    assert len(measure_calls) == 0
    assert cg.coverage_gate_applies(_REPO) is False


# ===========================================================================
# TC-09 — fail-open by default on a tool error; fail-closed when configured
# ===========================================================================


def test_tc09_fail_open_default(monkeypatch, tmp_path):
    """A measurer returning None passes by default and the report says PASS."""
    report_file = tmp_path / "18.md"
    monkeypatch.setattr(cg, "measure_coverage", lambda *a, **k: None)  # tool error
    monkeypatch.setattr(cg, "_report_path", lambda *a, **k: str(report_file))

    ok, reason = cg.check_coverage_gate(_REPO, _WI, _BRANCH)
    assert ok is True
    assert "fail-open" in reason.lower()
    # The report records the fail-open PASS.
    content = report_file.read_text(encoding="utf-8")
    assert "coverage_status: PASS" in content


def test_tc09_fail_closed_when_configured(monkeypatch, tmp_path):
    """With fail-closed configured, a measurer returning None fails the gate."""
    report_file = tmp_path / "18.md"
    monkeypatch.setattr(cfg.settings, "coverage_tool_fail_closed", True, raising=False)
    monkeypatch.setattr(cg, "measure_coverage", lambda *a, **k: None)
    monkeypatch.setattr(cg, "_report_path", lambda *a, **k: str(report_file))

    ok, reason = cg.check_coverage_gate(_REPO, _WI, _BRANCH)
    assert ok is False
    assert "fail-closed" in reason.lower()
    content = report_file.read_text(encoding="utf-8")
    assert "coverage_status: FAIL" in content


# ===========================================================================
# TC-10 — never-raise (broken inputs / internal error never escape)
# ===========================================================================


def test_tc10_verdict_never_raises_on_bad_inputs():
    """A non-numeric measurement yields a FAIL pair instead of an exception."""
    ok, reason = cg.compute_coverage_verdict("not-a-number", None, 80.0, "both", 0.5)
    assert ok is False
    assert "bad inputs" in reason


def test_tc10_parse_coverage_percent_tolerant():
    """Only a payload carrying totals.percent_covered yields a number."""
    well_formed = {"totals": {"percent_covered": 73.2}}
    assert cg.parse_coverage_percent(well_formed) == pytest.approx(73.2)
    # Missing keys or a non-dict payload -> None.
    assert cg.parse_coverage_percent({}) is None
    assert cg.parse_coverage_percent("garbage") is None
    assert cg.parse_coverage_percent({"totals": {}}) is None


def test_tc10_check_never_raises(monkeypatch):
    """An exception inside the measurer comes back as a (bool, reason) pair."""
    # measure_coverage explodes -> the gate swallows it and returns a pair (fail-open).
    def _boom(*a, **k):
        raise RuntimeError("coverage exploded")

    monkeypatch.setattr(cg, "measure_coverage", _boom)
    ok, reason = cg.check_coverage_gate(_REPO, _WI, _BRANCH)
    assert isinstance(ok, bool)
    assert "error (fail-open)" in reason


def test_tc10_ratchet_never_raises_on_missing_report(monkeypatch, tmp_path):
    """A report path that does not exist makes the ratchet return False."""
    missing = tmp_path / "nope.md"
    monkeypatch.setattr(cg, "_report_path", lambda *a, **k: str(missing))
    assert cg.ratchet_baseline_on_merge(_REPO, _WI, _BRANCH, "sha") is False


# ===========================================================================
# TC-11 — write/read report; single source of truth via frontmatter
# ===========================================================================


def test_tc11_report_roundtrip(tmp_path):
    """What render_coverage_report writes is what the parsers read back."""
    fields = {
        "coverage_status": "PASS",
        "measured_coverage": 88.25,
        "baseline": 85.0,
        "floor": 80.0,
        "policy": "both",
        "epsilon": 0.5,
        "delta": 3.25,
        "reason": "ok",
        "measurement": "pytest --cov=src: 88.25%",
        "policy_detail": "policy=both",
    }
    content = cg.render_coverage_report(_WI, fields)
    # machine key present and parseable
    ok, verdict = cg.parse_coverage_status(content)
    assert ok is True
    assert "PASS" in verdict
    # measured_coverage read back from the SAME file (ratchet source of truth)
    assert cg.read_measured_coverage(content) == pytest.approx(88.25)

    # FAIL roundtrip (FAIL token authoritative)
    fields["coverage_status"] = "FAIL"
    content = cg.render_coverage_report(_WI, fields)
    ok, verdict = cg.parse_coverage_status(content)
    assert ok is False
    assert "FAIL" in verdict


def test_tc11_parse_missing_frontmatter():
    """Text without frontmatter parses as FAIL and has no measured coverage."""
    ok, reason = cg.parse_coverage_status("no frontmatter here")
    assert ok is False
    assert "coverage_status" in reason
    assert cg.read_measured_coverage("no frontmatter") is None


def test_tc11_bootstrap_report_blank_baseline():
    """A None baseline renders as an empty field and the report still parses."""
    # bootstrap: baseline None -> renders an EMPTY baseline field, still parseable.
    fields = {
        "coverage_status": "PASS",
        "measured_coverage": 50.0,
        "baseline": None,
        "floor": 0.0,
        "policy": "both",
        "epsilon": 0.5,
        "delta": 0.0,
    }
    content = cg.render_coverage_report(_WI, fields)
    assert "baseline: \n" in content or "baseline:\n" in content
    assert cg.parse_coverage_status(content)[0] is True


# ===========================================================================
# TC-12 — self-hosting safety: the leaf imports no engine, touches no prod
# ===========================================================================


def test_tc12_leaf_no_engine_import():
    """Inspect the leaf's AST for forbidden imports and command tokens."""
    # AST-based (not prose): the leaf must never IMPORT the engine, and the only
    # external command it runs is pytest — no docker/compose/force-push literals.
    import ast
    import inspect

    tree = ast.parse(inspect.getsource(cg))

    imported: set[str] = set()
    for node in ast.walk(tree):
        if isinstance(node, ast.ImportFrom) and node.module:
            imported.add(node.module)
        elif isinstance(node, ast.Import):
            imported.update(alias.name for alias in node.names)
    assert not any("stage_engine" in m for m in imported), imported
    assert not any(("launcher" in m or "self_deploy" in m) for m in imported), imported

    # No deploy / restart / force-push command tokens used as actual string literals.
    # Exact-match membership only, so a set gives the same answers as a list.
    string_literals = {
        n.value
        for n in ast.walk(tree)
        if isinstance(n, ast.Constant) and isinstance(n.value, str)
    }
    for forbidden in ("compose", "--force-with-lease", "--force", "docker"):
        assert forbidden not in string_literals, f"coverage_gate leaf must not run {forbidden!r}"


def test_tc12_delta_signed():
    """compute_delta is signed, and 0.0 when both references are None."""
    assert cg.compute_delta(85.0, 80.0, 70.0) == pytest.approx(5.0)  # vs max(80,70)
    assert cg.compute_delta(75.0, 80.0, 70.0) == pytest.approx(-5.0)
    assert cg.compute_delta(50.0, None, None) == pytest.approx(0.0)


# ===========================================================================
# TC-13 — gate integration into advance_stage (rollback on FAIL, retry++)
# ===========================================================================


def test_tc13_advance_rolls_back_on_fail(monkeypatch):
    """A failing coverage check rolls back to development and re-enqueues the developer."""
    from src import stage_engine as se

    captured = {}

    def _fake_run_qg(name, repo, wi, branch):
        captured["qg"] = name
        return (False, "measured=70.00% policy=both: absolute FAIL")

    monkeypatch.setattr(se, "_run_qg", _fake_run_qg)

    # Side-effecting collaborators are replaced by no-ops for this test.
    for hook in (
        "update_task_stage",
        "notify_stage_change",
        "plane_notify_stage",
        "set_issue_in_progress",
        "notify_qg_failure",
        "plane_add_comment",
    ):
        monkeypatch.setattr(se, hook, lambda *a, **k: None)
    monkeypatch.setattr(se, "_developer_retry_count", lambda *a, **k: 0)

    lease_releases = []

    def _release_spy(*args, **kwargs):
        lease_releases.append((args, kwargs))

    monkeypatch.setattr(se.merge_gate, "release_merge_lease", _release_spy)

    enqueued = []

    def _enqueue_spy(*args, **kwargs):
        enqueued.append((args, kwargs))
        return 123

    monkeypatch.setattr(se, "enqueue_job", _enqueue_spy)

    result = se.AdvanceResult()
    intervened = se._handle_coverage_gate(
        1, "deploy-staging", _REPO, _WI, _BRANCH, "deployer", result
    )
    assert intervened is True
    assert captured["qg"] == "check_coverage_gate"
    assert result.rolled_back_to == "development"
    assert result.enqueued_agent == "developer"
    assert len(enqueued) == 1
    # merge lease released on the coverage rollback (ADR-001 D1/TR-2)
    assert len(lease_releases) == 1


def test_tc13_advance_passes_through_on_ok(monkeypatch):
    """A passing coverage check leaves the advance result untouched."""
    from src import stage_engine as se

    monkeypatch.setattr(se, "_run_qg", lambda *a, **k: (True, "coverage OK"))
    result = se.AdvanceResult()
    intervened = se._handle_coverage_gate(
        1, "deploy-staging", _REPO, _WI, _BRANCH, "deployer", result
    )
    assert intervened is False
    assert result.rolled_back_to is None


# ===========================================================================
# TC-14 — real measurement on a minimal fixture repo (pytest --cov in worktree)
# ===========================================================================


def test_tc14_real_measurement(tmp_path, monkeypatch):
    """Measure coverage for real on a tiny throw-away project."""
    # Build a minimal project: src/ with one function, tests covering part of it.
    proj = tmp_path / "fixture_repo"
    (proj / "src").mkdir(parents=True)
    (proj / "tests").mkdir()
    (proj / "src" / "__init__.py").write_text("", encoding="utf-8")
    (proj / "src" / "mod.py").write_text(
        "def covered():\n return 1\n\n\ndef uncovered():\n return 2\n",
        encoding="utf-8",
    )
    (proj / "tests" / "test_mod.py").write_text(
        "from src.mod import covered\n\n\ndef test_covered():\n assert covered() == 1\n",
        encoding="utf-8",
    )
    # Point the measurer's worktree resolution at our fixture.
    monkeypatch.setattr(cg, "ensure_worktree", lambda repo, branch: str(proj))

    pct = cg.measure_coverage(_REPO, _BRANCH)
    assert pct is not None
    # mod.py: 4 statements, uncovered() body (1) unrun -> ~75%; bounds-check only.
    assert 50.0 <= pct <= 90.0
    # the scratch json is cleaned up
    assert not (proj / ".coverage-report.json").exists()


def test_tc14_measure_timeout_returns_none(monkeypatch, tmp_path):
    """A subprocess timeout makes measure_coverage return None."""
    import subprocess

    def _timeout(*a, **k):
        raise subprocess.TimeoutExpired(cmd="pytest", timeout=1)

    # Use the per-test directory as the worktree rather than a hard-coded shared
    # "/tmp": it exists on every platform and is isolated from other processes.
    monkeypatch.setattr(cg, "ensure_worktree", lambda r, b: str(tmp_path))
    monkeypatch.setattr(cg.subprocess, "run", _timeout)
    assert cg.measure_coverage(_REPO, _BRANCH) is None


# ===========================================================================
# TC-15 — observability (snapshot block) + registry compatibility unchanged
# ===========================================================================


def test_tc15_snapshot_shape(monkeypatch):
    """snapshot() exposes the gate settings and the stored per-repo baseline."""
    db.ratchet_coverage_baseline(_REPO, 81.0, "sha")
    snap = cg.snapshot()
    assert snap["enabled"] is True
    assert snap["policy"] == "both"
    assert snap["floor"] == pytest.approx(80.0)
    assert "baselines" in snap
    assert _REPO in snap["baselines"]
    assert snap["baselines"][_REPO]["coverage"] == pytest.approx(81.0)


def test_tc15_snapshot_never_raises(monkeypatch):
    """A failing baseline lookup yields an empty ``baselines`` mapping."""

    def _explode():
        raise RuntimeError("boom")

    monkeypatch.setattr(db, "all_coverage_baselines", _explode)
    snap = cg.snapshot()
    assert snap["enabled"] is True
    assert snap["baselines"] == {}


def test_tc15_registry_and_transitions_unchanged():
    """The new check is registered; existing checks and stage edges are untouched."""
    from src.qg.checks import QG_CHECKS
    from src.stages import STAGE_TRANSITIONS

    # new check registered...
    assert "check_coverage_gate" in QG_CHECKS
    # ...without touching the existing verdict checks (byte-for-byte names present)
    for name in (
        "check_ci_green",
        "check_tests_passed",
        "check_security_gate",
        "check_staging_status",
        "check_staging_image_fresh",
        "check_branch_mergeable",
    ):
        assert name in QG_CHECKS
    # coverage is an edge sub-gate, NOT a STAGE_TRANSITIONS edge
    for spec in STAGE_TRANSITIONS.values():
        assert "check_coverage_gate" not in str(spec)