Files
orchestrator/tests/test_orch110_retest_contract.py
claude-bot b80edf3383
All checks were successful
CI / test (push) Successful in 3m2s
CI / test (pull_request) Successful in 3m1s
fix(merge-gate): tolerate re-test infra-timeout + tree-kill spawned pytest
Eliminate the false `deploy-staging -> development` rollback that fired when the
merge-gate local re-test timed out (infra/resource) on a green CI + tester +
staging branch (incident ORCH-109/PR #129: a 516.7s suite blew its 600s budget
under CPU starvation from orphaned pytest processes -> timeout misrouted as a
code fault -> developer-retry loop -> manual gate).

Additive, 5 independent kill-switches, never-raise, self-hosting scope. Untouched
byte-for-byte: STAGE_TRANSITIONS, the QG_CHECKS registry, check_branch_mergeable
name/semantics, machine-verdict keys, the DB schema. INV-4 (never push/force-push
main) and the no-prod-restart rule are preserved.

- D1: new stdlib-only leaf src/proc_group.py runs the spawned re-test/coverage
  pytest in its own process group (start_new_session) and tree-kills the WHOLE
  group on timeout (os.killpg SIGTERM->grace->SIGKILL); used by
  merge_gate.retest_branch and coverage_gate.measure_coverage. No orphan leak.
  Fallback never-break: subprocess_tree_kill_enabled=False / non-POSIX -> the
  prior subprocess.run.
- D2/D3: merge_gate.classify_retest_failure distinguishes timeout/red/lock-busy/
  other; an infra timeout routes to _handle_merge_gate_infra_retry (bounded
  re-queue, task stays on deploy-staging, no rollback / no developer-retry); a
  red re-test / conflict still rolls back (BR-6). Exhaustion -> one infra alert.
- D4: skip the local re-test when the pre-merge rebase was a proven no-op (HEAD
  already CI/tester/staging-validated); fail-safe runs the re-test on any
  uncertainty. Flag merge_retest_skip_when_current_enabled.
- D5: merge_retest_timeout_s 600 -> 900 + _resolve_retest_timeout validation;
  reaper_max_running_s invariant preserved without change.
- D6: in-process counters + read-only merge_gate block in GET /queue; appended
  ("ORCH-110","classify_retest_failure","src/merge_gate.py") to
  MAIN_REGRESSION_MARKERS. Docs (README/internals overview/CLAUDE/CHANGELOG/
  .env.example) updated in the same PR.

Tests: tests/test_orch110_*.py (TC-01..TC-12, incl. the red-before/green-after
incident regression). Full suite green (1988 passed).

Refs: ORCH-110

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-15 10:14:55 +03:00

96 lines
4.0 KiB
Python

"""ORCH-110 TC-11: the local re-test necessity contract (D4 / FR-4 / AC-6).
The chosen contract: run the local re-test IFF the pre-merge rebase actually moved
HEAD (``main`` had moved -> a real semantic-conflict risk). When the rebase is a
PROVEN no-op (HEAD unchanged -> branch already at origin/main, the CI/tester/staging-
validated commit) the re-test is SKIPPED — it would be a redundant single point of
false failure. On ANY uncertainty (an empty SHA) the re-test runs (fail-safe -> the
red-rollback contract BR-6/AC-3 is never weakened).
"""
import os
import tempfile
# Seed the ORCH_* environment defaults BEFORE the ``src`` imports below (which is
# why those imports carry E402 suppressions). Presumably ``src`` reads these values
# at import time -- TODO confirm against the src settings module.
# ``setdefault`` never overrides a value the caller has already exported.
os.environ.setdefault("ORCH_DB_PATH", os.path.join(tempfile.gettempdir(), "test_orch110_contract.db"))
os.environ.setdefault("ORCH_REPOS_DIR", tempfile.gettempdir())
os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")
os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")
import pytest # noqa: E402
from src import merge_gate # noqa: E402
from src.qg import checks as qg # noqa: E402
# Shared (repo, work item, branch) arguments passed to ``qg.check_branch_mergeable``
# by the tests below.
_REPO = "orchestrator"
_BRANCH = "feature/ORCH-110-x"
_WI = "ORCH-110"
@pytest.fixture
def gate(monkeypatch):
    """Run the real ``check_branch_mergeable`` against stubbed merge-gate primitives.

    The fixture switches on the settings flags listed below, replaces the
    ``merge_gate`` lease / behind-main / rebase / re-test helpers with
    in-memory stubs, and returns a dict counting how often the rebase,
    lease-release and re-test stubs were invoked.

    Returns:
        dict: ``{"retest": int, "released": int, "rebased": int}`` call counts,
        mutated in place as the stubs are called.
    """
    calls = {"retest": 0, "released": 0, "rebased": 0}

    # ``raising=False`` so a flag that is absent from the settings object is
    # created instead of failing the fixture.
    setting_overrides = {
        "merge_gate_enabled": True,
        "merge_gate_repos": "",
        "premerge_rebase_always": True,
        "merge_retest_skip_when_current_enabled": True,
    }
    for flag, value in setting_overrides.items():
        monkeypatch.setattr(qg.settings, flag, value, raising=False)

    def _lease_granted(*a, **k):
        # The lease is always granted.
        return True, "lease acquired"

    def _always_behind(r, b):
        # The branch always reports as behind main.
        return True

    def _counting_rebase(r, b):
        calls["rebased"] += 1
        return True, "rebased onto origin/main"

    def _counting_release(r, b=None):
        calls["released"] += 1

    def _counting_retest(r, b):
        calls["retest"] += 1
        return True, "re-test green"

    primitive_stubs = {
        "acquire_merge_lease": _lease_granted,
        "branch_is_behind_main": _always_behind,
        "auto_rebase_onto_main": _counting_rebase,
        "release_merge_lease": _counting_release,
        "retest_branch": _counting_retest,
    }
    for attr, stub in primitive_stubs.items():
        monkeypatch.setattr(merge_gate, attr, stub)

    return calls
def _set_head_shas(monkeypatch, shas):
    """Stub ``merge_gate.head_sha`` so successive calls replay *shas* in order.

    Args:
        monkeypatch: the pytest ``monkeypatch`` fixture used to install the stub.
        shas: iterable of SHA strings to hand out, one per ``head_sha`` call.

    Once the scripted values are exhausted, every further call returns ``""``.
    """
    # Snapshot the input so later mutation of *shas* cannot affect the replay.
    scripted = iter(list(shas))

    def _next_sha(r, b):
        return next(scripted, "")

    monkeypatch.setattr(merge_gate, "head_sha", _next_sha)
def test_tc11_noop_rebase_skips_retest_lease_held(gate, monkeypatch):
    """Proven no-op rebase (HEAD unchanged) -> skip re-test, PASS, lease HELD."""
    # Identical SHA before and after the rebase: HEAD did not move.
    _set_head_shas(monkeypatch, ["sha_same", "sha_same"])

    passed, detail = qg.check_branch_mergeable(_REPO, _WI, _BRANCH)

    assert passed is True
    assert "re-test skipped" in detail
    # The re-test stub must never have been called ...
    assert gate["retest"] == 0
    # ... and the lease must not have been released (held until the merge).
    assert gate["released"] == 0
def test_tc11_head_moved_runs_retest(gate, monkeypatch):
    """A real catch-up (HEAD moved) -> re-test RUNS (the ORCH-043 risk is real)."""
    # Different SHA before and after the rebase: HEAD moved.
    _set_head_shas(monkeypatch, ["sha_old", "sha_new"])

    passed, detail = qg.check_branch_mergeable(_REPO, _WI, _BRANCH)

    assert passed is True
    assert detail == "rebased onto main, re-test green"
    # Exactly one re-test run for the moved HEAD.
    assert gate["retest"] == 1
@pytest.mark.parametrize(
    "shas",
    [
        ["", "sha_new"],
        ["sha_old", ""],
        ["", ""],
    ],
)
def test_tc11_uncertain_sha_runs_retest_failsafe(gate, monkeypatch, shas):
    """Cannot prove a no-op (empty SHA) -> re-test RUNS (fail-safe, never skips on
    uncertainty).

    Each case leaves the first SHA, the second SHA, or both empty.
    """
    _set_head_shas(monkeypatch, shas)

    passed, _detail = qg.check_branch_mergeable(_REPO, _WI, _BRANCH)

    assert passed is True
    # The re-test still runs whenever either SHA is unknown.
    assert gate["retest"] == 1
def test_tc11_skip_bumps_observability_counter(gate, monkeypatch):
    """The skip increments the read-only observability counter (D6).

    The counter lives in the module-global ``merge_gate._MERGE_GATE_COUNTERS``
    mapping. It is zeroed through ``monkeypatch.setitem`` rather than a bare
    assignment so the pre-test value is restored on teardown and this test
    cannot leak counter state into tests that run after it.
    """
    counter_key = "retest_skipped_current_total"
    # Reset via monkeypatch so the original entry is put back after the test.
    monkeypatch.setitem(merge_gate._MERGE_GATE_COUNTERS, counter_key, 0)
    # Identical SHA before and after the rebase: the no-op (skip) path.
    _set_head_shas(monkeypatch, ["sha_same", "sha_same"])

    qg.check_branch_mergeable(_REPO, _WI, _BRANCH)

    assert merge_gate._MERGE_GATE_COUNTERS[counter_key] == 1
    # Tie the counter bump to an actual skip: the re-test stub was never called.
    assert gate["retest"] == 0