fix(merge_gate): retry transient Gitea merge errors + already-in-main guard
merge_pr now wraps ONLY the mutating POST /pulls/{n}/merge in a bounded
exponential-backoff retry-loop on TRANSIENT outcomes (405 "try again later",
408, any 5xx, network/timeout, and 409|422 while the PR is still mergeable);
TERMINAL outcomes (403/404/real conflict via mergeable==False) -> fast honest
False, so the ORCH-071/081 not-merged HOLD backstop is unchanged. Fixes the
ORCH-063 false HOLD + manual re-merge on Gitea's post-push mergeability hiccup.
ensure_open_pr gains an "already fully in main" guard (_branch_fully_in_main,
git merge-base --is-ancestor HEAD origin/main) BEFORE creating a PR -> new
"already-in-main" outcome avoids the garbage empty PR on a re-driven finalizer;
_handle_merge_verify skips merge_pr on that outcome and lets the authoritative
SHA-in-main check confirm -> done (not a HOLD). git error of the guard fails
OPEN to the create path.
New ORCH_MERGE_RETRY_* settings (kill-switch merge_retry_enabled -> one-shot,
max_attempts=3, backoff base=2/max=5). INV-4 (merge only via Gitea PR-merge API,
never push/force-push main), never-raise, STAGE_TRANSITIONS/QG_CHECKS/DB schema
unchanged. Docs (README merge-verify section, CLAUDE.md, CHANGELOG, .env.example)
updated in the same PR. Tests: test_merge_gate.py TC-01..12, test_config.py
TC-13, test_merge_verify.py TC-14..16; full suite green (1389).
Refs: ORCH-093
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -257,3 +257,38 @@ def test_tc19_check_branch_mergeable_signature_intact():
|
||||
from src.qg.checks import check_branch_mergeable
|
||||
params = list(inspect.signature(check_branch_mergeable).parameters)
|
||||
assert params == ["repo", "work_item_id", "branch"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ORCH-093 / TC-13: merge_retry_* settings defaults + env override (AC-5).
|
||||
# ---------------------------------------------------------------------------
|
||||
_MERGE_RETRY_ENV = (
|
||||
"ORCH_MERGE_RETRY_ENABLED",
|
||||
"ORCH_MERGE_RETRY_MAX_ATTEMPTS",
|
||||
"ORCH_MERGE_RETRY_BACKOFF_BASE_S",
|
||||
"ORCH_MERGE_RETRY_BACKOFF_MAX_S",
|
||||
)
|
||||
|
||||
|
||||
def test_merge_retry_settings_defaults(monkeypatch):
|
||||
"""Documented defaults when no ORCH_MERGE_RETRY_* env is set."""
|
||||
for name in _MERGE_RETRY_ENV:
|
||||
monkeypatch.delenv(name, raising=False)
|
||||
s = Settings()
|
||||
assert s.merge_retry_enabled is True
|
||||
assert s.merge_retry_max_attempts == 3
|
||||
assert s.merge_retry_backoff_base_s == 2
|
||||
assert s.merge_retry_backoff_max_s == 5
|
||||
|
||||
|
||||
def test_merge_retry_settings_env_override(monkeypatch):
|
||||
"""Each field is read from its ORCH_MERGE_RETRY_* env var."""
|
||||
monkeypatch.setenv("ORCH_MERGE_RETRY_ENABLED", "false")
|
||||
monkeypatch.setenv("ORCH_MERGE_RETRY_MAX_ATTEMPTS", "5")
|
||||
monkeypatch.setenv("ORCH_MERGE_RETRY_BACKOFF_BASE_S", "1")
|
||||
monkeypatch.setenv("ORCH_MERGE_RETRY_BACKOFF_MAX_S", "8")
|
||||
s = Settings()
|
||||
assert s.merge_retry_enabled is False
|
||||
assert s.merge_retry_max_attempts == 5
|
||||
assert s.merge_retry_backoff_base_s == 1
|
||||
assert s.merge_retry_backoff_max_s == 8
|
||||
|
||||
@@ -389,3 +389,207 @@ def test_tc16_deployer_prompt_consults_guard():
|
||||
assert "no second merge" in lowered, (
|
||||
"deployer prompt must document the already-merged no-op (AC-11)"
|
||||
)
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# ORCH-093: merge_pr transient-retry + ensure_open_pr already-in-main guard.
|
||||
# TC-01..TC-12 — httpx mocked; time.sleep no-op so backoff never slows tests.
|
||||
# ===========================================================================
|
||||
ORCH093_BRANCH = "feature/ORCH-093-x"
|
||||
|
||||
|
||||
class _Resp093:
|
||||
"""Response stand-in with status_code / json() / text (merge_pr reads .text)."""
|
||||
|
||||
def __init__(self, status_code, payload=None, text=""):
|
||||
self.status_code = status_code
|
||||
self._payload = payload if payload is not None else []
|
||||
self.text = text
|
||||
|
||||
def json(self):
|
||||
return self._payload
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def merge093(monkeypatch):
|
||||
"""Wire Gitea settings + retry defaults; no-op backoff; PR not-already-merged."""
|
||||
monkeypatch.setattr(merge_gate.settings, "gitea_url", "http://gitea.test")
|
||||
monkeypatch.setattr(merge_gate.settings, "gitea_owner", "admin")
|
||||
monkeypatch.setattr(merge_gate.settings, "gitea_token", "tok")
|
||||
monkeypatch.setattr(merge_gate.settings, "merge_pr_timeout_s", 5)
|
||||
monkeypatch.setattr(merge_gate.settings, "merge_retry_enabled", True)
|
||||
monkeypatch.setattr(merge_gate.settings, "merge_retry_max_attempts", 3)
|
||||
monkeypatch.setattr(merge_gate.settings, "merge_retry_backoff_base_s", 2)
|
||||
monkeypatch.setattr(merge_gate.settings, "merge_retry_backoff_max_s", 5)
|
||||
monkeypatch.setattr(merge_gate.time, "sleep", lambda *a, **k: None)
|
||||
monkeypatch.setattr(merge_gate, "pr_already_merged", lambda r, b: False)
|
||||
|
||||
|
||||
def _open_code_pr_get(number=7):
|
||||
"""A list-PRs GET returning exactly one open code-PR (head==branch, base==main)."""
|
||||
return lambda *a, **k: _Resp093(
|
||||
200, [{"head": {"ref": ORCH093_BRANCH}, "base": {"ref": "main"}, "number": number}]
|
||||
)
|
||||
|
||||
|
||||
class _PostSeq:
|
||||
"""Returns queued responses (or raises queued exceptions) on each POST call."""
|
||||
|
||||
def __init__(self, items):
|
||||
self._items = list(items)
|
||||
self.calls = 0
|
||||
|
||||
def __call__(self, *a, **k):
|
||||
self.calls += 1
|
||||
item = self._items.pop(0) if self._items else self._items_last
|
||||
self._items_last = item
|
||||
if isinstance(item, Exception):
|
||||
raise item
|
||||
return item
|
||||
|
||||
|
||||
# --- TC-01: 405, 405, 200 -> (True, ...); exactly 3 POST; no false False (AC-1) ---
|
||||
def test_tc01_merge_retries_405_then_succeeds(merge093, monkeypatch):
|
||||
monkeypatch.setattr(httpx, "get", _open_code_pr_get(7))
|
||||
seq = _PostSeq([_Resp093(405, text="try again later"),
|
||||
_Resp093(405, text="try again later"),
|
||||
_Resp093(200)])
|
||||
monkeypatch.setattr(httpx, "post", seq)
|
||||
ok, msg = merge_gate.merge_pr("orchestrator", ORCH093_BRANCH)
|
||||
assert ok is True and "PR #7" in msg
|
||||
assert seq.calls == 3
|
||||
|
||||
|
||||
# --- TC-02: 503 (5xx) then 200 -> retry -> (True, ...) (AC-1) ---
|
||||
def test_tc02_merge_retries_5xx_then_succeeds(merge093, monkeypatch):
|
||||
monkeypatch.setattr(httpx, "get", _open_code_pr_get(7))
|
||||
seq = _PostSeq([_Resp093(503, text="bad gateway"), _Resp093(200)])
|
||||
monkeypatch.setattr(httpx, "post", seq)
|
||||
ok, msg = merge_gate.merge_pr("orchestrator", ORCH093_BRANCH)
|
||||
assert ok is True and seq.calls == 2
|
||||
|
||||
|
||||
# --- TC-03: httpx Timeout in attempt 1, then 200 -> retry; never-raise (AC-1/AC-6) ---
|
||||
def test_tc03_merge_retries_network_error_then_succeeds(merge093, monkeypatch):
|
||||
monkeypatch.setattr(httpx, "get", _open_code_pr_get(7))
|
||||
seq = _PostSeq([httpx.ConnectTimeout("timed out"), _Resp093(200)])
|
||||
monkeypatch.setattr(httpx, "post", seq)
|
||||
ok, msg = merge_gate.merge_pr("orchestrator", ORCH093_BRANCH)
|
||||
assert ok is True and seq.calls == 2
|
||||
|
||||
|
||||
# --- TC-04: real conflict 409 + mergeable=False -> (False, ...), no extra POST (AC-2) ---
|
||||
def test_tc04_real_conflict_terminal_no_retry(merge093, monkeypatch):
|
||||
monkeypatch.setattr(httpx, "get", _open_code_pr_get(7))
|
||||
monkeypatch.setattr(merge_gate, "_pr_mergeable", lambda r, i: False)
|
||||
seq = _PostSeq([_Resp093(409, text="conflict")])
|
||||
monkeypatch.setattr(httpx, "post", seq)
|
||||
ok, msg = merge_gate.merge_pr("orchestrator", ORCH093_BRANCH)
|
||||
assert ok is False and "HTTP 409" in msg
|
||||
assert seq.calls == 1 # terminal -> no retry
|
||||
|
||||
|
||||
# --- TC-05: ambiguous 409 + mergeable=True -> transient -> retry -> 200 (AC-2) ---
|
||||
def test_tc05_ambiguous_409_mergeable_true_retries(merge093, monkeypatch):
|
||||
monkeypatch.setattr(httpx, "get", _open_code_pr_get(7))
|
||||
monkeypatch.setattr(merge_gate, "_pr_mergeable", lambda r, i: True)
|
||||
seq = _PostSeq([_Resp093(409, text="recomputing"), _Resp093(200)])
|
||||
monkeypatch.setattr(httpx, "post", seq)
|
||||
ok, msg = merge_gate.merge_pr("orchestrator", ORCH093_BRANCH)
|
||||
assert ok is True and seq.calls == 2
|
||||
|
||||
|
||||
# --- TC-06: 403 (no rights) -> immediate (False, ...) without retry (AC-2) ---
|
||||
def test_tc06_403_terminal_no_retry(merge093, monkeypatch):
|
||||
monkeypatch.setattr(httpx, "get", _open_code_pr_get(7))
|
||||
seq = _PostSeq([_Resp093(403, text="forbidden")])
|
||||
monkeypatch.setattr(httpx, "post", seq)
|
||||
ok, msg = merge_gate.merge_pr("orchestrator", ORCH093_BRANCH)
|
||||
assert ok is False and "HTTP 403" in msg and seq.calls == 1
|
||||
|
||||
|
||||
# --- TC-07: 405 on all N attempts -> (False, "merge failed after N attempts: HTTP 405") (AC-3) ---
|
||||
def test_tc07_exhausts_retries_clear_reason(merge093, monkeypatch):
|
||||
monkeypatch.setattr(httpx, "get", _open_code_pr_get(7))
|
||||
seq = _PostSeq([_Resp093(405), _Resp093(405), _Resp093(405)])
|
||||
monkeypatch.setattr(httpx, "post", seq)
|
||||
ok, msg = merge_gate.merge_pr("orchestrator", ORCH093_BRANCH)
|
||||
assert ok is False
|
||||
assert "after 3 attempts" in msg and "HTTP 405" in msg
|
||||
assert seq.calls == 3
|
||||
|
||||
|
||||
# --- TC-08: kill-switch off -> exactly one POST (one-shot) at 405 -> (False, ...) (AC-5/AC-3) ---
|
||||
def test_tc08_killswitch_off_one_shot(merge093, monkeypatch):
|
||||
monkeypatch.setattr(merge_gate.settings, "merge_retry_enabled", False)
|
||||
monkeypatch.setattr(httpx, "get", _open_code_pr_get(7))
|
||||
seq = _PostSeq([_Resp093(405), _Resp093(200)]) # 2nd would succeed if retried
|
||||
monkeypatch.setattr(httpx, "post", seq)
|
||||
ok, msg = merge_gate.merge_pr("orchestrator", ORCH093_BRANCH)
|
||||
assert ok is False and seq.calls == 1 # one-shot: never retried
|
||||
|
||||
|
||||
# --- TC-09: ensure_open_pr — no open PR, branch fully in main -> already-in-main, no POST (AC-4) ---
|
||||
def test_tc09_ensure_already_in_main_no_post(merge093, monkeypatch):
|
||||
monkeypatch.setattr(httpx, "get", lambda *a, **k: _Resp093(200, [])) # no open PR
|
||||
monkeypatch.setattr(merge_gate, "_branch_fully_in_main", lambda r, b: True)
|
||||
monkeypatch.setattr(httpx, "post", lambda *a, **k: (_ for _ in ()).throw(
|
||||
AssertionError("must NOT POST /pulls for an already-in-main branch")))
|
||||
status, detail = merge_gate.ensure_open_pr("orchestrator", ORCH093_BRANCH)
|
||||
assert status == "already-in-main"
|
||||
|
||||
|
||||
# --- TC-10: ensure_open_pr — no open PR, commits beyond main -> creates PR (regress) (AC-4) ---
|
||||
def test_tc10_ensure_creates_when_commits_beyond_main(merge093, monkeypatch):
|
||||
monkeypatch.setattr(httpx, "get", lambda *a, **k: _Resp093(200, []))
|
||||
monkeypatch.setattr(merge_gate, "_branch_fully_in_main", lambda r, b: False)
|
||||
post_calls = []
|
||||
|
||||
def fake_post(url, json=None, headers=None, timeout=None):
|
||||
post_calls.append(url)
|
||||
return _Resp093(201, {"number": 12})
|
||||
|
||||
monkeypatch.setattr(httpx, "post", fake_post)
|
||||
status, detail = merge_gate.ensure_open_pr("orchestrator", ORCH093_BRANCH)
|
||||
assert status == "created" and detail == "12"
|
||||
assert len(post_calls) == 1
|
||||
|
||||
|
||||
# --- TC-11: ensure_open_pr — git error in guard (None) -> fail-OPEN -> create path (AC-6) ---
|
||||
def test_tc11_ensure_guard_git_error_fail_open(merge093, monkeypatch):
|
||||
monkeypatch.setattr(httpx, "get", lambda *a, **k: _Resp093(200, []))
|
||||
# None == git/OS error / ambiguous -> must NOT block; degrade to create.
|
||||
monkeypatch.setattr(merge_gate, "_branch_fully_in_main", lambda r, b: None)
|
||||
monkeypatch.setattr(httpx, "post", lambda *a, **k: _Resp093(201, {"number": 13}))
|
||||
status, detail = merge_gate.ensure_open_pr("orchestrator", ORCH093_BRANCH)
|
||||
assert status == "created" # fail-open: did not become a false no-op
|
||||
|
||||
|
||||
def test_tc11_branch_fully_in_main_never_raises(monkeypatch):
|
||||
"""_branch_fully_in_main: any git/OS error -> None (never-raise) (AC-6)."""
|
||||
monkeypatch.setattr(merge_gate, "ensure_worktree", lambda r, b: "/wt")
|
||||
|
||||
def boom(*a, **k):
|
||||
raise OSError("git exploded")
|
||||
|
||||
monkeypatch.setattr(merge_gate.subprocess, "run", boom)
|
||||
assert merge_gate._branch_fully_in_main("orchestrator", ORCH093_BRANCH) is None
|
||||
|
||||
|
||||
# --- TC-12: merge_pr / ensure_open_pr — uncaught httpx error -> safe tuple (never-raise) (AC-6) ---
|
||||
def test_tc12_merge_pr_never_raises(merge093, monkeypatch):
|
||||
def boom(*a, **k):
|
||||
raise httpx.HTTPError("kaboom")
|
||||
|
||||
monkeypatch.setattr(httpx, "get", boom)
|
||||
ok, msg = merge_gate.merge_pr("orchestrator", ORCH093_BRANCH)
|
||||
assert ok is False and isinstance(msg, str)
|
||||
|
||||
|
||||
def test_tc12_ensure_open_pr_never_raises(merge093, monkeypatch):
|
||||
def boom(*a, **k):
|
||||
raise httpx.HTTPError("kaboom")
|
||||
|
||||
monkeypatch.setattr(httpx, "get", boom)
|
||||
status, detail = merge_gate.ensure_open_pr("orchestrator", ORCH093_BRANCH)
|
||||
assert status == "failed" and isinstance(detail, str)
|
||||
|
||||
@@ -131,3 +131,92 @@ def test_tc12_kill_switch_disables_under_gate(monkeypatch):
|
||||
monkeypatch.setattr(merge_gate.settings, "merge_verify_enabled", False)
|
||||
assert merge_gate.merge_verify_applies("orchestrator") is False
|
||||
assert merge_gate.merge_verify_applies("enduro-trails") is False
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# ORCH-093 / TC-14..16: _handle_merge_verify integration (deploy->done under-gate).
|
||||
# already-in-main skips merge_pr; transient-retry success -> done; exhausted -> HOLD.
|
||||
# ===========================================================================
|
||||
import os # noqa: E402
|
||||
import tempfile # noqa: E402
|
||||
|
||||
os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")
|
||||
os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")
|
||||
os.environ.setdefault("ORCH_DB_PATH", os.path.join(tempfile.gettempdir(), "test_orch093.db"))
|
||||
|
||||
from unittest.mock import MagicMock # noqa: E402
|
||||
|
||||
from src import stage_engine, image_freshness # noqa: E402
|
||||
from src.stage_engine import AdvanceResult, _handle_merge_verify # noqa: E402
|
||||
|
||||
_O93_REPO = "orchestrator"
|
||||
_O93_WI = "ORCH-093"
|
||||
_O93_BRANCH = "feature/ORCH-093-x"
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def _o93_wire(monkeypatch):
|
||||
monkeypatch.setattr(stage_engine.merge_gate, "merge_verify_applies", lambda r: True)
|
||||
monkeypatch.setattr(stage_engine.settings, "merge_verify_autocreate_pr_enabled", True)
|
||||
monkeypatch.setattr(stage_engine.settings, "regression_guard_enabled", False)
|
||||
monkeypatch.setattr(image_freshness, "validated_revision", lambda r, b: "deadbeef")
|
||||
for name in ("set_issue_blocked", "plane_add_comment", "send_telegram", "link_for"):
|
||||
monkeypatch.setattr(stage_engine, name, MagicMock())
|
||||
monkeypatch.setattr(
|
||||
stage_engine.self_deploy, "record_merged_to_main", MagicMock(return_value=True)
|
||||
)
|
||||
|
||||
|
||||
# --- TC-14: ensure_open_pr -> already-in-main -> skip merge_pr; SHA-in-main -> done (AC-4) ---
|
||||
def test_tc14_already_in_main_skips_merge_pr_then_done(_o93_wire, monkeypatch):
|
||||
monkeypatch.setattr(
|
||||
stage_engine.merge_gate, "ensure_open_pr", lambda r, b: ("already-in-main", "x")
|
||||
)
|
||||
merge = MagicMock()
|
||||
monkeypatch.setattr(stage_engine.merge_gate, "merge_pr", merge)
|
||||
monkeypatch.setattr(stage_engine.merge_gate, "verify_merged_to_main", lambda r, b, s: True)
|
||||
|
||||
res = AdvanceResult()
|
||||
intervened = _handle_merge_verify(1, _O93_REPO, _O93_WI, _O93_BRANCH, res)
|
||||
|
||||
assert intervened is False # advance to done
|
||||
assert res.alerted is False
|
||||
assert not merge.called # merge_pr SKIPPED (nothing to merge)
|
||||
assert not stage_engine.set_issue_blocked.called
|
||||
|
||||
|
||||
# --- TC-15: merge_pr exhausted (False) + SHA not in main -> HOLD + alert (ORCH-071/081) (AC-3) ---
|
||||
def test_tc15_merge_failed_and_not_in_main_holds(_o93_wire, monkeypatch):
|
||||
monkeypatch.setattr(
|
||||
stage_engine.merge_gate, "ensure_open_pr", lambda r, b: ("existed", "9")
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
stage_engine.merge_gate, "merge_pr",
|
||||
lambda r, b: (False, "merge failed after 3 attempts: HTTP 405"),
|
||||
)
|
||||
monkeypatch.setattr(stage_engine.merge_gate, "verify_merged_to_main", lambda r, b, s: False)
|
||||
|
||||
res = AdvanceResult()
|
||||
intervened = _handle_merge_verify(1, _O93_REPO, _O93_WI, _O93_BRANCH, res)
|
||||
|
||||
assert intervened is True # HOLD, NOT done
|
||||
assert res.advanced is False
|
||||
assert res.note == "merge-not-verified-hold"
|
||||
assert stage_engine.set_issue_blocked.called
|
||||
|
||||
|
||||
# --- TC-16: happy path — transient retry success in merge_pr -> SHA-in-main -> done (AC-1) ---
|
||||
def test_tc16_transient_retry_success_then_done(_o93_wire, monkeypatch):
|
||||
monkeypatch.setattr(
|
||||
stage_engine.merge_gate, "ensure_open_pr", lambda r, b: ("existed", "9")
|
||||
)
|
||||
# merge_pr already rode out the 405x2->200 transient internally -> (True, ...).
|
||||
monkeypatch.setattr(stage_engine.merge_gate, "merge_pr", lambda r, b: (True, "merged PR #9"))
|
||||
monkeypatch.setattr(stage_engine.merge_gate, "verify_merged_to_main", lambda r, b, s: True)
|
||||
|
||||
res = AdvanceResult()
|
||||
intervened = _handle_merge_verify(1, _O93_REPO, _O93_WI, _O93_BRANCH, res)
|
||||
|
||||
assert intervened is False # done, no false HOLD
|
||||
assert res.alerted is False
|
||||
assert not stage_engine.set_issue_blocked.called
|
||||
|
||||
@@ -32,6 +32,11 @@ def _settings(monkeypatch):
|
||||
monkeypatch.setattr(merge_gate.settings, "gitea_owner", "owner")
|
||||
monkeypatch.setattr(merge_gate.settings, "gitea_token", "tok")
|
||||
monkeypatch.setattr(merge_gate.settings, "gitea_url", "http://gitea.test")
|
||||
# ORCH-093: these tests target the HTTP create/race logic of ensure_open_pr.
|
||||
# The new already-in-main guard (_branch_fully_in_main) runs real git; pin it
|
||||
# to "commits beyond main" (False) so the create path is exercised as intended.
|
||||
# The guard itself has dedicated coverage (test_merge_gate.py TC-09/10/11).
|
||||
monkeypatch.setattr(merge_gate, "_branch_fully_in_main", lambda r, b: False)
|
||||
|
||||
|
||||
def _install_httpx(monkeypatch, get_resp, post_resp=None, record=None):
|
||||
|
||||
Reference in New Issue
Block a user