Files
orchestrator/tests/test_merge_gate_race.py
claude-bot a74379f657 feat(ORCH-026): task dependencies (B waits for A) + single-repo merge serialization
Level A — merge/deploy serialization within one repo: reuse the existing
ORCH-043/065 merge-lease (no new mechanism); the only new logic is an
unconditional pre-merge rebase in check_branch_mergeable — under the held
lease, auto_rebase_onto_main is ALWAYS called when premerge_rebase_always
(default True), not just when the branch is behind. No-op on an up-to-date
branch (rebase keeps HEAD, force-with-lease -> "Everything up-to-date", CI
not triggered). Kill-switch off -> ORCH-043 behaviour 1:1.

Level B — declarative task dependencies: additive job_deps table
(CREATE ... IF NOT EXISTS, no live-DB migration); claim_next_job gate
(NOT EXISTS) defers a job whose depends-on tasks are not yet 'done' without
occupying a max_concurrency slot; inert on empty job_deps -> zero regression.
New leaf src/task_deps.py (never-raise): is_task_ready (fail-open), DFS cycle
detection + Blocked/alert, declare/ingest_plane_relations (db source never
hits the network on the hot path), snapshot. Telegram waiting-line, /queue
observability, reconciler skip + cycle backstop, reaper untouched.

Invariants unchanged: STAGE_TRANSITIONS, QG_CHECKS registry (dep gate is a
claim_next_job врезка, not a registered QG), DB schema of existing tables,
HTTP endpoints; non-self repos remain a no-op on empty deps/scope.

Flags: ORCH_PREMERGE_REBASE_ALWAYS, ORCH_TASK_DEPS_ENABLED, ORCH_TASK_DEPS_SOURCE.
Docs: docs/architecture/README.md, CLAUDE.md, .env.example, CHANGELOG.md,
adr-0015. Tests: tests/test_orch026_*.py (64 tests); full suite 991 green.

Refs: ORCH-026

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-08 19:17:44 +03:00

218 lines
9.7 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""ORCH-043 / TC-24: the parallel-merge race the gate exists to prevent.
Scenario (two green branches in ONE repo, the self-hosting risk, ТЗ §1):
* main is at C1 because branch A already merged.
* branch B was validated against C0 (the main it branched from) and is GREEN
there — but B has NOT seen A's change. A blind merge of B could break main
(semantic conflict): B is "green" yet stale.
The merge-gate makes this deterministic:
1. While A holds the merge-lease, B's gate sees "merge-lock busy" -> DEFER
(serialisation: no two catch-up+merge sequences interleave).
2. After A releases, B acquires the lease, rebases onto the CURRENT origin/main
(C1) and re-tests the COMBINED tree:
- re-test GREEN -> gate passes, lease HELD -> B is safe to merge; main stays green.
- re-test RED -> gate fails, lease RELEASED -> B rolls back to development;
main is NEVER touched.
origin/main's SHA is asserted unchanged throughout — the gate never pushes main.
Real local git (bare origin + clone), real file lease; only the pytest re-test is
stubbed (its real behaviour lives in test_merge_gate.py::retest_branch tests).
"""
import os
import subprocess
import tempfile
import pytest
_test_db = os.path.join(tempfile.gettempdir(), "test_orchestrator_merge_gate_race.db")
os.environ["ORCH_DB_PATH"] = _test_db
os.environ["ORCH_REPOS_DIR"] = tempfile.gettempdir()
os.environ["ORCH_GITEA_TOKEN"] = "test-token"
os.environ["ORCH_PLANE_API_TOKEN"] = "test-token"
from src import git_worktree, merge_gate # noqa: E402
from src.qg import checks as qg # noqa: E402
from src.qg.checks import check_branch_mergeable # noqa: E402
def _git(cwd, *args):
return subprocess.run(["git", "-C", cwd, *args], capture_output=True, text=True)
@pytest.fixture
def race_repo(tmp_path, monkeypatch):
"""Bare origin at C1 (A merged) + clone + feature/B branched from C0.
Returns (repo, origin_path). feature/B rebases cleanly onto origin/main.
The gate is forced REAL for this repo via merge_gate_repos.
"""
repo = "orchestrator"
repos_dir = tmp_path / "repos"
wt_dir = tmp_path / "repos" / "_wt"
repos_dir.mkdir(parents=True)
monkeypatch.setattr(merge_gate.settings, "repos_dir", str(repos_dir))
monkeypatch.setattr(git_worktree.settings, "repos_dir", str(repos_dir))
monkeypatch.setattr(git_worktree.settings, "worktrees_dir", str(wt_dir))
monkeypatch.setattr(qg.settings, "merge_gate_enabled", True)
monkeypatch.setattr(qg.settings, "merge_gate_repos", repo)
monkeypatch.setattr(merge_gate.settings, "merge_lock_timeout_s", 300)
# ORCH-026: this redrive test asserts the ORCH-043 ancestor-based short-circuit
# ("already caught up" -> skip expensive re-test). Pin the always-rebase
# kill-switch OFF so the legacy short-circuit path is exercised here; the new
# default (True) is covered by tests/test_orch026_premerge_rebase.py (TC-A01).
monkeypatch.setattr(qg.settings, "premerge_rebase_always", False, raising=False)
origin = tmp_path / "origin.git"
subprocess.run(["git", "init", "--bare", "-b", "main", str(origin)], capture_output=True)
seed = tmp_path / "seed"
seed.mkdir()
_git(str(seed), "init", "-b", "main")
_git(str(seed), "config", "user.email", "t@t")
_git(str(seed), "config", "user.name", "t")
(seed / "README.md").write_text("base\n")
_git(str(seed), "add", ".")
_git(str(seed), "commit", "-m", "C0")
_git(str(seed), "remote", "add", "origin", str(origin))
_git(str(seed), "push", "origin", "main")
# B branches off C0, adds an isolated file (clean rebase onto C1).
_git(str(seed), "checkout", "-b", "feature/B")
(seed / "b.txt").write_text("from B\n")
_git(str(seed), "add", ".")
_git(str(seed), "commit", "-m", "feat(B): add b.txt")
_git(str(seed), "push", "origin", "feature/B")
# A merged -> main advances to C1 (touches a DIFFERENT file: no textual conflict).
_git(str(seed), "checkout", "main")
(seed / "a.txt").write_text("from A\n")
_git(str(seed), "add", ".")
_git(str(seed), "commit", "-m", "C1 (A merged)")
_git(str(seed), "push", "origin", "main")
main_clone = repos_dir / repo
subprocess.run(["git", "clone", str(origin), str(main_clone)], capture_output=True)
_git(str(main_clone), "config", "user.email", "t@t")
_git(str(main_clone), "config", "user.name", "t")
return repo, origin
def _origin_main_sha(origin):
return _git(str(origin), "rev-parse", "main").stdout.strip()
def test_tc24_busy_lock_serialises_then_green_catch_up_is_safe(race_repo, monkeypatch):
"""A holds the lease -> B defers; after release B catches up + green re-test ->
safe merge (lease held), and origin/main is never pushed by the gate."""
repo, origin = race_repo
main_before = _origin_main_sha(origin)
# A is mid-merge: it holds the lease.
ok, _ = merge_gate.acquire_merge_lease(repo, "feature/A", "ORCH-A")
assert ok is True
# B's gate must DEFER (serialisation), touching nothing.
passed, reason = check_branch_mergeable(repo, "ORCH-B", "feature/B")
assert passed is False
assert reason == "merge-lock busy"
assert _origin_main_sha(origin) == main_before # main untouched
# A finishes and releases.
merge_gate.release_merge_lease(repo, "feature/A")
# B catches up: real rebase onto C1, GREEN re-test -> pass, lease HELD.
monkeypatch.setattr(merge_gate, "retest_branch", lambda r, b: (True, "re-test green"))
passed, reason = check_branch_mergeable(repo, "ORCH-B", "feature/B")
assert passed is True
assert reason == "rebased onto main, re-test green"
# The gate rebased+pushed ONLY the task branch; origin/main is unchanged.
assert _origin_main_sha(origin) == main_before
# feature/B now contains C1 (a.txt) on origin after the force-with-lease push.
assert "a.txt" in _git(str(origin), "ls-tree", "--name-only", "feature/B").stdout
# Lease is HELD by B until the actual merge.
held = merge_gate._read_lease(merge_gate._lease_path(repo))
assert held is not None and held.get("branch") == "feature/B"
def test_tc24_red_catch_up_fails_and_releases_main_stays_green(race_repo, monkeypatch):
"""B catches up but the COMBINED tree is red -> gate fails, lease released,
origin/main never touched (B will roll back to development upstream)."""
repo, origin = race_repo
main_before = _origin_main_sha(origin)
monkeypatch.setattr(
merge_gate, "retest_branch",
lambda r, b: (False, "re-test failed: ...1 failed, 9 passed"),
)
passed, reason = check_branch_mergeable(repo, "ORCH-B", "feature/B")
assert passed is False
assert reason.startswith("re-test failed after rebase:")
# main is still green / untouched.
assert _origin_main_sha(origin) == main_before
# The lease was released on failure (a later task can proceed).
assert merge_gate._read_lease(merge_gate._lease_path(repo)) is None
# ---------------------------------------------------------------------------
# ORCH-065 / TC-17: recovery — "rebase+re-test green, merge not done, process
# died" -> reaper requeues -> the merge re-drives the STANDARD path WITHOUT a
# second expensive re-test when safe (the branch is already up-to-date). AC-10.
# ---------------------------------------------------------------------------
def test_tc17_redrive_skips_expensive_retest_when_already_caught_up(
race_repo, monkeypatch
):
repo, origin = race_repo
main_before = _origin_main_sha(origin)
# First pass: B catches up (real rebase onto C1) with a GREEN re-test. This is
# the work that completed before the process died — the lease is held, the
# branch is now caught up on origin.
retest_calls = []
def _retest(r, b):
retest_calls.append((r, b))
return True, "re-test green"
monkeypatch.setattr(merge_gate, "retest_branch", _retest)
passed, reason = check_branch_mergeable(repo, "ORCH-B", "feature/B")
assert passed is True
assert reason == "rebased onto main, re-test green"
assert len(retest_calls) == 1 # the expensive re-test ran ONCE
# The process "died" before the merge: release the lease the way the reaper /
# reconciler recovery path would (the row is requeued; the branch stays caught
# up because the rebase was already pushed).
merge_gate.release_merge_lease(repo, "feature/B")
# Re-drive (standard path) after recovery: the branch already contains
# origin/main, so branch_is_behind_main is False and the gate short-circuits to
# the up-to-date pass WITHOUT re-running the expensive rebase+re-test.
assert merge_gate.branch_is_behind_main(repo, "feature/B") is False
passed2, reason2 = check_branch_mergeable(repo, "ORCH-B", "feature/B")
assert passed2 is True
assert reason2 == "branch up-to-date with main"
assert len(retest_calls) == 1 # NOT re-run on the re-drive (no double cost)
# origin/main was never pushed by the gate across the whole recovery.
assert _origin_main_sha(origin) == main_before
def test_tc17_pr_already_merged_makes_redrive_a_noop(race_repo, monkeypatch):
"""If the PR actually merged before the process died, the idempotency guard
reports it so the re-drive is a no-op (no second merge)."""
import httpx
repo, _ = race_repo
class _R:
status_code = 200
@staticmethod
def json():
# ORCH-073 FR-2: the guard counts a PR only when it carries THIS branch's
# code into main (merged & head.ref==branch & base.ref=="main").
return [{"merged": True, "head": {"ref": "feature/B"}, "base": {"ref": "main"}}]
monkeypatch.setattr(httpx, "get", lambda *a, **k: _R())
assert merge_gate.pr_already_merged(repo, "feature/B") is True