Closes the "zombie jobs" incident class: job status was set only inside the live launcher process, so a process death left jobs.status='running' forever; at max_concurrency=1 a single zombie blocked the queue for ALL projects (self-hosting risk). Adds a background daemon (src/job_reaper.py) with three-tier liveness (dead-pid streak / known exit_code / max-running backstop) whose only mutating write is an atomic terminal flip guarded by WHERE status='running' (no double-processing). For exit code 0, the canonical QG is the source of truth via gate-driven advance, not "exit0" itself. Also proactively reclaims a stale merge-lease (dead pid OR TTL) via file delete only (no git ops), and makes merge finalization idempotent (pr_already_merged guard + up-to-date short-circuit on re-drive). New jobs.pid column via idempotent _ensure_column (no migration); pid is stamped in launcher._spawn after Popen. Reaper start/stop in lifespan; "reaper" snapshot in GET /queue. Kill-switches: ORCH_REAPER_ENABLED, ORCH_REAPER_INTERVAL_S, ORCH_REAPER_DEAD_TICKS, ORCH_REAPER_MAX_RUNNING_S, ORCH_LEASE_RECLAIM_ENABLED. Invariants unchanged (AC-13): STAGE_TRANSITIONS, QG_CHECKS registry, check_branch_mergeable signature/behaviour, BUG-8 rollback, hook exit codes. The reaper is restart-safe and never raises per unit of background work. Docs: docs/architecture/README.md, CHANGELOG.md, .env.example. Tests: tests/test_job_reaper.py, tests/test_merge_lease_reclaim.py, tests/test_merge_gate.py (TC-16), tests/test_merge_gate_race.py (TC-17), tests/test_queue.py, tests/test_config.py (TC-19/TC-20). 742 passed. Refs: ORCH-065 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
356 lines
13 KiB
Python
356 lines
13 KiB
Python
"""ORCH-043: tests for src/merge_gate core (TC-01..TC-11).
|
|
|
|
Git tests use REAL local repos in tmp (a bare 'origin' + a main clone), so
|
|
fetch / merge-base / rebase / push --force-with-lease are exercised without
|
|
network, mirroring tests/test_git_worktree.py. The re-test (pytest) and lease
|
|
units are isolated with monkeypatch / tmp files.
|
|
"""
|
|
import json
|
|
import os
|
|
import subprocess
|
|
import tempfile
|
|
import time
|
|
|
|
import httpx
|
|
import pytest
|
|
|
|
# Env before importing app modules (same convention as the other suites).
|
|
_test_db = os.path.join(tempfile.gettempdir(), "test_orchestrator_merge_gate.db")
|
|
os.environ["ORCH_DB_PATH"] = _test_db
|
|
os.environ["ORCH_REPOS_DIR"] = tempfile.gettempdir()
|
|
os.environ["ORCH_GITEA_TOKEN"] = "test-token"
|
|
os.environ["ORCH_PLANE_API_TOKEN"] = "test-token"
|
|
|
|
from src import git_worktree, merge_gate # noqa: E402
|
|
|
|
|
|
def _git(cwd, *args):
    """Run ``git -C <cwd> <args...>`` and return the CompletedProcess.

    stdout/stderr are captured as text; a non-zero exit status does not raise.
    """
    command = ["git", "-C", cwd]
    command.extend(args)
    return subprocess.run(command, capture_output=True, text=True)
|
|
|
|
|
|
def _origin_sha(origin, ref):
    """Return the stripped ``git rev-parse <ref>`` output for the repo at ``origin``."""
    rev_parse = _git(str(origin), "rev-parse", ref)
    return rev_parse.stdout.strip()
|
|
|
|
|
|
@pytest.fixture
def repos(tmp_path, monkeypatch):
    """Build a throwaway git topology under ``tmp_path`` for the merge-gate tests.

    A bare ``origin`` is populated from a scratch ``seed`` repo and then cloned
    to ``<repos_dir>/orchestrator`` (the main clone). Resulting history:

      C0                 README.md
      feature/behind   : C0 + adds f.txt        (rebases cleanly onto C1)
      feature/conflict : C0 + edits README.md   (textual conflict with C1)
      main C1          : edits README.md + adds other.txt
      feature/uptodate : branched from C1       (already contains origin/main)

    ``repos_dir`` / ``worktrees_dir`` on the merge_gate and git_worktree
    settings are redirected into ``tmp_path`` for the duration of the test.

    Returns (repo_name, origin_path).
    """
    repo = "orchestrator"
    repos_dir = tmp_path / "repos"
    wt_dir = repos_dir / "_wt"
    repos_dir.mkdir(parents=True)

    # Redirect both modules' settings into the temp tree.
    for module, attr, target in (
        (merge_gate, "repos_dir", repos_dir),
        (git_worktree, "repos_dir", repos_dir),
        (git_worktree, "worktrees_dir", wt_dir),
    ):
        monkeypatch.setattr(module.settings, attr, str(target))

    origin = tmp_path / "origin.git"
    subprocess.run(["git", "init", "--bare", "-b", "main", str(origin)], capture_output=True)

    seed = tmp_path / "seed"
    seed.mkdir()
    seed_dir = str(seed)

    def seed_git(*args):
        # Every history-building command runs inside the scratch seed repo.
        return _git(seed_dir, *args)

    def commit_and_push(files, message, branch):
        # Write the given files, commit everything, push `branch` to origin.
        for name, content in files.items():
            (seed / name).write_text(content)
        seed_git("add", ".")
        seed_git("commit", "-m", message)
        seed_git("push", "origin", branch)

    # C0 on main (the remote is added only after the first commit exists).
    seed_git("init", "-b", "main")
    seed_git("config", "user.email", "t@t")
    seed_git("config", "user.name", "t")
    (seed / "README.md").write_text("base\n")
    seed_git("add", ".")
    seed_git("commit", "-m", "C0")
    seed_git("remote", "add", "origin", str(origin))
    seed_git("push", "origin", "main")

    # Two branches cut from C0.
    seed_git("checkout", "-b", "feature/behind")
    commit_and_push({"f.txt": "feature\n"}, "feat: add f.txt", "feature/behind")

    seed_git("checkout", "main")
    seed_git("checkout", "-b", "feature/conflict")
    commit_and_push({"README.md": "feature readme\n"}, "feat: edit README", "feature/conflict")

    # Move main forward to C1.
    seed_git("checkout", "main")
    commit_and_push(
        {"README.md": "main v2\n", "other.txt": "main change\n"},
        "C1",
        "main",
    )

    # A branch that starts from C1, so it already contains origin/main.
    seed_git("checkout", "-b", "feature/uptodate")
    commit_and_push({"g.txt": "uptodate\n"}, "feat: on top of C1", "feature/uptodate")

    # Main clone lives at repos_dir/<repo>.
    main_clone = repos_dir / repo
    subprocess.run(["git", "clone", str(origin), str(main_clone)], capture_output=True)
    for key, value in (("user.email", "t@t"), ("user.name", "t")):
        _git(str(main_clone), "config", key, value)
    return repo, origin
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# TC-01..03: branch_is_behind_main
|
|
# ---------------------------------------------------------------------------
|
|
def test_tc01_behind_when_main_ahead(repos):
    """TC-01: feature/behind (cut from C0) is reported behind main (at C1)."""
    repo_name = repos[0]
    behind = merge_gate.branch_is_behind_main(repo_name, "feature/behind")
    assert behind is True
|
|
|
|
|
|
def test_tc02_not_behind_when_branch_contains_main(repos):
    """TC-02: feature/uptodate (cut from C1) is not reported behind main."""
    repo_name = repos[0]
    behind = merge_gate.branch_is_behind_main(repo_name, "feature/uptodate")
    assert behind is False
|
|
|
|
|
|
def test_tc03_never_raises_on_bad_repo(monkeypatch, tmp_path):
    """TC-03: with repos_dir pointing at a missing path the check returns True, not an exception."""
    missing_repos = str(tmp_path / "nope")
    worktrees = str(tmp_path / "_wt")
    # No repository exists under these paths, so the lookup cannot succeed.
    monkeypatch.setattr(merge_gate.settings, "repos_dir", missing_repos)
    monkeypatch.setattr(git_worktree.settings, "repos_dir", missing_repos)
    monkeypatch.setattr(git_worktree.settings, "worktrees_dir", worktrees)

    behind = merge_gate.branch_is_behind_main("orchestrator", "feature/x")

    # The failure must surface as a safe boolean answer.
    assert behind is True
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# TC-04..06: auto_rebase_onto_main
|
|
# ---------------------------------------------------------------------------
|
|
def test_tc04_clean_catchup_pushes_with_lease(repos):
    """TC-04: a conflict-free branch is rebased onto main and pushed; main itself is untouched."""
    repo, origin = repos
    origin_dir = str(origin)
    main_sha_before = _origin_sha(origin, "main")

    ok, reason = merge_gate.auto_rebase_onto_main(repo, "feature/behind")
    assert ok is True, reason

    # origin/main must be UNTOUCHED (AC-7).
    main_sha_after = _origin_sha(origin, "main")
    assert main_sha_after == main_sha_before

    # On origin, main is now an ancestor of the pushed branch.
    ancestry = subprocess.run(
        ["git", "-C", origin_dir, "merge-base", "--is-ancestor", "main", "feature/behind"],
        capture_output=True,
    )
    assert ancestry.returncode == 0

    # The branch carries main's new file as well as its own.
    for filename in ("other.txt", "f.txt"):
        probe = _git(origin_dir, "cat-file", "-e", f"feature/behind:{filename}")
        assert probe.returncode == 0
|
|
|
|
|
|
def test_tc05_conflict_aborts_clean_and_reports(repos):
    """TC-05: a conflicting rebase is reported, nothing is pushed, and no rebase is left in progress.

    The "no rebase in progress" check looks in two places:

    * ``<worktree>/.git/rebase-*`` - the location the original test checked.
    * the path git itself reports via ``rev-parse --git-path``. In a linked
      worktree ``.git`` is a file and the rebase state lives in the worktree's
      private git dir, so the first check alone cannot detect a leftover
      rebase there. NOTE(review): whether ``get_worktree_path`` returns a
      linked worktree is not visible from this file; the extra check is
      skipped if git cannot resolve the path.
    """
    repo, origin = repos
    main_before = _origin_sha(origin, "main")
    branch_before = _origin_sha(origin, "feature/conflict")

    ok, reason = merge_gate.auto_rebase_onto_main(repo, "feature/conflict")
    assert ok is False
    assert "rebase conflict" in reason

    # main untouched, branch NOT force-pushed past the conflict.
    assert _origin_sha(origin, "main") == main_before
    assert _origin_sha(origin, "feature/conflict") == branch_before

    # Worktree left clean (no rebase in progress).
    wt = git_worktree.get_worktree_path(repo, "feature/conflict")
    for state_dir in ("rebase-merge", "rebase-apply"):
        assert not os.path.isdir(os.path.join(wt, ".git", state_dir))

        # Ask git where this state dir really lives for this worktree.
        resolved = _git(wt, "rev-parse", "--git-path", state_dir)
        git_path = resolved.stdout.strip()
        if resolved.returncode == 0 and git_path:
            # The reported path may be relative to the worktree; join() keeps
            # an absolute path unchanged.
            assert not os.path.isdir(os.path.join(wt, git_path)), (
                f"rebase still in progress: {git_path}"
            )
|
|
|
|
|
|
def test_tc06_never_pushes_main(repos, monkeypatch):
    """TC-06: every push issued by auto_rebase_onto_main targets the task branch with --force-with-lease.

    ``subprocess.run`` (as seen through ``merge_gate.subprocess``) is wrapped
    by a spy that records list-form commands and delegates to the real
    implementation, so the rebase and push still run against the tmp repos.
    """
    repo, _ = repos
    calls = []
    real_run = subprocess.run

    def _spy(cmd, *a, **k):
        # Only argv-style commands are recorded; everything is still executed.
        if isinstance(cmd, list):
            calls.append(cmd)
        return real_run(cmd, *a, **k)

    monkeypatch.setattr(merge_gate.subprocess, "run", _spy)
    merge_gate.auto_rebase_onto_main(repo, "feature/behind")

    pushes = [c for c in calls if "push" in c]
    assert pushes, "expected at least one push"
    for c in pushes:
        # Never push main - neither as a bare ref nor as a refspec destination.
        assert "main" not in c, f"push touched main: {c}"
        assert not any(
            str(arg).endswith((":main", ":refs/heads/main")) for arg in c
        ), f"push touched main: {c}"
        # Force only via --force-with-lease on the task branch.
        assert "--force-with-lease" in c
        assert "feature/behind" in c
        # Hard force must never be used. The lease flag is a separate argv
        # element, so it does not trip these exact-match checks.
        assert "--force" not in c, f"hard force used: {c}"
        assert "-f" not in c, f"hard force used: {c}"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# TC-07..09: retest_branch (isolated from real pytest)
|
|
# ---------------------------------------------------------------------------
|
|
@pytest.fixture
def fake_worktree(tmp_path, monkeypatch):
    """Create an empty ``wt`` dir and make ``merge_gate.get_worktree_path`` always return it.

    Returns the directory path as a string.
    """
    wt_dir = tmp_path / "wt"
    wt_dir.mkdir()
    wt_path = str(wt_dir)

    def _fixed_path(repo, branch):
        # Arguments are ignored: every (repo, branch) maps to the same dir.
        return wt_path

    monkeypatch.setattr(merge_gate, "get_worktree_path", _fixed_path)
    return wt_path
|
|
|
|
|
|
def test_tc07_retest_green(fake_worktree, monkeypatch):
    """TC-07: a zero exit status from the (stubbed) test run yields (True, "re-test green")."""

    def _passing_run(*a, **k):
        # Stand-in for subprocess.run: exit code 0 with a passing summary.
        return subprocess.CompletedProcess(a, 0, "1 passed", "")

    monkeypatch.setattr(merge_gate.subprocess, "run", _passing_run)

    outcome = merge_gate.retest_branch("orchestrator", "feature/x")
    ok, reason = outcome
    assert ok is True
    assert reason == "re-test green"
|
|
|
|
|
|
def test_tc08_retest_red_with_tail(fake_worktree, monkeypatch):
    """TC-08: a non-zero exit status yields a failure whose reason embeds the output tail."""
    failing_output = "FAILED tests/test_x.py::t - AssertionError\n1 failed"

    def _failing_run(*a, **k):
        # Stand-in for subprocess.run: exit code 1 with a failure summary.
        return subprocess.CompletedProcess(a, 1, failing_output, "")

    monkeypatch.setattr(merge_gate.subprocess, "run", _failing_run)

    ok, reason = merge_gate.retest_branch("orchestrator", "feature/x")
    assert ok is False
    assert "re-test failed" in reason
    # The tail of the captured output must be carried in the reason.
    assert "AssertionError" in reason
|
|
|
|
|
|
def test_tc09_retest_timeout(fake_worktree, monkeypatch):
    """TC-09: a TimeoutExpired from the (stubbed) test run is reported as "re-test timeout"."""

    def _timing_out_run(*a, **k):
        # Stand-in for subprocess.run that always times out.
        raise subprocess.TimeoutExpired(cmd="pytest", timeout=1)

    monkeypatch.setattr(merge_gate.settings, "merge_retest_timeout_s", 1)
    monkeypatch.setattr(merge_gate.subprocess, "run", _timing_out_run)

    ok, reason = merge_gate.retest_branch("orchestrator", "feature/x")
    assert ok is False
    assert "re-test timeout" in reason
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# TC-10..11: merge-lease (serialisation)
|
|
# ---------------------------------------------------------------------------
|
|
@pytest.fixture
def lease_dir(tmp_path, monkeypatch):
    """Point ``merge_gate.settings.repos_dir`` at a fresh tmp dir with a 300 s lock timeout.

    Returns the directory as a ``Path``.
    """
    repos_root = tmp_path / "repos"
    repos_root.mkdir()
    overrides = (
        ("repos_dir", str(repos_root)),
        ("merge_lock_timeout_s", 300),
    )
    for attr, value in overrides:
        monkeypatch.setattr(merge_gate.settings, attr, value)
    return repos_root
|
|
|
|
|
|
def test_tc10_second_acquire_busy_until_released(lease_dir):
    """TC-10: the lease is exclusive per repo, re-entrant for its holder, and free again after release."""
    repo = "orchestrator"

    first_ok, _ = merge_gate.acquire_merge_lease(repo, "feature/A", "ORCH-1")
    assert first_ok is True

    # While A holds the lease, another branch is turned away.
    other_ok, other_reason = merge_gate.acquire_merge_lease(repo, "feature/B", "ORCH-2")
    assert other_ok is False
    assert other_reason == "merge-lock busy"

    # Re-acquiring as the current holder succeeds.
    again_ok, _ = merge_gate.acquire_merge_lease(repo, "feature/A", "ORCH-1")
    assert again_ok is True

    # Once A releases (holder-aware form), B can take it.
    merge_gate.release_merge_lease(repo, "feature/A")
    after_release_ok, _ = merge_gate.acquire_merge_lease(repo, "feature/B", "ORCH-2")
    assert after_release_ok is True
|
|
|
|
|
|
def test_tc10_release_is_holder_aware(lease_dir):
    """TC-10: a release naming a branch that is not the holder leaves the lease in place."""
    repo = "orchestrator"
    merge_gate.acquire_merge_lease(repo, "feature/A", "ORCH-1")

    # Release issued on behalf of a DIFFERENT branch: A must keep the lease.
    merge_gate.release_merge_lease(repo, "feature/OTHER")

    ok, reason = merge_gate.acquire_merge_lease(repo, "feature/B", "ORCH-2")
    assert ok is False
    assert reason == "merge-lock busy"
|
|
|
|
|
|
def test_tc11_stale_lease_is_reclaimed(lease_dir, monkeypatch):
    """TC-11: a lease older than ``merge_lock_timeout_s`` is reclaimed by the next acquirer.

    A lease file is written by hand with an ``acquired_at`` 999 s in the past
    while the timeout is set to 10 s. The acquire call must succeed with a
    reason mentioning "reclaimed", and the lease file must then name the new
    branch.
    """
    repo = "orchestrator"
    monkeypatch.setattr(merge_gate.settings, "merge_lock_timeout_s", 10)

    # Write a lease that is older than the timeout (orphaned by a dead process).
    path = merge_gate._lease_path(repo)
    stale_lease = {"branch": "feature/dead", "acquired_at": time.time() - 999, "pid": 1}
    with open(path, "w", encoding="utf-8") as f:
        json.dump(stale_lease, f)

    ok, reason = merge_gate.acquire_merge_lease(repo, "feature/new", "ORCH-9")
    assert ok is True
    assert "reclaimed" in reason

    # The new holder now owns it. Read under a context manager so the file
    # handle is closed deterministically (no ResourceWarning).
    with open(path, encoding="utf-8") as f:
        held = json.load(f)
    assert held["branch"] == "feature/new"
|
|
|
|
|
|
def test_tc11_release_missing_is_noop(lease_dir):
    """TC-11: releasing when no lease file exists must not raise, in either call form."""
    repo = "orchestrator"
    # Holder-aware form.
    merge_gate.release_merge_lease(repo, "feature/none")
    # Force form (no branch given).
    merge_gate.release_merge_lease(repo)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# ORCH-065 / TC-16: idempotent merge finalization — pr_already_merged guard.
|
|
# ---------------------------------------------------------------------------
|
|
class _FakeResp:
|
|
def __init__(self, status_code, payload):
|
|
self.status_code = status_code
|
|
self._payload = payload
|
|
|
|
def json(self):
|
|
return self._payload
|
|
|
|
|
|
def test_tc16_pr_already_merged_true(monkeypatch):
    """A merged PR -> True so a re-driven/reaped task is a no-op (no second merge)."""

    def _merged_pr(*a, **k):
        # Stubbed httpx.get: HTTP 200 with a single merged PR.
        return _FakeResp(200, [{"number": 7, "merged": True}])

    monkeypatch.setattr(httpx, "get", _merged_pr)

    merged = merge_gate.pr_already_merged("orchestrator", "feature/x")
    assert merged is True
|
|
|
|
|
|
def test_tc16_pr_open_not_merged_false(monkeypatch):
    """An open / not-yet-merged PR -> False (the normal merge path proceeds)."""

    def _open_pr(*a, **k):
        # Stubbed httpx.get: HTTP 200 with a single unmerged PR.
        return _FakeResp(200, [{"number": 7, "merged": False}])

    monkeypatch.setattr(httpx, "get", _open_pr)

    merged = merge_gate.pr_already_merged("orchestrator", "feature/x")
    assert merged is False
|
|
|
|
|
|
def test_tc16_pr_no_pr_false(monkeypatch):
    """An empty PR list (HTTP 200) -> False."""

    def _no_prs(*a, **k):
        # Stubbed httpx.get: HTTP 200 with no PRs at all.
        return _FakeResp(200, [])

    monkeypatch.setattr(httpx, "get", _no_prs)

    merged = merge_gate.pr_already_merged("orchestrator", "feature/x")
    assert merged is False
|
|
|
|
|
|
def test_tc16_pr_already_merged_never_raises(monkeypatch):
    """Any HTTP/parse error -> False (conservative), never an exception (AC-9)."""

    def _gitea_unreachable(*a, **k):
        # Stubbed httpx.get that fails on every call.
        raise RuntimeError("gitea down")

    monkeypatch.setattr(httpx, "get", _gitea_unreachable)

    merged = merge_gate.pr_already_merged("orchestrator", "feature/x")
    assert merged is False
|
|
|
|
|
|
def test_tc16_pr_non_200_false(monkeypatch):
    """A non-200 response (here HTTP 500 with no payload) -> False."""

    def _server_error(*a, **k):
        # Stubbed httpx.get: HTTP 500, payload None.
        return _FakeResp(500, None)

    monkeypatch.setattr(httpx, "get", _server_error)

    merged = merge_gate.pr_already_merged("orchestrator", "feature/x")
    assert merged is False
|