fix(cancel): narrow STOP critical-window so deploy-park cancel applies (ORCH-090)
Review P1: a STOP while a self-hosting task is PARKED on `deploy` awaiting the manual `Confirm Deploy` was classified as a critical merge/deploy window solely because the task still held the per-repo merge-lease (held from merge-gate through deploy->done). That window is fully reversible — nothing is merged or deployed yet (the irreversible merge_pr runs later in _handle_merge_verify, always under an INITIATED marker). So the cancel was DEFERRED to run_deploy_finalizer, which only runs after Phase B (Confirm Deploy) — the very step the operator pressed STOP to avoid. Result: the deferred cancel was never applied, the task wedged non-terminal holding the lease, blocking the repo's serial-gate (ORCH-088) and merges. Fix: gate the merge-lease branch of cancel.in_critical_window on an actively RUNNING actor (_task_has_running_actor). Lease held + running deploy/merge job -> still deferred (genuine in-flight step). Lease held + no running actor (idle deploy parking) -> NOT critical -> immediate full reset, which itself releases the lease (step 3c) and drives the task terminal. INITIATED-marker deferral unchanged. Also fixes review P2 (AC-6): set_task_cancel_requested now returns the first-stamp fact (rowcount), and the deferred branch only notifies on the first transition — a repeated STOP while still deferred no longer spams duplicate notifications. Tests: test_d7_lease_held_idle_parking_is_not_critical, test_d7_lease_held_with_running_actor_still_critical, test_d7_stop_on_deploy_awaiting_confirm_full_resets, test_d7_repeated_stop_in_critical_window_no_duplicate_notify. Full suite green (1349). Refs: ORCH-090 Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -440,6 +440,72 @@ def test_d7_in_critical_window_detection(monkeypatch):
|
||||
assert cancel_mod.in_critical_window(task) is True
|
||||
|
||||
|
||||
def test_d7_lease_held_idle_parking_is_not_critical(monkeypatch):
|
||||
"""ORCH-090 review P1: a task PARKED on `deploy` awaiting Confirm Deploy holds the
|
||||
merge-lease but is fully reversible -> NOT a critical window (else the deferred
|
||||
cancel is never applied and the task wedges)."""
|
||||
from src import merge_gate
|
||||
os.makedirs(cfg.settings.repos_dir, exist_ok=True)
|
||||
branch = "feature/ORCH-432-park"
|
||||
tid = _make_task("PL-432", "ORCH-432", stage="deploy", branch=branch)
|
||||
# Lease HELD by this task's branch, NO INITIATED marker, NO running job.
|
||||
acquired, _ = merge_gate.acquire_merge_lease("orchestrator", branch, "ORCH-432")
|
||||
assert acquired
|
||||
assert merge_gate.current_lease_holder("orchestrator") == branch
|
||||
task = get_task(tid)
|
||||
assert cancel_mod.in_critical_window(task) is False
|
||||
|
||||
|
||||
def test_d7_lease_held_with_running_actor_still_critical(monkeypatch):
|
||||
"""Lease held AND a deploy/merge actor actually running -> still critical (defer)."""
|
||||
from src import merge_gate
|
||||
os.makedirs(cfg.settings.repos_dir, exist_ok=True)
|
||||
branch = "feature/ORCH-433-merge"
|
||||
tid = _make_task("PL-433", "ORCH-433", stage="deploy", branch=branch)
|
||||
merge_gate.acquire_merge_lease("orchestrator", branch, "ORCH-433")
|
||||
_make_job(tid, status="running", pid=9191) # the merge/deploy actor
|
||||
task = get_task(tid)
|
||||
assert cancel_mod.in_critical_window(task) is True
|
||||
|
||||
|
||||
def test_d7_stop_on_deploy_awaiting_confirm_full_resets(monkeypatch):
|
||||
"""End-to-end of the P1 fix: STOP while parked on `deploy` awaiting Confirm Deploy
|
||||
-> immediate FULL reset (terminal cancelled, branch deleted, lease released)."""
|
||||
calls = _stub_full_reset(monkeypatch)
|
||||
from src import merge_gate
|
||||
os.makedirs(cfg.settings.repos_dir, exist_ok=True)
|
||||
branch = "feature/ORCH-434-park"
|
||||
tid = _make_task("PL-434", "ORCH-434", stage="deploy", branch=branch)
|
||||
merge_gate.acquire_merge_lease("orchestrator", branch, "ORCH-434")
|
||||
|
||||
res = stage_engine.cancel_task(tid)
|
||||
|
||||
assert res["ok"] and not res["deferred"]
|
||||
assert res["note"] == "cancelled"
|
||||
# Durable terminal + branch deleted -> repo no longer wedged.
|
||||
assert get_task(tid)["stage"] == "cancelled"
|
||||
assert calls["branch"], "full reset deletes the remote feature branch"
|
||||
# The held lease was released (step 3c) -> the repo's serial-gate is unblocked.
|
||||
assert merge_gate.current_lease_holder("orchestrator") is None
|
||||
|
||||
|
||||
def test_d7_repeated_stop_in_critical_window_no_duplicate_notify(monkeypatch):
|
||||
"""AC-6 / P2: a repeated STOP while still deferred does not re-notify."""
|
||||
_stub_full_reset(monkeypatch)
|
||||
from src import self_deploy
|
||||
notifies = []
|
||||
monkeypatch.setattr(stage_engine, "_notify_cancel",
|
||||
lambda *a, **k: notifies.append(a), raising=True)
|
||||
tid = _make_task("PL-435", "ORCH-435", stage="deploy", branch="feature/ORCH-435-d")
|
||||
self_deploy.write_marker("orchestrator", "ORCH-435", self_deploy.INITIATED, content="1")
|
||||
|
||||
r1 = stage_engine.cancel_task(tid)
|
||||
r2 = stage_engine.cancel_task(tid)
|
||||
assert r1["deferred"] and r1["note"] == "deferred-critical-window"
|
||||
assert r2["deferred"] and r2["note"] == "deferred-already-pending"
|
||||
assert len(notifies) == 1, "only the first deferral transition notifies"
|
||||
|
||||
|
||||
def test_d7_deferred_applied_by_finalizer(monkeypatch):
|
||||
"""After the irreversible step finishes, the finalizer applies the deferred cancel."""
|
||||
calls = _stub_full_reset(monkeypatch)
|
||||
|
||||
Reference in New Issue
Block a user