fix(stage-engine): address ORCH-114 review — env/docs canon + in-region rollback CAS
Resolves the REQUEST_CHANGES findings on ORCH-114 (durable transition-ownership lease + expected-stage CAS): P1 — documentation = golden source: - .env.example: add ORCH_TRANSITION_LEASE_ENABLED / ORCH_TRANSITION_LEASE_REPOS (canon of 100% start keys, ORCH-101), next to the other gate kill-switches. - CLAUDE.md: add the ORCH-114 passport section (mechanism, invariant, flags, ADR links) so a future agent editing advance_stage/reaper/webhooks finds the ownership invariant in the first mandatory-read doc (ORCH-078 traceability index). P2 — should-fix: - docs/overview/ (system showcase, ORCH-011): add transition_lease to tech-data-model.md (helper tables), tech-observability.md (/queue blocks) and tech-architecture.md (components). - ADR-001 D4 alignment: the four side-effectful-edge rollback handlers (_handle_merge_gate_rollback / _handle_security_gate / _handle_coverage_gate / _handle_image_freshness) now write `development` through the expected-stage CAS via a shared _rollback_stage_cas helper (defence against the rollback↔done contradiction, BR-6) instead of a bare unconditional update_task_stage. Under the held lease the sole owner always wins; a lost race aborts WITHOUT side effects. Kill-switch off / out-of-scope repo -> degenerates to the prior write -> 1:1. - Test isolation: make tests/test_webhooks.py order-independent by pinning the proj-1 registry per-test (mirrors test_webhook_dedup.proj_registry); it had only passed by relying on import order. Drop the needless module-level ORCH_DB_PATH setdefault in test_orch114 (fresh_db already isolates db_path). New regression tests (TC-11): in-region rollback writes route through CAS; rollback CAS wins when at expected stage; rollback CAS-lost does NOT clobber `done`; kill-switch-off rollback degenerates to the unconditional write. ruff clean (src/stage_engine.py, src/transition_lease.py); full suite 2052 passed. Refs: ORCH-114 Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -19,7 +19,11 @@ import tempfile
|
||||
|
||||
import pytest
|
||||
|
||||
os.environ.setdefault("ORCH_DB_PATH", os.path.join(tempfile.gettempdir(), "test_orch114.db"))
|
||||
# NB: deliberately NO module-level os.environ["ORCH_DB_PATH"] setdefault — pinning the
|
||||
# process-wide settings.db_path on first import is needless here (the autouse `fresh_db`
|
||||
# fixture below isolates db_path per-test via monkeypatch). The cross-module settings
|
||||
# singleton (e.g. ORCH_PROJECTS_JSON) is fixed by whoever imports `src` first; test_webhooks now
|
||||
# pins its own registry per-test rather than relying on import order (ORCH-114 review P2).
|
||||
os.environ.setdefault("ORCH_REPOS_DIR", tempfile.gettempdir())
|
||||
os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")
|
||||
os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")
|
||||
@@ -525,6 +529,71 @@ def test_tc11_bypass_paths_use_cas_not_unconditional_write():
|
||||
assert "commit_stage_cas(task_id, current_stage, next_stage" in asrc
|
||||
|
||||
|
||||
def test_tc11_inregion_rollback_writes_use_cas(monkeypatch):
    """Source-level guard for ADR-001 D4 on the in-region rollback handlers.

    Each of the four side-effectful-edge rollback handlers
    (_handle_merge_gate_rollback / _handle_security_gate / _handle_coverage_gate /
    _handle_image_freshness) has to write `development` via the expected-stage CAS
    helper (_rollback_stage_cas) rather than through a bare unconditional
    update_task_stage. The non-side-effectful launcher rollbacks in
    _handle_qg_failure_rollbacks are out of scope here -- no lease is held there.
    """
    # Exact source snippets searched for in each handler's source text.
    cas_call = "_rollback_stage_cas(task_id, current_stage, repo, result)"
    bare_write = 'update_task_stage(task_id, "development")'

    rollback_handlers = [
        se._handle_merge_gate_rollback,
        se._handle_security_gate,
        se._handle_coverage_gate,
        se._handle_image_freshness,
    ]
    for handler in rollback_handlers:
        handler_src = inspect.getsource(handler)
        assert cas_call in handler_src, (
            f"{handler.__name__} must route the rollback write through the CAS helper"
        )
        assert bare_write not in handler_src, (
            f"{handler.__name__} must not do a bare unconditional rollback write"
        )

    # The shared helper must itself delegate to commit_stage_cas.
    helper_src = inspect.getsource(se._rollback_stage_cas)
    assert "commit_stage_cas(task_id, current_stage" in helper_src
|
||||
|
||||
|
||||
def test_tc11_rollback_cas_wins_when_at_expected_stage(monkeypatch):
    """Mechanism ON and the task STILL at current_stage: the rollback wins the CAS.

    The stage is written to `development` and the helper returns True, i.e. the
    caller proceeds with the rollback.
    """
    # Arrange: mechanism enabled, task sitting at the stage the rollback expects.
    _enable(monkeypatch)
    expected_stage = "deploy-staging"
    task_id = _make_task(stage=expected_stage)
    outcome = se.AdvanceResult()

    # Act
    won = se._rollback_stage_cas(task_id, expected_stage, _REPO, outcome)

    # Assert: CAS won, stage rolled back, no "lost" marker on the result.
    assert won is True
    assert _task_stage(task_id) == "development"
    assert outcome.note != "rollback-cas-lost"
|
||||
|
||||
|
||||
def test_tc11_rollback_cas_lost_aborts_without_overwriting_done(monkeypatch):
    """BR-6 / ADR-001 D4: a stale rollback that loses the CAS must not clobber `done`.

    If a concurrent winner already advanced the task to `done`, the stale rollback
    LOSES the expected-stage CAS: it must NOT overwrite `done` with `development`,
    and the helper returns False so the caller aborts the rollback side effects.
    """
    _enable(monkeypatch)
    tid = _make_task(stage="deploy-staging")

    # Simulate a concurrent winner having advanced the task to terminal `done`.
    # The connection is closed in `finally` so a failing UPDATE/commit cannot leak
    # an open handle into the tests that run afterwards.
    conn = get_db()
    try:
        conn.execute("UPDATE tasks SET stage='done' WHERE id=?", (tid,))
        conn.commit()
    finally:
        conn.close()

    result = se.AdvanceResult()
    # The rollback still believes current_stage is deploy-staging (its read-on-entry).
    assert se._rollback_stage_cas(tid, "deploy-staging", _REPO, result) is False
    assert _task_stage(tid) == "done"  # NOT clobbered back to development
    assert result.note == "rollback-cas-lost"
|
||||
|
||||
|
||||
def test_tc11_rollback_cas_killswitch_off_unconditional(monkeypatch):
    """Kill-switch off: _rollback_stage_cas falls back to the unconditional write.

    With the mechanism disabled the helper always returns True and performs no CAS,
    so behaviour is byte-for-byte pre-ORCH-114 (AC-9).
    """
    _disable(monkeypatch)

    # The task is deliberately NOT at the stage passed to the helper: with the
    # kill-switch off, even a mismatched stage is written unconditionally.
    task_id = _make_task(stage="done")
    outcome = se.AdvanceResult()

    proceeded = se._rollback_stage_cas(task_id, "deploy-staging", _REPO, outcome)

    assert proceeded is True
    assert _task_stage(task_id) == "development"
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# TC-12 — observability (AC-12)
|
||||
# ===========================================================================
|
||||
|
||||
@@ -25,6 +25,28 @@ os.environ["ORCH_PROJECTS_JSON"] = (
|
||||
from fastapi.testclient import TestClient
|
||||
from src.main import app
|
||||
from src.db import init_db, get_db
|
||||
from src import projects as projects_mod
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def proj_registry():
    """Force the shared project registry to proj-1/enduro-trails around every test.

    The registry (projects.PROJECTS / _BY_PLANE_ID) is a process-wide singleton built
    at FIRST `src` import, so this module's import-time ORCH_PROJECTS_JSON only takes
    effect when test_webhooks happens to import `src` before any other module (true
    when it runs right after test_webhook_dedup, false for an arbitrary subset such
    as `pytest test_orch114… test_webhooks`). Re-pinning the registry per test makes
    these fixtures order-independent (mirrors test_webhook_dedup.proj_registry;
    ORCH-114 review P2).
    """
    pinned_projects = (
        '[{"plane_project_id": "proj-1", "repo": "enduro-trails", '
        '"work_item_prefix": "ET", "name": "enduro-trails"}]'
    )
    # Keep the environment and the already-built settings object in agreement,
    # then rebuild the registry from them.
    os.environ["ORCH_PROJECTS_JSON"] = pinned_projects
    projects_mod.settings.projects_json = pinned_projects
    projects_mod.reload_projects()

    yield

    # NOTE(review): the pinned values are not restored before this reload, so it
    # presumably rebuilds the same proj-1 registry -- confirm that is intended.
    projects_mod.reload_projects()
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
|
||||
Reference in New Issue
Block a user