A DB stage=done task with 0 active jobs flapped in Plane between `Awaiting Deploy` and `Monitoring after Deploy` instead of holding `Done` (verified live on ORCH-061, task 47): the three deploy-phase setters were terminal-blind, so any stale/duplicate/unknown caller under the bot token re-stamped an intermediate status over the terminal Done, forever. - New leaf src/deploy_status_guard.py (pure, never-raise, config-gated): decide() -> ALLOW | CONVERGE_DONE | SUPPRESS on the entry of set_issue_awaiting_deploy / set_issue_deploying / set_issue_monitoring. A deploy-phase status is legitimate iff the task is non-terminal OR (done AND post-deploy window active); otherwise done converges to Done idempotently, cancelled is suppressed (FR-2, D1/D2). - D3: move post_deploy.arm_monitor ABOVE the terminal-sync block in advance_stage so window_active is True when the legitimate first Monitoring is set (the task is already DB-done by then); a re-drive after the window closes converges to Done. - D4: run_post_deploy_monitor no-ops without a status PATCH / re-queue when the task became cancelled mid-window (zombie-tick guard, FR-3). - D5: additive `reason` kwarg on the three setters + one structured log line per verdict (work_item/caller/target/db_stage/window_active/verdict); new read-only db.get_task_by_work_item_id; post_deploy.window_active helper. - Flags deploy_status_guard_enabled (kill-switch -> 1:1) / deploy_status_guard_repos (CSV; empty = self-hosting only). STAGE_TRANSITIONS / QG_CHECKS / check_* / machine-verdict keys / DB schema untouched (reads existing tasks.stage). Tests: TC-01..TC-12 across 5 new test modules + config flags; updated the reason-kwarg assertions in test_deploy_terminal_sync / test_deploy_approve. Full regress green (1413). Docs: CHANGELOG, CLAUDE.md, docs/architecture/README.md (status -> реализовано), .env.example. Refs: ORCH-094 Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
83 lines
3.3 KiB
Python
83 lines
3.3 KiB
Python
"""ORCH-094 — sync convergence for a done task stuck on a deploy status (TC-10).
|
|
|
|
Integration-level: ANY sync source (reconciler tick / monitor tick / a direct
|
|
deploy-status setter call) that touches a DB-done task converges Plane to Done
|
|
idempotently instead of an intermediate deploy status, and a repeated tick does
|
|
NOT swing the Done<->deploy-status pendulum. The guard lives on the setter
|
|
(ADR-001 D1/D7), so the reconciler code itself is unchanged — driving the setter
|
|
the way a stale actor would is the faithful reproduction of the 061 flap.
|
|
"""
|
|
import os
|
|
import tempfile
|
|
|
|
import pytest
|
|
|
|
# Test-only environment bootstrap. These statements sit above the ``src``
# imports (which carry ``# noqa: E402`` for that reason).
# NOTE(review): presumably the ``src`` settings read these variables at import
# time, which is why they must be set first — confirm against src.config.
_test_db = os.path.join(tempfile.gettempdir(), "test_reconciler_done_converge.db")

# Always overridden: the database path and the repos directory.
os.environ.update(ORCH_DB_PATH=_test_db, ORCH_REPOS_DIR=tempfile.gettempdir())

# Tokens only get a placeholder when the environment does not already set them.
os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")
os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")
|
|
|
|
from unittest.mock import MagicMock # noqa: E402
|
|
|
|
import src.db as _db # noqa: E402
|
|
from src.db import init_db, get_db # noqa: E402
|
|
from src import plane_sync # noqa: E402
|
|
from src import post_deploy # noqa: E402
|
|
from src import config as cfg # noqa: E402
|
|
|
|
|
|
@pytest.fixture(autouse=True)
def fresh_db(monkeypatch, tmp_path):
    """Give every test an empty database and an enabled deploy-status guard.

    Applied automatically to each test in this module. It deletes any
    leftover file at ``_test_db``, rebuilds it with ``init_db()``, switches
    the guard flag on with an empty repo list, and redirects the
    ``post_deploy`` repo directories to the per-test ``tmp_path``.
    """
    patch = monkeypatch.setattr

    # Start from a clean database file on every test.
    patch(_db.settings, "db_path", _test_db)
    if os.path.exists(_test_db):
        os.remove(_test_db)
    init_db()

    # Guard switched on with an empty repo CSV. ``raising=False`` lets the
    # patch succeed even if the settings object lacks the attribute.
    guard_flags = (
        ("deploy_status_guard_enabled", True),
        ("deploy_status_guard_repos", ""),
    )
    for flag_name, flag_value in guard_flags:
        patch(cfg.settings, flag_name, flag_value, raising=False)

    # Both repo roots used by post_deploy point at the per-test directory.
    sandbox = str(tmp_path)
    for dir_attr in ("repos_dir", "host_repos_dir"):
        patch(post_deploy.settings, dir_attr, sandbox)

    yield
|
|
|
|
|
|
@pytest.fixture
def spy(monkeypatch):
    """Stub out the Plane write paths and return the recorders.

    Returns:
        A ``(direct, done)`` tuple of ``MagicMock`` objects standing in for
        ``plane_sync._set_issue_state_direct`` and
        ``plane_sync.set_issue_done`` respectively.
    """

    def _fake_project_id(w=None, p=None):
        # Every lookup resolves to the same fixed project.
        return "proj-1"

    def _fake_states(pid):
        # A fresh mapping per call, covering the three deploy-phase states.
        return {"awaiting_deploy": "S-aw", "deploying": "S-dep", "monitoring": "S-mon"}

    direct_recorder = MagicMock()
    done_recorder = MagicMock()

    replacements = (
        ("_set_issue_state_direct", direct_recorder),
        ("set_issue_done", done_recorder),
        ("_resolve_project_id", _fake_project_id),
        ("get_project_states", _fake_states),
    )
    for attr_name, stand_in in replacements:
        monkeypatch.setattr(plane_sync, attr_name, stand_in)

    return direct_recorder, done_recorder
|
|
|
|
|
|
def _make_task(stage: str = "done", repo: str = "orchestrator", wi: str = "ORCH-061") -> None:
    """Insert one row into ``tasks`` for work item *wi*.

    Args:
        stage: Value stored in the ``stage`` column.
        repo: Value stored in the ``repo`` column.
        wi: Work item id. ``plane_id`` (``plane-<wi>``) and ``branch``
            (``feature/<wi>-x``) are both derived from it, so a non-default
            work item no longer gets a branch that names ORCH-061.
    """
    conn = get_db()
    try:
        conn.execute(
            "INSERT INTO tasks (plane_id, work_item_id, repo, branch, stage) "
            "VALUES (?, ?, ?, ?, ?)",
            (f"plane-{wi}", wi, repo, f"feature/{wi}-x", stage),
        )
        conn.commit()
    finally:
        # Release the connection even when the INSERT or commit raises, so a
        # failing test does not leave the database file held open.
        conn.close()
|
|
|
|
|
|
def test_tc10_repeated_sync_converges_no_pendulum(spy):
    """TC-10: repeated deploy-status ticks on a done task all converge to Done.

    Ten alternating Monitoring / Awaiting Deploy setter calls are driven
    against a DB-done task. None may reach the direct state PATCH, and each
    must call ``set_issue_done`` with the work item id only.
    """
    direct_patch, converge_done = spy
    _make_task("done")  # done, window closed (no ARMED sentinel)

    # Replay the observed 061 pendulum: even ticks ask for Monitoring, odd
    # ticks ask for Awaiting Deploy.
    for tick in range(10):
        setter = (
            plane_sync.set_issue_awaiting_deploy
            if tick % 2
            else plane_sync.set_issue_monitoring
        )
        setter("ORCH-061", reason="reconciler-tick")

    # Not a single intermediate deploy-status PATCH went out.
    assert direct_patch.call_count == 0
    # Every tick converged to Done instead.
    assert converge_done.call_count == 10
    # All convergence calls target the same terminal Done (no swing).
    assert all(call.args == ("ORCH-061",) for call in converge_done.call_args_list)
|