feat(serial-gate): per-repo serial gate + deferred branch cut + rollback-freeze (ORCH-088)
Этап 1 (serial e2e) пакетного автономного режима. Новая задача репо не входит в analysis (analyst-job не выбирается, ветка не режется), пока в репо есть более ранняя незавершённая задача (FIFO, t2.id < jobs.task_id) ИЛИ репо заморожен.

- src/serial_gate.py — новый leaf (never-raise): build_claim_clause (fail-OPEN), is_repo_frozen (fail-CLOSED), set/clear_repo_freeze, serial_gate_applies, snapshot.
- src/db.py — идемпотентная миграция repo_freeze + serial_gate-фрагмент в claim_next_job.
- src/webhooks/plane.py + src/agents/launcher.py — отложенный срез ветки: start_pipeline не создаёт Gitea-ветку/docs для применимого репо; релокация в _materialize_deferred_branch на момент claim analyst-job (база = свежий origin/main с кодом предшественника, AC-6).
- src/stage_engine.py — post-deploy DEGRADED → durable per-repo freeze + Telegram-алерт.
- src/main.py — блок serial_gate в GET /queue + POST /serial-gate/unfreeze.
- src/config.py — serial_gate_enabled / serial_gate_repos / serial_gate_freeze_enabled.

FIFO-уточнение реализации (FR-2): ADR-001 D1 фиксировал t2.id != jobs.task_id; при != пакет одновременно созданных свежих задач взаимно блокировался бы (дедлок). t2.id < jobs.task_id допускает самую раннюю задачу и сериализует остальные, сохраняя AC-1/R-7.

STAGE_TRANSITIONS / QG_CHECKS / check_* — без изменений. Аддитивно, под kill-switch, never-raise, restart-safe; при выключенном флаге — нулевая регрессия (enduro не затронут).

Тесты: TC-01..TC-22 (test_serial_gate*.py + test_queue_endpoint.py); полный прогон 1114 зелёных.

Docs: README (serial gate / /queue / API / БД), CLAUDE.md, CHANGELOG.md, .env.example.

Refs: ORCH-088

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
34
src/db.py
34
src/db.py
@@ -168,6 +168,26 @@ def init_db():
|
||||
CREATE INDEX IF NOT EXISTS idx_tracker_messages_open
|
||||
ON tracker_messages(task_id) WHERE deleted_at IS NULL;
|
||||
""")
|
||||
# ORCH-088 (FR-5, ADR-001 D2): durable per-repo rollback-freeze. After a
|
||||
# post-deploy DEGRADED verdict the repo is frozen so the serial gate stays
|
||||
# CLOSED unconditionally (the degraded task is already stage='done' — BR-7 — so
|
||||
# the ordinary active-task gate would not hold it) until an operator clears it
|
||||
# via POST /serial-gate/unfreeze. Append-only journal: an ACTIVE freeze for repo
|
||||
# R ⇔ a row with repo=R AND cleared_at IS NULL. Purely ADDITIVE (CREATE
|
||||
# TABLE/INDEX IF NOT EXISTS) -> idempotent, restart-safe on the live shared prod
|
||||
# DB (enduro-trails data untouched). See 08-data-requirements.md.
|
||||
conn.executescript("""
|
||||
CREATE TABLE IF NOT EXISTS repo_freeze (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
repo TEXT NOT NULL,
|
||||
frozen_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
reason TEXT,
|
||||
work_item_id TEXT,
|
||||
cleared_at TEXT
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_repo_freeze_active
|
||||
ON repo_freeze (repo, cleared_at);
|
||||
""")
|
||||
conn.commit()
|
||||
conn.close()
|
||||
|
||||
@@ -588,6 +608,19 @@ def claim_next_job() -> dict | None:
|
||||
" WHERE d.task_id = jobs.task_id AND t.stage != 'done'"
|
||||
") "
|
||||
)
|
||||
# ORCH-088 (FR-1, ADR-001 D1): per-repo serial gate. An analyst-job of a NEW
|
||||
# task is NOT claimable while the same repo has an earlier unfinished task OR is
|
||||
# frozen. The fragment is built in the serial_gate leaf (sanitised repo scope,
|
||||
# fail-OPEN on any build error so a transient fault never wedges the queue of
|
||||
# ALL projects — AC-8). Jobs of an already-active task (architect/.../deployer)
|
||||
# are unaffected — the gate keys on jobs.agent='analyst' only. Reads only the
|
||||
# local DB (offline-safe hot path, NFR-2).
|
||||
serial_gate = ""
|
||||
try:
|
||||
from . import serial_gate as _serial_gate
|
||||
serial_gate = _serial_gate.build_claim_clause()
|
||||
except Exception: # noqa: BLE001 - fail-OPEN: never wedge the claim
|
||||
serial_gate = ""
|
||||
conn = get_db()
|
||||
try:
|
||||
while True:
|
||||
@@ -595,6 +628,7 @@ def claim_next_job() -> dict | None:
|
||||
"SELECT id FROM jobs WHERE status='queued' "
|
||||
"AND (available_at IS NULL OR available_at <= datetime('now')) "
|
||||
f"{dep_gate}"
|
||||
f"{serial_gate}"
|
||||
"ORDER BY id LIMIT 1"
|
||||
).fetchone()
|
||||
if not row:
|
||||
|
||||
Reference in New Issue
Block a user