feat(serial-gate): per-repo serial gate + deferred branch cut + rollback-freeze (ORCH-088)
Этап 1 (serial e2e) пакетного автономного режима. Новая задача репо не входит в analysis (analyst-job не выбирается, ветка не режется), пока в репо есть более ранняя незавершённая задача (FIFO, t2.id < jobs.task_id) ИЛИ репо заморожен. - src/serial_gate.py — новый leaf (never-raise): build_claim_clause (fail-OPEN), is_repo_frozen (fail-CLOSED), set/clear_repo_freeze, serial_gate_applies, snapshot. - src/db.py — идемпотентная миграция repo_freeze + serial_gate-фрагмент в claim_next_job. - src/webhooks/plane.py + src/agents/launcher.py — отложенный срез ветки: start_pipeline не создаёт Gitea-ветку/docs для применимого репо; релокация в _materialize_deferred_branch на момент claim analyst-job (база = свежий origin/main с кодом предшественника, AC-6). - src/stage_engine.py — post-deploy DEGRADED → durable per-repo freeze + Telegram-алерт. - src/main.py — блок serial_gate в GET /queue + POST /serial-gate/unfreeze. - src/config.py — serial_gate_enabled / serial_gate_repos / serial_gate_freeze_enabled. FIFO-уточнение реализации (FR-2): ADR-001 D1 фиксировал t2.id != jobs.task_id; при != пакет одновременно созданных свежих задач взаимно блокировался бы (дедлок). t2.id < jobs.task_id допускает самую раннюю задачу и сериализует остальные, сохраняя AC-1/R-7. STAGE_TRANSITIONS / QG_CHECKS / check_* — без изменений. Аддитивно, под kill-switch, never-raise, restart-safe; при выключенном флаге — нулевая регрессия (enduro не затронут). Тесты: TC-01..TC-22 (test_serial_gate*.py + test_queue_endpoint.py); полный прогон 1114 зелёных. Docs: README (serial gate / /queue / API / БД), CLAUDE.md, CHANGELOG.md, .env.example. Refs: ORCH-088 Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -418,6 +418,32 @@ class AgentLauncher:
|
||||
pass
|
||||
return None
|
||||
|
||||
def _materialize_deferred_branch(
|
||||
self, repo: str, branch: str, work_item_id: str | None, title: str | None
|
||||
) -> None:
|
||||
"""ORCH-088 (ADR-001 D1): create the deferred Gitea branch + initial docs.
|
||||
|
||||
Relocated from ``webhooks.plane.start_pipeline``: the two coroutines are run
|
||||
SYNCHRONOUSLY here (this method executes in the worker THREAD — no running
|
||||
event loop — so ``asyncio.run`` is safe, R-4). Sequence mirrors the original
|
||||
start_pipeline order so the downstream worktree/PR flow is identical:
|
||||
``_create_gitea_branch`` (from a fresh ``main``) -> ``_create_initial_docs``.
|
||||
Both are idempotent (409/422 -> no-op) so a re-claim after a restart is safe.
|
||||
A transient Gitea error PROPAGATES so the caller (_spawn) fails the launch and
|
||||
the queue worker requeues the job for a later tick (never a half-cut state).
|
||||
"""
|
||||
import asyncio
|
||||
from ..webhooks.plane import _create_gitea_branch, _create_initial_docs
|
||||
|
||||
name = title or work_item_id or branch
|
||||
logger.info(
|
||||
f"ORCH-088: materialising deferred branch '{branch}' for {repo} "
|
||||
f"({work_item_id}) at analyst-job claim"
|
||||
)
|
||||
asyncio.run(_create_gitea_branch(repo, branch))
|
||||
if work_item_id:
|
||||
asyncio.run(_create_initial_docs(repo, branch, work_item_id, name))
|
||||
|
||||
def _spawn(self, agent: str, repo: str, task_content: str = None,
|
||||
task_id: int = None, job_id: int = None) -> int:
|
||||
"""Shared spawn implementation for launch() and launch_job().
|
||||
@@ -437,9 +463,33 @@ class AgentLauncher:
|
||||
raise FileNotFoundError(f"Repo not found: {local_repo_path}")
|
||||
|
||||
# Determine branch (needed before we touch the worktree / task file).
|
||||
_br_row = get_db().execute("SELECT branch FROM tasks WHERE id=?", (task_id,)).fetchone() if task_id else None
|
||||
_br_row = (
|
||||
get_db().execute(
|
||||
"SELECT branch, work_item_id, title FROM tasks WHERE id=?", (task_id,)
|
||||
).fetchone()
|
||||
if task_id else None
|
||||
)
|
||||
agent_branch = _br_row[0] if _br_row else "main"
|
||||
|
||||
# ORCH-088 (FR-1/AC-6, ADR-001 D1): materialise a DEFERRED branch cut. When
|
||||
# the serial gate applies, start_pipeline did NOT create the Gitea branch /
|
||||
# initial docs — they were deferred to this claim so the cut happens from a
|
||||
# fresh origin/main that already contains the predecessor. We only reach this
|
||||
# claim because the gate is OPEN (predecessor done), so it is now safe. This
|
||||
# runs ONLY for the analyst-job (pipeline entry); every later stage reuses the
|
||||
# existing branch. Idempotent (409/422 -> no-op) so a re-claim is safe. On a
|
||||
# transient Gitea error this raises -> _drain_once requeues the job (R-4).
|
||||
if agent == "analyst" and _br_row is not None:
|
||||
try:
|
||||
from .. import serial_gate
|
||||
_applies = serial_gate.serial_gate_applies(repo)
|
||||
except Exception: # noqa: BLE001 - never let the gate check block a launch
|
||||
_applies = False
|
||||
if _applies:
|
||||
self._materialize_deferred_branch(
|
||||
repo, agent_branch, _br_row[1], _br_row[2]
|
||||
)
|
||||
|
||||
# ORCH-41: resolve the Plane project uuid for this repo so per-project
|
||||
# model/effort overrides apply. Unknown repo -> None (env/default only).
|
||||
from ..projects import get_project_by_repo
|
||||
|
||||
Reference in New Issue
Block a user