feat(deploy): dedicated "Confirm Deploy" status triggers prod deploy
Split the overloaded `Approved` Plane status: it served BOTH as the human BRD
gate on `analysis` AND as the silent Phase B prod-deploy trigger on `deploy`
(ORCH-036), so a routine approve could launch a self-hosting prod restart.
ORCH-059 introduces a dedicated logical status `confirm_deploy` ("Confirm
Deploy") that triggers ONLY Phase B on `deploy`; `Approved` stays purely a
pipeline gate.
- plane_sync: map "Confirm Deploy" -> "confirm_deploy" in _PLANE_NAME_TO_KEY;
intentionally absent from _DEFAULT_STATES => fail-closed (no UUID -> .get
yields None, no KeyError, no blind deploy).
- webhooks/plane: handle_issue_updated routes "Confirm Deploy" (fail-closed
.get) to new handle_confirm_deploy (guarded to stage=="deploy") ->
_try_advance_stage(confirm_deploy=True).
- stage_engine: advance_stage gains keyword-only confirm_deploy=False; Phase B
block returns early for deploy+finished_agent is None but only initiates the
deploy when confirm_deploy=True; a plain Approved is a deterministic no-op
(returns before check_deploy_status -> no false БАГ-8 rollback).
- Phase A CTA now asks the operator for "Confirm Deploy", not "Approved".
Contracts unchanged: STAGE_TRANSITIONS, QG_CHECKS, check_deploy_status, hook
exit codes, Phases A/C, merge-gate, DB schema. Conditional like ORCH-35/36
(self-hosting only). Docs updated (CLAUDE.md, architecture/README.md, CHANGELOG).
Refs: ORCH-059
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -128,6 +128,12 @@ _PLANE_NAME_TO_KEY: dict[str, str] = {
|
||||
"Needs Input": "needs_input",
|
||||
"In Review": "in_review",
|
||||
"Blocked": "blocked",
|
||||
# ORCH-059: dedicated prod-deploy trigger status, distinct from the
|
||||
# human-gate "Approved". Resolved from the live Plane API for the ORCH
|
||||
# project; intentionally ABSENT from _DEFAULT_STATES so environments without
|
||||
# this board status (enduro / API fallback) fail-closed — no UUID, no
|
||||
# confirm-deploy branch, no KeyError (accessed via .get).
|
||||
"Confirm Deploy": "confirm_deploy",
|
||||
}
|
||||
|
||||
# Per-project state cache: {project_id: {logical_key: state_uuid}}
|
||||
|
||||
@@ -171,6 +171,8 @@ def advance_stage(
|
||||
work_item_id: str,
|
||||
branch: str,
|
||||
finished_agent: str | None = None,
|
||||
*,
|
||||
confirm_deploy: bool = False,
|
||||
) -> AdvanceResult:
|
||||
"""Run the current stage's quality gate and advance / roll back the pipeline.
|
||||
|
||||
@@ -187,6 +189,13 @@ def advance_stage(
|
||||
approved/REQUEST_CHANGES/tester/architect branches. In the
|
||||
plane webhook path it is None, so those agent-specific
|
||||
branches simply do not trigger (matches old plane behavior).
|
||||
confirm_deploy: ORCH-059 — keyword-only signal that the human flipped the
|
||||
issue to the dedicated "Confirm Deploy" status. ONLY this
|
||||
signal initiates Phase B of the self-hosting prod deploy on
|
||||
the `deploy` stage. A plain `Approved` on `deploy`
|
||||
(confirm_deploy=False) is a deliberate no-op (no prod
|
||||
deploy, no false БАГ-8 rollback). All non-webhook callers
|
||||
leave it at the default.
|
||||
|
||||
Returns AdvanceResult describing what happened.
|
||||
"""
|
||||
@@ -203,21 +212,32 @@ def advance_stage(
|
||||
result.note = "terminal"
|
||||
return result
|
||||
|
||||
# --- ORCH-036 Phase B: human Approved on `deploy` -> initiate deploy --
|
||||
# A human flipping the Plane status to Approved on the `deploy` stage
|
||||
# (finished_agent is None) is the prod-deploy trigger for the self-hosting
|
||||
# repo. Initiate the DETACHED host deploy + enqueue the finalizer and
|
||||
# return WITHOUT running check_deploy_status (the verdict does not exist
|
||||
# yet — running the gate now would read a stale/absent log and falsely
|
||||
# roll back, R-2). The finalizer (Phase C, finished_agent="deployer")
|
||||
# records the verdict later; that path is NOT intercepted here.
|
||||
# --- ORCH-036/059 Phase B: "Confirm Deploy" on `deploy` -> initiate ----
|
||||
# ORCH-059: the prod-deploy trigger is now the DEDICATED "Confirm Deploy"
|
||||
# status (confirm_deploy=True), NOT the overloaded "Approved". On the
|
||||
# `deploy` stage (finished_agent is None) for the self-hosting repo we
|
||||
# always return early WITHOUT running check_deploy_status (the verdict
|
||||
# does not exist yet — running the gate now would read a stale/absent log
|
||||
# and falsely roll back, R-2/БАГ-8), but we only initiate the DETACHED
|
||||
# host deploy + enqueue the finalizer when confirm_deploy is set. A plain
|
||||
# Approved (confirm_deploy=False) is a deliberate no-op — it neither
|
||||
# deploys nor rolls back (TRZ-3/AC-3). The finalizer (Phase C,
|
||||
# finished_agent="deployer") records the verdict later; that path is NOT
|
||||
# intercepted here (it requires finished_agent set).
|
||||
if (
|
||||
current_stage == "deploy"
|
||||
and finished_agent is None
|
||||
and settings.deploy_require_manual_approve
|
||||
and self_deploy.self_deploy_applies(repo)
|
||||
):
|
||||
_handle_self_deploy_phase_b(task_id, repo, work_item_id, branch, result)
|
||||
if confirm_deploy:
|
||||
_handle_self_deploy_phase_b(task_id, repo, work_item_id, branch, result)
|
||||
else:
|
||||
result.note = "approved-on-deploy-noop"
|
||||
logger.info(
|
||||
f"Task {task_id}: Approved on `deploy` without Confirm Deploy "
|
||||
f"— no-op (prod deploy requires the 'Confirm Deploy' status)"
|
||||
)
|
||||
return result
|
||||
|
||||
# --- Quality gate ----------------------------------------------------
|
||||
@@ -998,9 +1018,11 @@ def _handle_self_deploy_phase_a(
|
||||
|
||||
Staging is green and the branch is mergeable; for the self-hosting repo we do
|
||||
NOT auto-deploy to prod. Move the task onto the `deploy` stage (so a later
|
||||
human Approved lands there -> Phase B), set the issue approval-pending and ask
|
||||
the human to flip the status to Approved. A restart-safe `approve-requested`
|
||||
marker records that Phase A ran. The merge lease stays HELD.
|
||||
human "Confirm Deploy" lands there -> Phase B), set the issue approval-pending
|
||||
and ask the human to flip the status to "Confirm Deploy" (ORCH-059: the
|
||||
dedicated prod-deploy trigger, distinct from the human-gate "Approved"). A
|
||||
restart-safe `approve-requested` marker records that Phase A ran. The merge
|
||||
lease stays HELD.
|
||||
"""
|
||||
update_task_stage(task_id, "deploy")
|
||||
notify_stage_change(task_id, current_stage, "deploy")
|
||||
@@ -1022,13 +1044,14 @@ def _handle_self_deploy_phase_a(
|
||||
if work_item_id:
|
||||
plane_add_comment(
|
||||
work_item_id,
|
||||
"\U0001f7e1 Staging зелёный. Требуется ручной approve для ПРОД-деплоя: "
|
||||
"смените статус задачи на «Approved», чтобы запустить деплой в прод (8500).",
|
||||
"\U0001f7e1 Staging зелёный. Требуется ручное подтверждение ПРОД-деплоя: "
|
||||
"смените статус задачи на «Confirm Deploy», чтобы запустить деплой в прод "
|
||||
"(8500). Статус «Approved» прод-деплой НЕ запускает.",
|
||||
author="deployer",
|
||||
)
|
||||
send_telegram(
|
||||
f"\U0001f7e1 {work_item_id}: staging OK. Ждёт approve на ПРОД-деплой "
|
||||
f"(смените статус на Approved)."
|
||||
f"\U0001f7e1 {work_item_id}: staging OK. Ждёт подтверждения ПРОД-деплоя "
|
||||
f"(смените статус на «Confirm Deploy»)."
|
||||
)
|
||||
logger.info(
|
||||
f"Task {task_id}: self-deploy Phase A — advanced to deploy, "
|
||||
|
||||
@@ -150,8 +150,15 @@ async def handle_issue_updated(data: dict, project_id: str = ""):
|
||||
# both enduro (b873d9eb) and orchestrator (e331bfb3) In Progress trigger the
|
||||
# pipeline. Using PLANE_STATES["in_progress"] here was the root-cause blocker.
|
||||
proj_states = get_project_states(project_id)
|
||||
# ORCH-059: the dedicated "Confirm Deploy" status is the prod-deploy trigger.
|
||||
# fail-closed via .get — environments without the status (enduro / API
|
||||
# fallback) resolve to None, so the branch simply never activates (no KeyError,
|
||||
# no blind deploy). Checked before `approved` so the two gestures never alias.
|
||||
confirm_state = proj_states.get("confirm_deploy")
|
||||
if new_state == proj_states["in_progress"]:
|
||||
await handle_status_start(data, project_id)
|
||||
elif confirm_state and new_state == confirm_state:
|
||||
await handle_confirm_deploy(data, project_id)
|
||||
elif new_state == proj_states["approved"]:
|
||||
await handle_verdict(data, project_id, approved=True)
|
||||
elif new_state == proj_states["rejected"]:
|
||||
@@ -160,6 +167,45 @@ async def handle_issue_updated(data: dict, project_id: str = ""):
|
||||
logger.info(f"issue {plane_id} updated to state {new_state[:8]}..., no pipeline action")
|
||||
|
||||
|
||||
async def handle_confirm_deploy(data: dict, project_id: str = ""):
|
||||
"""ORCH-059: a human flipped the issue to the dedicated "Confirm Deploy"
|
||||
status — the explicit trigger for the self-hosting prod deploy (Phase B).
|
||||
|
||||
Guarded to the `deploy` stage: "Confirm Deploy" is only meaningful on the
|
||||
approval-pending `deploy` stage (Phase A advanced the task there). On any
|
||||
other stage it is a no-op-with-log, so a stray Confirm Deploy can never
|
||||
perturb another gate.
|
||||
|
||||
Routes to the unified stage engine with ``confirm_deploy=True`` so ONLY this
|
||||
path initiates Phase B; a plain Approved on `deploy` stays a no-op (TRZ-3).
|
||||
"""
|
||||
plane_id = str(data.get("id") or "")
|
||||
task = get_task_by_plane_id(plane_id)
|
||||
if not task:
|
||||
logger.warning(f"Confirm Deploy for {plane_id} but no task found, ignoring")
|
||||
return
|
||||
|
||||
task_id = task["id"]
|
||||
current_stage = task["stage"]
|
||||
repo = task["repo"]
|
||||
work_item_id = task.get("work_item_id", "")
|
||||
branch = task.get("branch", "")
|
||||
|
||||
if current_stage != "deploy":
|
||||
logger.info(
|
||||
f"Confirm Deploy for {plane_id} but stage is '{current_stage}' "
|
||||
f"(not 'deploy'); no-op"
|
||||
)
|
||||
return
|
||||
|
||||
logger.info(
|
||||
f"Task {task_id}: Confirm Deploy status on `deploy` -> initiate Phase B prod deploy"
|
||||
)
|
||||
await _try_advance_stage(
|
||||
task_id, current_stage, repo, work_item_id, branch, confirm_deploy=True
|
||||
)
|
||||
|
||||
|
||||
async def handle_status_start(data: dict, project_id: str = ""):
|
||||
"""An issue moved into In Progress.
|
||||
|
||||
@@ -633,7 +679,8 @@ async def _rollback_stage(
|
||||
|
||||
|
||||
async def _try_advance_stage(
|
||||
task_id: int, current_stage: str, repo: str, work_item_id: str, branch: str
|
||||
task_id: int, current_stage: str, repo: str, work_item_id: str, branch: str,
|
||||
confirm_deploy: bool = False,
|
||||
):
|
||||
"""Thin async wrapper over the unified stage engine (ORCH-4 / M-3).
|
||||
|
||||
@@ -642,10 +689,15 @@ async def _try_advance_stage(
|
||||
is synchronous. We run it off the event loop via asyncio.to_thread so there
|
||||
is exactly one implementation shared with the launcher.
|
||||
|
||||
finished_agent is None on this webhook path (a human Approved status change,
|
||||
not a finished agent), so the agent-specific rollback branches inside the
|
||||
engine intentionally do not trigger — the webhook path only runs the QG and
|
||||
either advances or reports the failure.
|
||||
finished_agent is None on this webhook path (a human status change, not a
|
||||
finished agent), so the agent-specific rollback branches inside the engine
|
||||
intentionally do not trigger — the webhook path only runs the QG and either
|
||||
advances or reports the failure.
|
||||
|
||||
ORCH-059: ``confirm_deploy`` is threaded through (keyword-only on
|
||||
advance_stage). It is True ONLY on the "Confirm Deploy" path
|
||||
(handle_confirm_deploy) and gates Phase B of the self-hosting prod deploy; the
|
||||
plain Approved path (handle_verdict) leaves it at the default False.
|
||||
"""
|
||||
import asyncio
|
||||
from ..stage_engine import advance_stage
|
||||
@@ -658,6 +710,7 @@ async def _try_advance_stage(
|
||||
work_item_id,
|
||||
branch,
|
||||
None,
|
||||
confirm_deploy=confirm_deploy,
|
||||
)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user