feat(pipeline): add deploy-staging gate before prod deploy (ORCH-35)
This commit is contained in:
@@ -440,6 +440,100 @@ def check_deploy_status(repo: str, work_item_id: str, branch: str | None = None)
|
||||
return False, "Deploy log not found (14-deploy-log.md)"
|
||||
|
||||
|
||||
|
||||
def _parse_staging_status(content: str) -> tuple[bool, str]:
    """Map the `staging_status:` frontmatter of a staging log to a gate verdict.

    Parses the text of a 15-staging-log.md file. Only the machine-readable YAML
    frontmatter (the text between a leading ``---`` and the next ``---``) is
    consulted; the prose body is never read. The field value is compared
    case-insensitively and with surrounding whitespace stripped.

    Args:
        content: Full text of the staging log.

    Returns:
        A ``(passed, reason)`` tuple:

        staging_status: SUCCESS                -> (True, "Staging status: SUCCESS")
        staging_status: FAILED                 -> (False, "Staging status: FAILED")
        invalid YAML                           -> (False, <parser error>)
        frontmatter that is not a YAML mapping -> (False, <reason>)
        missing field / no frontmatter         -> (False, <reason>)
    """
    status = None
    if content.startswith("---"):
        parts = content.split("---", 2)
        if len(parts) >= 3:
            # Imported only once frontmatter is actually present, so logs
            # without frontmatter are rejected without touching PyYAML.
            import yaml

            try:
                fm = yaml.safe_load(parts[1]) or {}
            except yaml.YAMLError as e:
                return False, f"Invalid YAML frontmatter in staging log: {e}"
            # safe_load returns whatever the document holds (str, list, int, ...).
            # Anything but a mapping has no fields to look up, so fail the gate
            # with a reason instead of crashing on `.get`.
            if not isinstance(fm, dict):
                return False, (
                    "Staging log frontmatter is not a YAML mapping "
                    f"(got: {type(fm).__name__})"
                )
            status = str(fm.get("staging_status", "")).upper().strip()
    if status == "SUCCESS":
        return True, "Staging status: SUCCESS"
    if status == "FAILED":
        return False, "Staging status: FAILED"
    return False, f"No machine-readable staging_status in frontmatter (got: {status!r})"
|
||||
|
||||
|
||||
def _staging_log_from_main(repo: str, work_item_id: str) -> str | None:
    """Best-effort read of 15-staging-log.md from origin/main on the shared clone.

    The deployer writes 15-staging-log.md and merges the staging artifacts into
    main via a separate PR (mirroring the deploy-log pattern), so the file lands
    in origin/main, NOT in the feature branch worktree the gate normally reads.
    This recovers it from main.

    Args:
        repo: Repository name; the clone is looked up under ``settings.repos_dir``.
        work_item_id: Work item whose ``docs/work-items/<id>/15-staging-log.md``
            is requested.

    Returns:
        The file's text as stored in origin/main, or None when the clone is
        missing, a git command could not be run or timed out, or ``git show``
        exited non-zero (e.g. the file is absent in main). The caller then falls
        back to the plain "not found" verdict.

    Never raises: output is decoded as UTF-8 with undecodable bytes replaced, so
    a badly-encoded log cannot escape as UnicodeDecodeError.
    """
    repo_clone = os.path.join(settings.repos_dir, repo)
    if not os.path.isdir(os.path.join(repo_clone, ".git")):
        return None
    rel = f"docs/work-items/{work_item_id}/15-staging-log.md"
    try:
        # Refresh origin/main so we see freshly-merged staging artifacts.
        fetch = subprocess.run(
            ["git", "-C", repo_clone, "fetch", "origin", "main"],
            check=False, capture_output=True, timeout=30,
        )
        if fetch.returncode != 0:
            # Still best-effort: carry on with whatever origin/main we have,
            # but leave a trace that the verdict may come from a stale ref.
            logger.warning(
                "git fetch origin main failed for %s (exit %s); origin/main may be stale",
                repo, fetch.returncode,
            )
        show = subprocess.run(
            ["git", "-C", repo_clone, "show", f"origin/main:{rel}"],
            check=False, capture_output=True, timeout=15,
            # Explicit UTF-8 instead of the locale default; "replace" keeps the
            # never-raises contract when the blob holds undecodable bytes.
            encoding="utf-8", errors="replace",
        )
    except (subprocess.SubprocessError, OSError) as e:
        logger.warning("staging-log origin/main lookup failed for %s/%s: %s", repo, work_item_id, e)
        return None
    if show.returncode != 0:
        return None
    return show.stdout
|
||||
|
||||
|
||||
def check_staging_status(repo: str, work_item_id: str, branch: str | None = None) -> tuple[bool, str]:
    """Gate the deploy-staging -> deploy transition on the staging verdict.

    The verdict is the deployer's machine-readable ``staging_status:
    SUCCESS|FAILED`` field in the 15-staging-log.md frontmatter. Mirrors
    check_deploy_status (BUG 8): reads ONLY the machine-readable YAML field,
    never the body prose. The deployer runs the staging test suite against
    localhost:8501 and writes the verdict into 15-staging-log.md.

    Lookup order: worktree -> origin/main -> not found.

    Args:
        repo: Repository name.
        work_item_id: Work item whose staging log is checked.
        branch: Optional branch, passed to ``_repo_path`` to locate the worktree.

    Returns:
        (True, ...)  -> staging_status: SUCCESS
        (False, ...) -> staging_status: FAILED, missing field, no frontmatter,
                        unreadable or undecodable log, or log not found
    """
    repo_path = _repo_path(repo, branch)
    log_path = os.path.join(repo_path, f"docs/work-items/{work_item_id}/15-staging-log.md")

    if os.path.isfile(log_path):
        try:
            # Explicit UTF-8: the verdict must not depend on the host locale.
            with open(log_path, "r", encoding="utf-8") as f:
                content = f.read()
        except (OSError, UnicodeDecodeError) as e:
            # An undecodable log is a failed gate, not a crash of the gate.
            return False, f"Error reading staging log: {e}"
        return _parse_staging_status(content)

    # Not in the feature worktree — the deployer may have merged it into main.
    main_content = _staging_log_from_main(repo, work_item_id)
    if main_content is not None:
        return _parse_staging_status(main_content)

    return False, "Staging log not found (15-staging-log.md)"
|
||||
|
||||
|
||||
# Registry for dynamic lookup by name
|
||||
QG_CHECKS = {
|
||||
"check_analysis_approved": check_analysis_approved,
|
||||
@@ -451,4 +545,5 @@ QG_CHECKS = {
|
||||
"check_reviewer_verdict": check_reviewer_verdict,
|
||||
"check_tests_local": check_tests_local,
|
||||
"check_deploy_status": check_deploy_status,
|
||||
"check_staging_status": check_staging_status,
|
||||
}
|
||||
|
||||
@@ -517,6 +517,32 @@ def _handle_qg_failure_rollbacks(
|
||||
f"(job_id={new_job})"
|
||||
)
|
||||
|
||||
# ORCH-35: deployer staging verdict FAILED -> roll deploy-staging back to development.
|
||||
# Staging-провал = код плох; откат на development по образцу БАГ-8 (deploy->development).
|
||||
# НЕ трогает ветку check_deploy_status ниже.
|
||||
if agent == "deployer" and qg_name == "check_staging_status":
|
||||
update_task_stage(task_id, "development")
|
||||
notify_stage_change(task_id, current_stage, "development")
|
||||
plane_notify_stage(work_item_id, current_stage, "development")
|
||||
result.rolled_back_to = "development"
|
||||
set_issue_blocked(work_item_id)
|
||||
notify_qg_failure(task_id, "deploy-staging", "check_staging_status", reason)
|
||||
plane_add_comment(
|
||||
work_item_id,
|
||||
f"\u274c Staging gate FAILED ({reason}). Rolled back to development. "
|
||||
f"Developer \u043d\u0443\u0436\u0435\u043d \u0434\u043b\u044f \u0444\u0438\u043a\u0441\u0430.",
|
||||
author="deployer",
|
||||
)
|
||||
send_telegram(
|
||||
f"\U0001f6a8 {work_item_id}: Staging FAILED ({reason}). "
|
||||
f"Rolled back to development. Needs fix."
|
||||
)
|
||||
result.alerted = True
|
||||
logger.error(
|
||||
f"Task {task_id}: deployer staging verdict FAILED, rolled back deploy-staging -> "
|
||||
f"development ({reason})"
|
||||
)
|
||||
|
||||
# БАГ 8: deployer verdict FAILED -> roll deploy back to development.
|
||||
# The launcher's exit_code-based guard (launcher.py:475) never fires because
|
||||
# the LLM process exit code is always 0; this gate fires on the machine-readable
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
"""Stage machine for orchestrator pipeline.
|
||||
|
||||
Stages:
|
||||
created → analysis → architecture → development → review → testing → deploy → done
|
||||
created → analysis → architecture → development → review → testing → deploy-staging → deploy → done
|
||||
|
||||
Each stage defines:
|
||||
- next: the stage to advance to
|
||||
@@ -15,8 +15,9 @@ STAGE_TRANSITIONS = {
|
||||
"architecture": {"next": "development", "agent": "developer", "qg": "check_architecture_done"},
|
||||
"development": {"next": "review", "agent": "reviewer", "qg": "check_ci_green"},
|
||||
"review": {"next": "testing", "agent": "tester", "qg": "check_reviewer_verdict"},
|
||||
"testing": {"next": "deploy", "agent": "deployer", "qg": "check_tests_passed"},
|
||||
"deploy": {"next": "done", "agent": None, "qg": "check_deploy_status"},
|
||||
"testing": {"next": "deploy-staging", "agent": "deployer", "qg": "check_tests_passed"},
|
||||
"deploy-staging": {"next": "deploy", "agent": "deployer", "qg": "check_staging_status"},
|
||||
"deploy": {"next": "done", "agent": None, "qg": "check_deploy_status"},
|
||||
"done": {"next": None, "agent": None, "qg": None},
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user