developer(ET): auto-commit from developer run_id=355
This commit is contained in:
@@ -374,6 +374,27 @@ class Settings(BaseSettings):
|
||||
reaper_finalize_grace_s: int = 300
|
||||
lease_reclaim_enabled: bool = True
|
||||
|
||||
# ORCH-071: merge-verify under-gate on the `deploy -> done` edge. For the
|
||||
# self-hosting repo the `deploy` stage runs the DETERMINISTIC self-deploy path
|
||||
# (Phase A/B/C), where the LLM `deployer` agent — historically the ONLY actor
|
||||
# that merged the feature PR into `main` — never runs. Result: a "green" deploy
|
||||
# could reach `done` while the PR stayed `open` (phantom merge, postmortem
|
||||
# LESSONS_2026-06-08). This under-gate (an inline hook in advance_stage, NOT a new
|
||||
# STAGE_TRANSITIONS edge or registered QG) runs a deterministic merge-actor +
|
||||
# post-deploy verification before `done`: not-merged -> alert + HOLD (no done),
|
||||
# merged -> normal advance. Mirrors merge_gate_* / image_freshness_* rollout.
|
||||
# merge_verify_enabled -> global kill-switch; False -> strictly the prior
|
||||
# behaviour (no merge/verify), env ORCH_MERGE_VERIFY_ENABLED.
|
||||
# merge_verify_repos -> CSV of repos where the under-gate is REAL; empty ->
|
||||
# only the self-hosting repo (orchestrator). Mirrors
|
||||
# merge_gate_repos / self_deploy_repos.
|
||||
# merge_pr_timeout_s -> per Gitea merge/list HTTP call timeout.
|
||||
# merge_verify_timeout_s-> git fetch/merge-base timeout for the ancestor check.
|
||||
merge_verify_enabled: bool = True
|
||||
merge_verify_repos: str = ""
|
||||
merge_pr_timeout_s: int = 60
|
||||
merge_verify_timeout_s: int = 60
|
||||
|
||||
# Telegram notifications
|
||||
telegram_bot_token: str = ""
|
||||
telegram_chat_id: str = ""
|
||||
|
||||
@@ -147,6 +147,7 @@ async def queue():
|
||||
from .reconciler import reconciler
|
||||
from .job_reaper import reaper
|
||||
from . import post_deploy
|
||||
from . import merge_gate
|
||||
return {
|
||||
"counts": job_status_counts(),
|
||||
"max_concurrency": worker.max_concurrency,
|
||||
@@ -155,5 +156,6 @@ async def queue():
|
||||
"reconcile": reconciler.status(),
|
||||
"reaper": reaper.status(),
|
||||
"post_deploy": post_deploy.status(),
|
||||
"merge_verify": merge_gate.merge_verify_status(),
|
||||
"recent": recent_jobs(10),
|
||||
}
|
||||
|
||||
@@ -485,3 +485,193 @@ def pr_already_merged(repo: str, branch: str) -> bool:
|
||||
except Exception as e: # noqa: BLE001 - never-raise contract
|
||||
logger.warning("pr_already_merged check failed for %s/%s: %s", repo, branch, e)
|
||||
return False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ORCH-071: deterministic merge-actor + post-deploy merge verification.
|
||||
#
|
||||
# For the self-hosting repo the `deploy` stage runs the deterministic self-deploy
|
||||
# path (Phase A/B/C) and the LLM `deployer` agent — historically the ONLY actor
|
||||
# that merged the feature PR into `main` — never runs. These two helpers close the
|
||||
# "phantom merge" gap (LESSONS_2026-06-08): a deterministic actor merges the PR via
|
||||
# the Gitea PR-merge API (NEVER a push/force-push to main, INV-4) and a verifier
|
||||
# confirms `main` actually received the commit before the pipeline reaches `done`.
|
||||
# Both wire into the `deploy -> done` under-gate (stage_engine._handle_merge_verify).
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Lightweight in-process observability counters (D8) for the merge-verify
# under-gate. The dict is created once at import time, so the values start from
# zero on every process start; they are surfaced read-only via
# `merge_verify_status()` in GET /queue. Purely informational: never the source
# of truth for any decision.
_MERGE_VERIFY_COUNTERS: dict = {
    # Incremented by note_merge_verified().
    "merge_verified_total": 0,
    # Incremented by note_not_merged_alert().
    "not_merged_alerts_total": 0,
    # Work-item id passed to the most recent note_not_merged_alert() call.
    "last_alert_wi": None,
}
|
||||
|
||||
|
||||
def note_merge_verified() -> None:
    """Count one confirmed merge (the 'merge verified -> done' outcome).

    Observability only: any failure while updating the counter is swallowed so
    that bookkeeping can never influence the gate decision. Never raises.
    """
    key = "merge_verified_total"
    try:
        _MERGE_VERIFY_COUNTERS[key] = _MERGE_VERIFY_COUNTERS[key] + 1
    except Exception:  # noqa: BLE001 - observability must never break a decision
        return
|
||||
|
||||
|
||||
def note_not_merged_alert(work_item_id: str | None) -> None:
|
||||
"""Bump the 'deploy succeeded but not merged' counter. Never raises."""
|
||||
try:
|
||||
_MERGE_VERIFY_COUNTERS["not_merged_alerts_total"] += 1
|
||||
_MERGE_VERIFY_COUNTERS["last_alert_wi"] = work_item_id
|
||||
except Exception: # noqa: BLE001 - observability must never break a decision
|
||||
pass
|
||||
|
||||
|
||||
def merge_verify_status() -> dict:
    """Build the merge-verify section of the GET /queue payload.

    Returns:
        A dict with the configured ``enabled`` flag and ``repos`` CSV plus the
        three in-process counters. If anything goes wrong while building it, the
        error is logged and ``{"enabled": False}`` is returned instead.

    Never raises.
    """
    counter_keys = ("merge_verified_total", "not_merged_alerts_total", "last_alert_wi")
    try:
        snapshot = {
            "enabled": bool(settings.merge_verify_enabled),
            "repos": settings.merge_verify_repos or "",
        }
        for key in counter_keys:
            snapshot[key] = _MERGE_VERIFY_COUNTERS[key]
        return snapshot
    except Exception as exc:  # noqa: BLE001 - never-raise contract
        logger.warning("merge_verify_status error: %s", exc)
        return {"enabled": False}
|
||||
|
||||
|
||||
def merge_verify_applies(repo: str) -> bool:
    """Tell whether the ORCH-071 merge-verify under-gate is REAL for ``repo``.

    Decision order (FR-5 / AC-10):
      1. ``settings.merge_verify_enabled`` is falsy -> ``False`` (global
         kill-switch).
      2. ``settings.merge_verify_repos`` (CSV) is non-empty -> ``True`` only when
         ``repo`` matches one of the entries; the comparison ignores case and
         surrounding whitespace.
      3. Empty CSV -> whatever ``is_self_hosting_repo(repo)`` answers.

    Any exception is logged and turned into ``False`` (no-op, the safe default).
    Never raises.
    """
    try:
        if not settings.merge_verify_enabled:
            return False

        csv = (settings.merge_verify_repos or "").strip()
        if not csv:
            # Lazy import keeps this a leaf-ish module (qg.checks imports merge_gate lazily).
            from .qg.checks import is_self_hosting_repo

            return is_self_hosting_repo(repo)

        wanted = (repo or "").strip().lower()
        return any(
            entry.strip().lower() == wanted
            for entry in csv.split(",")
            if entry.strip()
        )
    except Exception as exc:  # noqa: BLE001 - never-raise contract
        logger.warning("merge_verify_applies error for %s: %s", repo, exc)
        return False
|
||||
|
||||
|
||||
def merge_pr(repo: str, branch: str) -> tuple[bool, str]:
    """Deterministically merge the open PR for ``branch`` via the Gitea PR-merge API.

    The self-hosting deterministic merge-actor (FR-1 / D3). NEVER pushes or
    force-pushes ``main`` (INV-4/AC-8): the only mutation issued here is the Gitea
    ``POST /pulls/{index}/merge`` call.

    Algorithm:
      1. ``pr_already_merged`` is True -> no-op ``(True, "already-merged")``
         (INV-5/AC-9).
      2. Page through ``GET /repos/{owner}/{repo}/pulls?state=open`` looking for
         the open PR whose head ref equals ``branch``. The listing is paginated,
         so reading only the first page would miss the PR when many PRs are open
         and produce a false "no open PR". Malformed entries (non-dict, or a
         missing/``null`` ``head``) are skipped instead of aborting the scan.
         Nothing found -> ``(False, "no open PR")``.
      3. ``POST /repos/{owner}/{repo}/pulls/{index}/merge`` (``Do: merge``) ->
         200/201 -> ``(True, "merged PR #<n>")``; any other status ->
         ``(False, "merge failed: HTTP <status>")``.

    Args:
        repo: Repository name under ``settings.gitea_owner``.
        branch: Head branch of the feature PR.

    Returns:
        ``(ok, reason)``; ``ok`` is True only when the PR is already merged or
        the merge call succeeded.

    Never raises (INV-1/AC-9 / TC-09): any HTTP/parse error -> ``(False, reason)``.
    """
    # Requested page size, and a hard cap on pages so a misbehaving server can
    # never keep this loop running.
    # NOTE(review): assumes the server honours ``limit=50``; if it caps pages
    # lower, the scan stops after the first (short) page, which is exactly what
    # the previous single-request implementation did.
    page_size = 50
    max_pages = 20
    try:
        if pr_already_merged(repo, branch):
            logger.info("merge_pr: %s/%s already merged -> no-op", repo, branch)
            return True, "already-merged"

        import httpx

        owner = settings.gitea_owner
        headers = {"Authorization": f"token {settings.gitea_token}"}
        base = f"{settings.gitea_url}/api/v1/repos/{owner}/{repo}"
        timeout = settings.merge_pr_timeout_s

        index = None
        for page in range(1, max_pages + 1):
            resp = httpx.get(
                f"{base}/pulls",
                params={"state": "open", "page": page, "limit": page_size},
                headers=headers,
                timeout=timeout,
            )
            if resp.status_code != 200:
                return False, f"list PRs failed: HTTP {resp.status_code}"
            pulls = resp.json() or []
            matched = False
            for pr in pulls:
                if not isinstance(pr, dict):
                    continue
                head = pr.get("head")
                if isinstance(head, dict) and head.get("ref") == branch:
                    index = pr.get("number")
                    matched = True
                    break
            # A short page is the last page; stop as soon as the PR is found.
            if matched or len(pulls) < page_size:
                break
        if index is None:
            return False, "no open PR"

        m = httpx.post(
            f"{base}/pulls/{index}/merge",
            json={"Do": "merge"},
            headers=headers,
            timeout=timeout,
        )
        if m.status_code in (200, 201):
            logger.info("merge_pr: merged PR #%s for %s/%s", index, repo, branch)
            return True, f"merged PR #{index}"
        # The response body goes to the log only; the returned reason stays short.
        detail = (m.text or "").strip()[:200]
        logger.warning(
            "merge_pr: merge failed for %s/%s PR #%s: HTTP %s %s",
            repo, branch, index, m.status_code, detail,
        )
        return False, f"merge failed: HTTP {m.status_code}"
    except Exception as e:  # noqa: BLE001 - never-raise contract
        logger.warning("merge_pr unexpected error for %s/%s: %s", repo, branch, e)
        return False, f"merge error: {e}"
|
||||
|
||||
|
||||
def verify_merged_to_main(repo: str, branch: str, sha: str) -> bool:
    """Return True iff the deployed commit is confirmed merged into ``origin/main``.

    Post-deploy verification (FR-2 / D4). The merge counts as confirmed when EITHER
      * ``pr_already_merged(repo, branch)`` is True, OR
      * ``git merge-base --is-ancestor <sha> origin/main`` exits 0 in the worktree
        returned by ``ensure_worktree(repo, branch)``, after a
        ``git fetch origin main``.

    Args:
        repo: Repository name.
        branch: Feature branch whose PR / worktree is inspected.
        sha: The validated commit. When empty, the git check is skipped and only
            the PR-merged check can confirm.

    Returns:
        True when confirmed; False when not confirmed or on any git/HTTP error
        (fail-closed for ``done``).

    Never raises (INV-1/AC-7 / TC-04): the exception is never propagated to the
    caller.
    """
    try:
        if pr_already_merged(repo, branch):
            return True
        if not sha:
            logger.warning(
                "verify_merged_to_main: empty SHA for %s/%s and PR not known-merged",
                repo, branch,
            )
            return False
        try:
            wt = ensure_worktree(repo, branch)
        except Exception as e:  # noqa: BLE001 - never-raise contract
            logger.warning(
                "verify_merged_to_main: worktree error for %s/%s: %s", repo, branch, e
            )
            return False

        timeout = settings.merge_verify_timeout_s
        fetch = subprocess.run(
            ["git", "-C", wt, "fetch", "origin", "main"],
            capture_output=True, timeout=timeout, check=False,
        )
        if fetch.returncode != 0:
            # Do not abort: the ancestor check below still runs against whatever
            # origin/main is present locally. Log the failure so that a negative
            # verdict caused by a stale ref can be diagnosed.
            stderr = (fetch.stderr or b"").decode("utf-8", "replace").strip()[:200]
            logger.warning(
                "verify_merged_to_main: git fetch failed for %s/%s (rc=%s): %s",
                repo, branch, fetch.returncode, stderr,
            )
        r = subprocess.run(
            ["git", "-C", wt, "merge-base", "--is-ancestor", sha, "origin/main"],
            capture_output=True, timeout=timeout, check=False,
        )
        # Exit code 0 means "is an ancestor"; any other code is treated as not
        # confirmed.
        return r.returncode == 0
    except Exception as e:  # noqa: BLE001 - never-raise contract
        logger.warning(
            "verify_merged_to_main unexpected error for %s/%s: %s", repo, branch, e
        )
        return False
|
||||
|
||||
@@ -349,3 +349,66 @@ def write_deploy_log(repo: str, work_item_id: str, branch: str, exit_code, statu
|
||||
except (subprocess.SubprocessError, OSError) as e:
|
||||
logger.warning("write_deploy_log: git commit/push best-effort failed: %s", e)
|
||||
return True
|
||||
|
||||
|
||||
def _upsert_merged_to_main(frontmatter: str, value: str) -> str:
    """Return ``frontmatter`` with its ``merged_to_main:`` line set to ``value``."""
    stamped = f"merged_to_main: {value}"
    lines = []
    found = False
    for ln in frontmatter.splitlines():
        if ln.strip().lower().startswith("merged_to_main:"):
            lines.append(stamped)
            found = True
        else:
            lines.append(ln)
    if not found:
        # No existing key: add it as the last line of the frontmatter block.
        lines.append(stamped)
    # Leading/trailing blank lines are dropped; the caller re-adds the delimiters.
    return "\n".join(lines).strip("\n")


def record_merged_to_main(repo: str, work_item_id: str, branch: str, merged: bool) -> bool:
    """Stamp ``merged_to_main: true|false`` into 14-deploy-log.md frontmatter (ORCH-071).

    Machine-readable observability for the merge-verify under-gate. Only the
    ``merged_to_main:`` line is added or updated inside the leading ``---``
    frontmatter block; every other frontmatter line (including
    ``deploy_status:``) and the document body are written back unchanged.

    Args:
        repo: Repository name, used to resolve the worktree.
        work_item_id: Work item whose ``docs/work-items/<id>/14-deploy-log.md``
            is amended.
        branch: Branch whose worktree holds the deploy log.
        merged: Value to record.

    Returns:
        True when the log carries the requested value on return (written now, or
        already present so no write was needed); False when the stamp was skipped
        (worktree error, missing/unreadable log, no frontmatter, write error).

    Best-effort and idempotent: every I/O or decoding error is logged and
    swallowed. Never raises.
    """
    from .git_worktree import get_worktree_path

    rel = f"docs/work-items/{work_item_id}/14-deploy-log.md"
    try:
        wt = get_worktree_path(repo, branch)
    except Exception as e:  # noqa: BLE001 - never-raise
        logger.warning("record_merged_to_main: worktree error for %s/%s: %s", repo, branch, e)
        return False
    path = os.path.join(wt, rel)
    try:
        with open(path, "r", encoding="utf-8") as f:
            content = f.read()
    except FileNotFoundError:
        logger.info("record_merged_to_main: no deploy log at %s (skip)", path)
        return False
    except (OSError, UnicodeDecodeError) as e:
        # UnicodeDecodeError is a ValueError, not an OSError: without it a
        # non-UTF-8 log would break the never-raise contract.
        logger.warning("record_merged_to_main: read error at %s: %s", path, e)
        return False

    if not content.startswith("---"):
        # No frontmatter to amend — do not fabricate one (keep the contract minimal).
        logger.info("record_merged_to_main: no frontmatter in %s (skip)", path)
        return False
    # parts == ["", <frontmatter>, <rest of the document>] for a well-formed log.
    parts = content.split("---", 2)
    if len(parts) < 3:
        # Opening delimiter without a closing one: leave the file alone.
        return False

    value = "true" if merged else "false"
    new_content = "---\n" + _upsert_merged_to_main(parts[1], value) + "\n---" + parts[2]
    if new_content == content:
        # Already stamped with this value: skip the write so a repeated call
        # does not touch the file.
        return True
    try:
        with open(path, "w", encoding="utf-8") as f:
            f.write(new_content)
    except OSError as e:
        logger.warning("record_merged_to_main: write error at %s: %s", path, e)
        return False
    return True
|
||||
|
||||
@@ -346,6 +346,22 @@ def advance_stage(
|
||||
)
|
||||
return result
|
||||
|
||||
# --- ORCH-071 merge-verify under-gate (deploy -> done edge) ----------
|
||||
# The SINGLE choke-point that gates EVERY path into terminal `done`
|
||||
# (finalizer Phase C, reconciler F-1, job-reaper re-drive) on a CONFIRMED
|
||||
# merge of the feature PR into `main`. For the self-hosting repo the
|
||||
# deterministic self-deploy path never runs the LLM `deployer` that used to
|
||||
# merge the PR, so a green deploy could reach `done` while the PR stayed
|
||||
# `open` (phantom merge, ORCH-071). This inline hook runs a deterministic
|
||||
# merge-actor + post-deploy verification BEFORE update_task_stage; if the
|
||||
# merge is not confirmed it HOLDs (alert, NO done, NO rollback) and returns
|
||||
# without advancing. Not a STAGE_TRANSITIONS edge / registered QG — it is an
|
||||
# edge sub-gate (mirrors the merge-gate inline hook), so those contracts are
|
||||
# unchanged. No-op for non-self repos / kill-switch off (1:1 prior behaviour).
|
||||
if current_stage == "deploy" and next_stage == "done":
|
||||
if _handle_merge_verify(task_id, repo, work_item_id, branch, result):
|
||||
return result
|
||||
|
||||
# --- Advance ---------------------------------------------------------
|
||||
update_task_stage(task_id, next_stage)
|
||||
# Telegram live tracker: the analysis->architecture advance is the human
|
||||
@@ -1260,6 +1276,106 @@ def _deploy_finalize_defer_count(task_id: int) -> int:
|
||||
return n
|
||||
|
||||
|
||||
def _stamp_merge_verdict(task_id, repo, work_item_id, branch, merged: bool) -> None:
    """Best-effort stamp of ``merged_to_main`` into the deploy log; failures are only logged."""
    try:
        self_deploy.record_merged_to_main(repo, work_item_id, branch, merged)
    except Exception as e:  # noqa: BLE001 - observability best-effort
        flag = "true" if merged else "false"
        logger.warning(f"Task {task_id}: record merged_to_main({flag}) failed: {e}")


def _alert_merge_not_verified(task_id, repo, work_item_id, branch, merge_msg) -> None:
    """Fan out the 'deploy succeeded but not merged' alert: log, Plane (blocked + comment), Telegram."""
    msg = (
        f"deploy succeeded but not merged: {work_item_id} (repo={repo}, "
        f"branch={branch}). `main` НЕ получил commit задачи — задача удержана "
        f"на `deploy` (НЕ done). Нужно ручное вмешательство."
    )
    logger.warning(f"Task {task_id}: {msg}")
    # Plane updates need a work item; each channel is isolated so that one
    # failing notifier never prevents the others (or the HOLD itself).
    if work_item_id:
        try:
            set_issue_blocked(work_item_id)
        except Exception as e:  # noqa: BLE001 - never break the HOLD
            logger.warning(f"Task {task_id}: set_issue_blocked failed: {e}")
        try:
            plane_add_comment(
                work_item_id,
                "\U0001f6a8 Deploy прошёл, но PR НЕ влит в `main` "
                f"(merge: {merge_msg}). Задача удержана на `deploy` (НЕ done). "
                "Нужно влить PR вручную и повторить approve.",
                author="deployer",
            )
        except Exception as e:  # noqa: BLE001 - never break the HOLD
            logger.warning(f"Task {task_id}: plane not-merged comment failed: {e}")
    try:
        send_telegram(f"\U0001f6a8 {msg}")
    except Exception as e:  # noqa: BLE001 - never break the HOLD
        logger.warning(f"Task {task_id}: not-merged telegram failed: {e}")


def _handle_merge_verify(task_id, repo, work_item_id, branch, result: AdvanceResult) -> bool:
    """ORCH-071 merge-verify under-gate on the `deploy -> done` edge.

    Args:
        task_id: Task identifier, used in log messages only.
        repo: Repository the task belongs to.
        work_item_id: Work item used for Plane updates, the deploy-log stamp and
            the alert counter (Plane updates are skipped when it is falsy).
        branch: Feature branch whose PR must be merged.
        result: Mutated in place on HOLD: ``alerted=True``, ``advanced=False``
            and ``note`` set to the hold reason.

    Returns:
        * ``True``  -> INTERVENED (HOLD): the merge is NOT confirmed, or an
          internal error occurred. Alerts were sent and the caller must return
          without advancing; nothing here rolls the task back.
        * ``False`` -> the merge is CONFIRMED, or the under-gate does not apply
          to this repo / the kill-switch is off; the caller proceeds to `done`.

    Steps (D5):
      1. ``merge_gate.merge_verify_applies`` is False -> return False.
      2. Resolve the validated SHA and run the deterministic merge-actor
         ``merge_gate.merge_pr`` (a no-op when the PR is already merged).
      3. ``merge_gate.verify_merged_to_main``:
         * confirmed -> stamp ``merged_to_main: true``, return False.
         * not confirmed -> count + stamp ``merged_to_main: false`` + alert,
           return True.

    Never raises (INV-1/AC-7): any internal error is treated as "not confirmed"
    (HOLD + Telegram alert).
    """
    try:
        if not merge_gate.merge_verify_applies(repo):
            return False  # non-self / kill-switch off -> behave exactly as before.

        from . import image_freshness

        sha = image_freshness.validated_revision(repo, branch)

        # Deterministic merge-actor (no-op if the PR is already merged, INV-5/AC-9).
        # Its outcome is logged and quoted in the alert; the gate decision itself
        # comes from the verification below.
        merged_ok, merge_msg = merge_gate.merge_pr(repo, branch)
        logger.info(
            f"Task {task_id}: merge-verify merge_pr -> ok={merged_ok} ({merge_msg})"
        )

        if merge_gate.verify_merged_to_main(repo, branch, sha):
            merge_gate.note_merge_verified()
            _stamp_merge_verdict(task_id, repo, work_item_id, branch, True)
            logger.info(f"Task {task_id}: merge-verify CONFIRMED -> deploy->done allowed")
            return False

        # Not confirmed -> alert + HOLD (no done, no rollback).
        merge_gate.note_not_merged_alert(work_item_id)
        _stamp_merge_verdict(task_id, repo, work_item_id, branch, False)
        _alert_merge_not_verified(task_id, repo, work_item_id, branch, merge_msg)
        result.alerted = True
        result.note = "merge-not-verified-hold"
        result.advanced = False
        return True
    except Exception as e:  # noqa: BLE001 - never-raise contract (INV-1/AC-7)
        # Any internal error -> treat as "not confirmed" -> HOLD + alert, never crash.
        # logger.exception keeps the traceback, which a plain str(e) would lose.
        logger.exception(f"Task {task_id}: _handle_merge_verify error: {e}")
        try:
            merge_gate.note_not_merged_alert(work_item_id)
            send_telegram(
                f"\U0001f6a8 {work_item_id}: ошибка merge-verify ({e}). "
                f"Задача удержана на `deploy` (НЕ done)."
            )
        except Exception:  # noqa: BLE001 - best-effort alert
            pass
        result.alerted = True
        result.note = f"merge-verify-error: {e}"
        result.advanced = False
        return True
|
||||
|
||||
|
||||
def run_deploy_finalizer(job: dict):
|
||||
"""Phase C — deterministic finalizer (reserved-agent `deploy-finalizer`, no LLM).
|
||||
|
||||
|
||||
Reference in New Issue
Block a user