developer(ET): auto-commit from developer run_id=192

2026-06-06 20:01:07 +00:00
parent 1acbb1d28d
commit be980e2a44
18 changed files with 1690 additions and 2 deletions
--- a/src/agents/launcher.py
+++ b/src/agents/launcher.py
@@ -214,7 +214,14 @@ class AgentLauncher:
        Same spawn path as launch(), but threads job['id'] through so the monitor
        can update the job's status (done / requeue / failed) and link jobs.run_id
        to the agent_runs row. Returns the agent_run_id.
+
+        ORCH-036: the reserved-agent ``deploy-finalizer`` is a DETERMINISTIC
+        (no-LLM) job — intercept it BEFORE _spawn (which would raise
+        "Unknown agent", R-6) and run the deploy finalizer synchronously, driving
+        the jobs row status itself. Returns None (no agent_run row).
        """
+        if job.get("agent") == "deploy-finalizer":
+            return self._run_deploy_finalizer_job(job)
        return self._spawn(
            job["agent"],
            job["repo"],
@@ -223,6 +230,27 @@ class AgentLauncher:
            job_id=job["id"],
        )

+    def _run_deploy_finalizer_job(self, job: dict):
+        """ORCH-036 Phase C: run the deterministic deploy finalizer for a job.
+
+        Not an LLM spawn — there is no subprocess/monitor, so the jobs row is
+        marked done/failed here. Every error is contained so a finalizer fault
+        cannot wedge the worker.
+
+        Args:
+            job: the claimed jobs row as a dict; its ``id`` is used to update the
+                row, and the whole dict is handed to
+                ``stage_engine.run_deploy_finalizer``.
+
+        Returns:
+            Always ``None`` (no agent_run row is created for this job).
+        """
+        from ..db import mark_job
+        from .. import stage_engine
+
+        # Read the id once with .get(): indexing job["id"] inside the except
+        # handler could itself raise and escape this guard.
+        job_id = job.get("id")
+        try:
+            stage_engine.run_deploy_finalizer(job)
+            mark_job(job_id, "done")
+            logger.info(f"deploy-finalizer job {job_id} done")
+        except Exception as e:
+            # exception() keeps the traceback so the fault is diagnosable.
+            logger.exception(f"deploy-finalizer job {job_id} failed: {e}")
+            try:
+                mark_job(job_id, "failed", error=f"deploy-finalizer error: {e}")
+            except Exception as mark_err:
+                # Still best-effort, but leave a trace instead of a silent pass.
+                logger.error(
+                    f"deploy-finalizer job {job_id}: could not mark job failed: {mark_err}"
+                )
+        return None
+
    def _spawn(self, agent: str, repo: str, task_content: str = None,
               task_id: int = None, job_id: int = None) -> int:
        """Shared spawn implementation for launch() and launch_job().
--- a/src/config.py
+++ b/src/config.py
@@ -152,6 +152,50 @@ class Settings(BaseSettings):
    merge_defer_delay_s: int = 60
    merge_defer_max_attempts: int = 5

+    # ORCH-036: executable self-deploy (deploy stage drives the host hook).
+    # The `deploy` stage for the self-hosting repo is turned into a REAL prod
+    # restart via a detached host process, gated by a manual approve. Three-phase
+    # design (ADR-001): A=approve-request, B=initiate (human Approved), C=finalizer
+    # maps the hook exit-code -> deploy_status. Non-self repos are unaffected.
+    #
+    #   self_deploy_enabled            -> global kill-switch; False -> no Phase A/B/C
+    #                                     interception (the legacy synchronous deployer
+    #                                     path runs for everyone, env ORCH_SELF_DEPLOY_ENABLED).
+    #   self_deploy_repos              -> CSV of repos where executable self-deploy is
+    #                                     REAL; empty -> only the self-hosting repo
+    #                                     (orchestrator). Mirrors merge_gate_repos.
+    #   deploy_require_manual_approve  -> require a human Approved before the prod
+    #                                     restart (BR-5). Default true; NOT toggled in
+    #                                     ORCH-36 (AC-12). false -> Phase A initiates
+    #                                     immediately (structural branch, off by default).
+    #   deploy_finalize_delay_s        -> delay before the first finalize poll; must be
+    #                                     > the hook health-loop (~60s) so the verdict
+    #                                     usually exists on the first poll.
+    #   deploy_finalize_max_attempts   -> bounded finalize-defer budget (anti-livelock).
+    # ssh / hook target (detached prod restart; real values live on the host):
+    #   deploy_ssh_user / deploy_ssh_host -> ssh target for the host hook (INFRA P-2).
+    #   deploy_hook_script             -> path to the hook ON THE HOST (relative to repo).
+    #   deploy_host_repo_path          -> orchestrator clone path on the host.
+    # prod overrides passed to the hook for build-once (retag staging image -> prod):
+    #   deploy_prod_source_image       -> image validated on staging (retagged, no rebuild).
+    #   deploy_prod_target_service / _port / _image / _compose_profile -> prod profile.
+    #   deploy_prod_prev_image_file    -> prod prev-image snapshot (separate from staging).
+    self_deploy_enabled: bool = True
+    self_deploy_repos: str = ""
+    deploy_require_manual_approve: bool = True
+    deploy_finalize_delay_s: int = 90
+    deploy_finalize_max_attempts: int = 10
+    deploy_ssh_user: str = "slin"
+    deploy_ssh_host: str = ""
+    deploy_hook_script: str = "scripts/orchestrator-deploy-hook.sh"
+    deploy_host_repo_path: str = "/home/slin/repos/orchestrator"
+    deploy_prod_source_image: str = "orchestrator-orchestrator-staging"
+    deploy_prod_target_service: str = "orchestrator"
+    deploy_prod_target_port: int = 8500
+    deploy_prod_target_image: str = "orchestrator-orchestrator"
+    deploy_prod_compose_profile: str = ""
+    deploy_prod_prev_image_file: str = ".deploy-prev-image-prod"
+
    # Telegram notifications
    telegram_bot_token: str = ""
    telegram_chat_id: str = ""
--- a/src/self_deploy.py
+++ b/src/self_deploy.py
@@ -0,0 +1,312 @@
+"""Executable self-deploy primitives (ORCH-036).
+
+The ``deploy`` stage for the self-hosting ``orchestrator`` repo is a REAL prod
+restart, not a paper LLM verdict. Because the prod container (8500) runs the
+worker/agent itself, the restart must be performed by an EXTERNAL host process
+that survives the container dying (BR-2). The orchestration is split into three
+deterministic phases (ADR-001), wired in ``stage_engine``:
+
+  * Phase A — request approve on the ``deploy-staging -> deploy`` edge.
+  * Phase B — a human Plane ``Approved`` initiates the detached host deploy.
+  * Phase C — a deterministic finalizer maps the hook exit-code -> deploy_status.
+
+This module is a **leaf**: it imports only config / git_worktree (and lazily
+``qg.checks.is_self_hosting_repo``), never ``stage_engine`` / ``launcher`` — the
+orchestration that needs those lives in ``stage_engine``. Every public helper
+honours a **never-raise** contract so a deploy-state hiccup can never crash the
+stage engine.
+
+Restart-safe state lives in sentinel files under
+``<repos_dir>/.deploy-state-<repo>/<work_item_id>/`` (mirrors the merge-lease
+pattern, tech spec (ТЗ) §4 — no DB migration), on the shared mount visible to
+BOTH the container (writes and reads the markers below, reads ``result``) and
+the host (writes ``result``):
+  * ``approve-requested`` — Phase A done;
+  * ``initiated``         — Phase B started (idempotency-guard);
+  * ``result``            — the hook exit-code, written by the host WRAPPER
+                            (``echo $? > result``), NOT by the hook itself.
+"""
+
+import logging
+import os
+import shlex
+import subprocess
+
+from .config import settings
+
+logger = logging.getLogger("orchestrator.self_deploy")
+
+# Sentinel marker filenames (see module docstring).
+APPROVE_REQUESTED = "approve-requested"
+INITIATED = "initiated"
+RESULT = "result"
+
+# ssh launch is detached (returns immediately); keep a bounded timeout so a hung
+# ssh handshake never wedges the caller. Passed as ``timeout=`` to
+# subprocess.run, i.e. seconds.
+_SSH_TIMEOUT = 30
+# Per-command bound (seconds) for each git add / commit / push call made by
+# write_deploy_log.
+_GIT_TIMEOUT = 60
+
+
+# ---------------------------------------------------------------------------
+# Conditionality
+# ---------------------------------------------------------------------------
+def self_deploy_applies(repo: str) -> bool:
+    """Decide whether the executable self-deploy phases (A/B/C) are live for ``repo``.
+
+    Resolution order:
+      1. ``settings.self_deploy_enabled`` is falsy -> ``False`` (global kill-switch).
+      2. ``settings.self_deploy_repos`` holds a non-blank CSV -> ``True`` only when
+         ``repo`` (trimmed, case-insensitive) equals one of its non-empty entries.
+      3. Blank CSV -> whatever ``qg.checks.is_self_hosting_repo(repo)`` returns.
+
+    Any exception is logged as a warning and reported as ``False`` (never raises).
+    """
+    try:
+        if not settings.self_deploy_enabled:
+            return False
+
+        csv = (settings.self_deploy_repos or "").strip()
+        if not csv:
+            # Imported lazily so this module stays a leaf at import time.
+            from .qg.checks import is_self_hosting_repo
+            return is_self_hosting_repo(repo)
+
+        entries = (item.strip().lower() for item in csv.split(","))
+        wanted = (repo or "").strip().lower()
+        # Blank CSV entries never match, even when ``wanted`` is itself blank.
+        return any(entry and entry == wanted for entry in entries)
+    except Exception as e:  # noqa: BLE001 - never-raise contract
+        logger.warning("self_deploy_applies error for %s: %s", repo, e)
+        return False
+
+
+# ---------------------------------------------------------------------------
+# exit-code -> deploy_status mapping (pure, unit-tested: TC-01/02/03)
+# ---------------------------------------------------------------------------
+def map_exit_code_to_status(exit_code) -> str:
+    """Map a deploy-hook exit-code to a machine verdict (deterministic, pure).
+
+    Contract (AC-1 / AC-3, hook exit-code contract 0/1/2):
+      * ``0`` (an int, or a string that parses to it) -> ``"SUCCESS"``
+        (health-ok proven by the hook).
+      * ``1`` (rolled back), ``2`` (rollback also failed), anything else, or a
+        non-int/None -> ``"FAILED"`` (fail-closed; never advances on doubt).
+
+    Args:
+        exit_code: the raw exit-code value; any type is accepted.
+
+    Returns:
+        ``"SUCCESS"`` or ``"FAILED"``. Never raises.
+    """
+    # bool is an int subclass: False would otherwise read as exit-code 0.
+    if isinstance(exit_code, bool):
+        return "FAILED"
+    # int() truncates floats (0.5 -> 0); a non-integral or non-finite float is
+    # not a valid exit-code, so it must not be rounded into a SUCCESS.
+    if isinstance(exit_code, float) and not exit_code.is_integer():
+        return "FAILED"
+    try:
+        code = int(exit_code)
+    except (TypeError, ValueError, OverflowError):
+        return "FAILED"
+    return "SUCCESS" if code == 0 else "FAILED"
+
+
+def build_deploy_log(work_item_id: str, exit_code, status: str) -> str:
+    """Render the text of ``14-deploy-log.md`` for a finalized deploy.
+
+    The document opens with a ``---`` delimited frontmatter block carrying
+    ``deploy_status``, ``work_item``, ``hook_exit_code`` and ``deployed_by``,
+    followed by a short human-readable body. Per the AC-10 contract the
+    ``deploy_status:`` line is the verdict consumed by ``check_deploy_status`` /
+    ``_parse_deploy_status``; the body is informational.
+
+    Args:
+        work_item_id: work item identifier, interpolated as-is.
+        exit_code: hook exit-code, interpolated as-is.
+        status: verdict string placed after ``deploy_status:``.
+
+    Returns:
+        The complete file content, ending with a single newline.
+    """
+    frontmatter = "\n".join(
+        [
+            "---",
+            f"deploy_status: {status}",
+            f"work_item: {work_item_id}",
+            f"hook_exit_code: {exit_code}",
+            "deployed_by: deploy-finalizer",
+            "---",
+        ]
+    )
+    heading = "# Deploy log — ORCH-036 executable self-deploy"
+    summary = (
+        f"Прод-деплой завершён хост-хуком с exit-code `{exit_code}` -> "
+        f"`deploy_status: {status}`."
+    )
+    footer = "Вердикт зафиксирован детерминированным finalizer'ом (Фаза C), не LLM."
+    return f"{frontmatter}\n\n{heading}\n\n{summary}\n\n{footer}\n"
+
+
+# ---------------------------------------------------------------------------
+# Sentinel state (restart-safe, no DB migration — tech spec (ТЗ) §4)
+# ---------------------------------------------------------------------------
+def _state_dir(base: str, repo: str, work_item_id: str | None) -> str:
+    """Join ``base``, ``.deploy-state-<repo>`` and the work item id into one path.
+
+    A falsy ``work_item_id`` (``None`` or ``""``) is replaced by the placeholder
+    directory name ``"_"``.
+    """
+    leaf = work_item_id if work_item_id else "_"
+    repo_dir = ".deploy-state-{}".format(repo)
+    return os.path.join(base, repo_dir, leaf)
+
+
+def container_state_dir(repo: str, work_item_id: str | None) -> str:
+    """Return the state directory rooted at ``settings.repos_dir``.
+
+    This is the path used by the marker helpers running inside the container.
+    """
+    container_base = settings.repos_dir
+    return _state_dir(container_base, repo, work_item_id)
+
+
+def host_state_dir(repo: str, work_item_id: str | None) -> str:
+    """Return the state directory rooted at ``settings.host_repos_dir``.
+
+    Per the module design this names the same physical directory as
+    ``container_state_dir`` (shared mount), spelled the way the host sees it.
+    It is the path embedded in the ssh command, so the host wrapper writes the
+    ``result`` sentinel where the container can later read it.
+    """
+    host_base = settings.host_repos_dir
+    return _state_dir(host_base, repo, work_item_id)
+
+
+def marker_path(repo: str, work_item_id: str | None, name: str) -> str:
+    """Return the container-side path of the sentinel file ``name``."""
+    state_dir = container_state_dir(repo, work_item_id)
+    return os.path.join(state_dir, name)
+
+
+def has_marker(repo: str, work_item_id: str | None, name: str) -> bool:
+    """Report whether the sentinel ``name`` exists as a regular file.
+
+    Any exception while resolving or probing the path is logged as a warning
+    and reported as ``False`` (never raises).
+    """
+    try:
+        target = marker_path(repo, work_item_id, name)
+        found = os.path.isfile(target)
+    except Exception as e:  # noqa: BLE001 - never-raise
+        logger.warning("has_marker error for %s/%s/%s: %s", repo, work_item_id, name, e)
+        return False
+    return found
+
+
+def write_marker(repo: str, work_item_id: str | None, name: str, content: str = "") -> bool:
+    """Create/overwrite a sentinel (best-effort). Never raises.
+
+    Args:
+        repo: repository name (selects the ``.deploy-state-<repo>`` directory).
+        work_item_id: work item id; a falsy value maps to the ``_`` directory.
+        name: sentinel filename, e.g. ``INITIATED``.
+        content: payload written via ``str(content)``; defaults to empty.
+
+    Returns:
+        ``True`` when the file was written, ``False`` on any failure (logged as
+        a warning).
+    """
+    try:
+        d = container_state_dir(repo, work_item_id)
+        os.makedirs(d, exist_ok=True)
+        with open(os.path.join(d, name), "w", encoding="utf-8") as f:
+            f.write(str(content))
+        return True
+    # Broad on purpose, matching has_marker: path building can raise TypeError
+    # (e.g. an unset base dir) and encoding can raise UnicodeEncodeError, neither
+    # of which is an OSError.
+    except Exception as e:  # noqa: BLE001 - never-raise contract
+        logger.warning("write_marker error for %s/%s/%s: %s", repo, work_item_id, name, e)
+        return False
+
+
+def read_result(repo: str, work_item_id: str | None) -> tuple[bool, int | None]:
+    """Read the ``result`` sentinel (hook exit-code written by the host wrapper).
+
+    Returns ``(present, exit_code)``:
+      * ``(False, None)`` -> not written yet, still empty, or unreadable
+                             (finalizer should DEFER);
+      * ``(True, <int>)`` -> verdict ready;
+      * ``(True, 1)``     -> present but corrupt/unparseable (non-integer text or
+                             bytes that are not valid UTF-8) -> treated as a
+                             failure code (fail-closed) so we never advance on garbage.
+    Never raises.
+    """
+    try:
+        # Path resolution sits inside the guard too: it can raise on bad settings.
+        p = marker_path(repo, work_item_id, RESULT)
+        with open(p, "r", encoding="utf-8") as f:
+            raw = f.read().strip()
+    except FileNotFoundError:
+        return False, None
+    except UnicodeDecodeError:
+        # Not an OSError, so it used to escape. The file exists but holds
+        # garbage, which is the "corrupt" case, not the "not yet written" case.
+        logger.warning("read_result: undecodable result for %s/%s", repo, work_item_id)
+        return True, 1
+    except Exception as e:  # noqa: BLE001 - never-raise contract
+        logger.warning("read_result error for %s/%s: %s", repo, work_item_id, e)
+        return False, None
+    if raw == "":
+        # An empty file is treated as "not written yet".
+        return False, None
+    try:
+        return True, int(raw)
+    except ValueError:
+        logger.warning("read_result: corrupt result %r for %s/%s", raw, repo, work_item_id)
+        return True, 1
+
+
+# ---------------------------------------------------------------------------
+# Detached host deploy: ssh + setsid (Phase B)
+# ---------------------------------------------------------------------------
+def build_deploy_command(repo: str, work_item_id: str | None, branch: str) -> list[str]:
+    """Build the ssh argv that launches the DETACHED prod deploy on the host.
+
+    The remote command runs the hook via ``setsid`` with stdin/stdout detached and
+    backgrounded (``&``) so the process SURVIVES the prod container restart (BR-2),
+    then the WRAPPER (not the hook) writes the exit-code to the ``result`` sentinel:
+
+        setsid bash -c 'cd <repo> && <prod env...> bash <hook> --deploy; \
+            echo $? > <result>' >> <hook.log> 2>&1 </dev/null &
+
+    Build-once (BR-6): ``SOURCE_IMAGE=<staging-image>`` makes the hook retag the
+    staging-validated image to the prod tag instead of rebuilding (no ``docker
+    build``). The exit-code contract of the hook is untouched.
+
+    Args:
+        repo: repository name; selects the host-side state directory.
+        work_item_id: work item id; selects the per-item state sub-directory.
+        branch: accepted for signature symmetry but not used by this function.
+
+    Returns:
+        ``["ssh", "-o", "StrictHostKeyChecking=no", <target>, <remote command>]``
+        where ``<target>`` is ``user@host`` (or just ``host`` when the configured
+        user is blank). An empty ``deploy_ssh_host`` is NOT rejected here: it
+        produces a target such as ``"user@"``.
+    """
+    # Host-side paths: the command runs on the host, so the sentinel and log must
+    # be spelled the way the host sees the shared directory.
+    host_dir = host_state_dir(repo, work_item_id)
+    result_sentinel = os.path.join(host_dir, RESULT)
+    hook_log = os.path.join(host_dir, "hook.log")
+
+    # Shell ``VAR=value`` prefixes for the hook invocation; every string value is
+    # shlex-quoted, the port is forced through int().
+    env_assignments = (
+        f"SOURCE_IMAGE={shlex.quote(settings.deploy_prod_source_image)} "
+        f"TARGET_SERVICE={shlex.quote(settings.deploy_prod_target_service)} "
+        f"TARGET_PORT={int(settings.deploy_prod_target_port)} "
+        f"TARGET_IMAGE={shlex.quote(settings.deploy_prod_target_image)} "
+        f"COMPOSE_PROFILE={shlex.quote(settings.deploy_prod_compose_profile)} "
+        f"PREV_IMAGE_FILE={shlex.quote(settings.deploy_prod_prev_image_file)}"
+    )
+    # ``;`` (not ``&&``) before echo: the exit-code is recorded whether the hook
+    # succeeded or failed. If ``cd`` fails the hook is skipped and cd's status is
+    # what gets written.
+    inner = (
+        f"cd {shlex.quote(settings.deploy_host_repo_path)} && "
+        f"{env_assignments} "
+        f"bash {shlex.quote(settings.deploy_hook_script)} --deploy; "
+        f"echo $? > {shlex.quote(result_sentinel)}"
+    )
+    # ``inner`` is quoted a second time as the single argument of ``bash -c``.
+    remote = (
+        f"setsid bash -c {shlex.quote(inner)} "
+        f">> {shlex.quote(hook_log)} 2>&1 </dev/null &"
+    )
+    user = (settings.deploy_ssh_user or "").strip()
+    host = (settings.deploy_ssh_host or "").strip()
+    target = f"{user}@{host}" if user else host
+    # NOTE(review): StrictHostKeyChecking=no relaxes ssh host-key checking for
+    # this connection — confirm that is acceptable for the prod host.
+    return ["ssh", "-o", "StrictHostKeyChecking=no", target, remote]
+
+
+def initiate_deploy(repo: str, work_item_id: str | None, branch: str) -> tuple[bool, str]:
+    """Launch the detached prod deploy on the host (Phase B). Never raises.
+
+    The ssh call returns immediately (the remote process is detached via setsid +
+    ``&``).
+
+    Args:
+        repo: repository name (selects the shared state directory).
+        work_item_id: work item id (selects the per-item state sub-directory).
+        branch: forwarded to ``build_deploy_command``.
+
+    Returns:
+        ``(True, msg)`` when ssh dispatched the detached process, or
+        ``(False, reason)`` so the caller can alert and let the human re-approve.
+        Reasons cover: unconfigured ssh host, command build error, ssh timeout,
+        ssh spawn error, and a non-zero ssh return code.
+    """
+    # An empty host would make the ssh target "user@" (or ""), which can only
+    # fail with an opaque ssh error; report the misconfiguration directly.
+    if not (settings.deploy_ssh_host or "").strip():
+        return False, "deploy_ssh_host is not configured"
+
+    # Ensure the shared state dir exists so the host wrapper can write `result`.
+    try:
+        os.makedirs(container_state_dir(repo, work_item_id), exist_ok=True)
+    except OSError as e:
+        logger.warning("initiate_deploy: state dir error for %s/%s: %s", repo, work_item_id, e)
+
+    # Building the argv reads settings and coerces the port with int(); keep a
+    # bad setting from breaking the never-raise contract.
+    try:
+        cmd = build_deploy_command(repo, work_item_id, branch)
+    except Exception as e:  # noqa: BLE001 - never-raise contract
+        return False, f"deploy command build error: {e}"
+
+    try:
+        r = subprocess.run(cmd, capture_output=True, text=True, timeout=_SSH_TIMEOUT)
+    except subprocess.TimeoutExpired:
+        return False, "ssh launch timeout"
+    except (subprocess.SubprocessError, OSError) as e:
+        return False, f"ssh launch error: {e}"
+    if r.returncode != 0:
+        # Cap the detail so a noisy ssh error does not flood the alert text.
+        detail = ((r.stderr or "") + (r.stdout or "")).strip()[:200]
+        return False, f"ssh launch failed (rc={r.returncode}): {detail}"
+    logger.info("initiate_deploy: detached prod deploy dispatched for %s/%s", repo, work_item_id)
+    return True, "deploy initiated (detached host process)"
+
+
+# ---------------------------------------------------------------------------
+# Deploy log write + best-effort merge (Phase C)
+# ---------------------------------------------------------------------------
+def write_deploy_log(repo: str, work_item_id: str, branch: str, exit_code, status: str) -> bool:
+    """Write 14-deploy-log.md into the task worktree (so check_deploy_status reads
+    it) and best-effort commit+push it. Never raises.
+
+    Args:
+        repo: repository name, used to resolve the task worktree.
+        work_item_id: work item id; selects ``docs/work-items/<id>/``.
+        branch: task branch; used to resolve the worktree and as the push target.
+        exit_code: hook exit-code recorded in the log.
+        status: verdict recorded as ``deploy_status`` in the log.
+
+    Returns:
+        True iff the file was written. Failures of git add / commit / push do
+        NOT change the result; they are logged as warnings.
+    """
+    from .git_worktree import get_worktree_path
+
+    rel = f"docs/work-items/{work_item_id}/14-deploy-log.md"
+    try:
+        wt = get_worktree_path(repo, branch)
+    except Exception as e:  # noqa: BLE001 - never-raise
+        logger.error("write_deploy_log: worktree error for %s/%s: %s", repo, branch, e)
+        return False
+
+    path = os.path.join(wt, rel)
+    content = build_deploy_log(work_item_id, exit_code, status)
+    try:
+        os.makedirs(os.path.dirname(path), exist_ok=True)
+        with open(path, "w", encoding="utf-8") as f:
+            f.write(content)
+    except OSError as e:
+        logger.error("write_deploy_log: write error at %s: %s", path, e)
+        return False
+
+    # Best-effort commit + push (the gate also falls back to origin/main).
+    git_env = {
+        **os.environ,
+        "HOME": "/home/slin",
+        "GIT_AUTHOR_NAME": "deploy-finalizer",
+        "GIT_AUTHOR_EMAIL": "deploy-finalizer@mva154.local",
+        "GIT_COMMITTER_NAME": "deploy-finalizer",
+        "GIT_COMMITTER_EMAIL": "deploy-finalizer@mva154.local",
+    }
+
+    def _git(*args: str) -> subprocess.CompletedProcess:
+        # One bounded git invocation inside the worktree with the finalizer identity.
+        return subprocess.run(
+            ["git", "-C", wt, *args],
+            capture_output=True, text=True, timeout=_GIT_TIMEOUT, env=git_env,
+        )
+
+    def _detail(proc: subprocess.CompletedProcess) -> str:
+        # Short, single-string excerpt of git's output for the log line.
+        return ((proc.stderr or "") + (proc.stdout or "")).strip()[:200]
+
+    try:
+        added = _git("add", rel)
+        if added.returncode != 0:
+            # Do not commit when staging failed: the commit would either fail
+            # or pick up unrelated, previously staged changes.
+            logger.warning(
+                "write_deploy_log: git add failed (rc=%s): %s",
+                added.returncode, _detail(added),
+            )
+            return True
+        commit = _git(
+            "commit", "-m",
+            f"deploy(ORCH-036): finalize {status} for {work_item_id}",
+        )
+        if commit.returncode != 0:
+            # Also reached when there is nothing to commit (identical log).
+            logger.warning(
+                "write_deploy_log: git commit skipped/failed (rc=%s): %s",
+                commit.returncode, _detail(commit),
+            )
+            return True
+        pushed = _git("push", "origin", branch)
+        if pushed.returncode != 0:
+            logger.warning(
+                "write_deploy_log: git push failed (rc=%s): %s",
+                pushed.returncode, _detail(pushed),
+            )
+    except (subprocess.SubprocessError, OSError) as e:
+        logger.warning("write_deploy_log: git commit/push best-effort failed: %s", e)
+    return True
--- a/src/stage_engine.py
+++ b/src/stage_engine.py
@@ -27,6 +27,7 @@ Agent-selection bug fix (ORCH-4):

 import logging
 import os
+import time
 from dataclasses import dataclass, field

 from .db import get_db, update_task_stage, enqueue_job
@@ -35,6 +36,7 @@ from .git_worktree import get_worktree_path
 from .review_parse import extract_review_findings, extract_test_failures
 from .qg.checks import QG_CHECKS
 from . import merge_gate
+from . import self_deploy
 from .notifications import (
    notify_stage_change,
    notify_qg_failure,
@@ -190,6 +192,23 @@ def advance_stage(
            result.note = "terminal"
            return result

+        # --- ORCH-036 Phase B: human Approved on `deploy` -> initiate deploy --
+        # A human flipping the Plane status to Approved on the `deploy` stage
+        # (finished_agent is None) is the prod-deploy trigger for the self-hosting
+        # repo. Initiate the DETACHED host deploy + enqueue the finalizer and
+        # return WITHOUT running check_deploy_status (the verdict does not exist
+        # yet — running the gate now would read a stale/absent log and falsely
+        # roll back, R-2). The finalizer (Phase C, finished_agent="deployer")
+        # records the verdict later; that path is NOT intercepted here.
+        if (
+            current_stage == "deploy"
+            and finished_agent is None
+            and settings.deploy_require_manual_approve
+            and self_deploy.self_deploy_applies(repo)
+        ):
+            _handle_self_deploy_phase_b(task_id, repo, work_item_id, branch, result)
+            return result
+
        # --- Quality gate ----------------------------------------------------
        if qg_name and qg_name in QG_CHECKS:
            # Human-approval gate: split by path.
@@ -252,6 +271,22 @@ def advance_stage(
            ):
                return result

+        # --- ORCH-036 Phase A: request approve before the prod deploy ---------
+        # On the deploy-staging -> deploy edge, AFTER a green check_staging_status
+        # and the merge-gate, the self-hosting repo does NOT auto-launch a prod
+        # deployer. Instead advance the STAGE to `deploy`, put the issue into an
+        # approval-pending state and wait for a human Approved (Phase B). The
+        # merge lease stays HELD across the wait (released on done / rollback).
+        if (
+            current_stage == "deploy-staging"
+            and settings.deploy_require_manual_approve
+            and self_deploy.self_deploy_applies(repo)
+        ):
+            _handle_self_deploy_phase_a(
+                task_id, current_stage, repo, work_item_id, branch, result
+            )
+            return result
+
        # --- Advance ---------------------------------------------------------
        update_task_stage(task_id, next_stage)
        # Telegram live tracker: the analysis->architecture advance is the human
@@ -762,3 +797,199 @@ def _handle_merge_gate_rollback(
        f"Task {task_id}: merge-gate FAILED, rolled back deploy-staging -> "
        f"development ({reason})"
    )
+
+
+# ---------------------------------------------------------------------------
+# ORCH-036: executable self-deploy (Phase A/B/C)
+# ---------------------------------------------------------------------------
+def _handle_self_deploy_phase_a(
+    task_id, current_stage, repo, work_item_id, branch, result: AdvanceResult
+):
+    """Phase A — advance to `deploy` and request a manual approve (no prod deploy).
+
+    Staging is green and the branch is mergeable; for the self-hosting repo we do
+    NOT auto-deploy to prod. Move the task onto the `deploy` stage (so a later
+    human Approved lands there -> Phase B), set the issue approval-pending and ask
+    the human to flip the status to Approved. A restart-safe `approve-requested`
+    marker records that Phase A ran. The merge lease stays HELD.
+
+    Args:
+        task_id: id of the task whose stage is updated.
+        current_stage: stage being left; only used for the stage-change notification.
+        repo: repository name (selects the sentinel directory).
+        work_item_id: issue id; when falsy the issue-status and comment calls are
+            skipped, while the marker and the Telegram message are still emitted.
+        branch: not used by this handler.
+        result: mutated in place (``advanced``, ``to_stage``, ``note``).
+
+    Returns:
+        None. No job is enqueued here.
+    """
+    # Stage moves first, so a later human Approved is processed on `deploy`.
+    update_task_stage(task_id, "deploy")
+    notify_stage_change(task_id, current_stage, "deploy")
+    result.advanced = True
+    result.to_stage = "deploy"
+    result.note = "self-deploy-approval-pending"
+
+    if work_item_id:
+        set_issue_in_review(work_item_id)
+    # Marker payload is the wall-clock time of the request; the write is
+    # best-effort and its return value is not checked.
+    self_deploy.write_marker(
+        repo, work_item_id, self_deploy.APPROVE_REQUESTED, content=str(time.time())
+    )
+    if work_item_id:
+        plane_add_comment(
+            work_item_id,
+            "\U0001f7e1 Staging зелёный. Требуется ручной approve для ПРОД-деплоя: "
+            "смените статус задачи на «Approved», чтобы запустить деплой в прод (8500).",
+            author="deployer",
+        )
+    send_telegram(
+        f"\U0001f7e1 {work_item_id}: staging OK. Ждёт approve на ПРОД-деплой "
+        f"(смените статус на Approved)."
+    )
+    logger.info(
+        f"Task {task_id}: self-deploy Phase A — advanced to deploy, "
+        f"approval-pending (awaiting human Approved)"
+    )
+
+
+def _handle_self_deploy_phase_b(task_id, repo, work_item_id, branch, result: AdvanceResult):
+    """Phase B — a human Approved initiates the DETACHED prod deploy (idempotent).
+
+    Idempotency-guard: if the `initiated` marker already exists (double Approved /
+    duplicate webhook, R-4) this is a no-op. Otherwise launch the detached host
+    deploy, and ONLY on success record `initiated` + enqueue the finalizer (so a
+    failed launch can be retried by re-approving). Returns without advancing — the
+    finalizer (Phase C) records the verdict once the hook finishes.
+
+    Args:
+        task_id: id of the task being deployed; attached to the finalizer job.
+        repo: repository name.
+        work_item_id: issue id; when falsy the issue comments are skipped.
+        branch: task branch, forwarded to the deploy launch and the job text.
+        result: mutated in place (``note``, and on success ``enqueued_agent`` /
+            ``enqueued_job_id``).
+
+    Returns:
+        None.
+    """
+    if self_deploy.has_marker(repo, work_item_id, self_deploy.INITIATED):
+        result.note = "self-deploy-already-initiated"
+        logger.info(
+            f"Task {task_id}: prod deploy already initiated; ignoring repeat Approved"
+        )
+        return
+
+    ok, msg = self_deploy.initiate_deploy(repo, work_item_id, branch)
+    if not ok:
+        result.note = f"self-deploy-initiate-failed: {msg}"
+        if work_item_id:
+            plane_add_comment(
+                work_item_id,
+                f"⚠️ Не удалось запустить прод-деплой: {msg}. "
+                "Повторите approve после устранения причины.",
+                author="deployer",
+            )
+        send_telegram(f"⚠️ {work_item_id}: прод-деплой не запустился: {msg}")
+        logger.error(f"Task {task_id}: self-deploy initiate failed: {msg}")
+        return
+
+    marker_ok = self_deploy.write_marker(
+        repo, work_item_id, self_deploy.INITIATED, content=str(time.time())
+    )
+    if not marker_ok:
+        # The deploy is already running on the host; without the marker the
+        # idempotency-guard above is blind, so make the gap visible.
+        logger.error(
+            f"Task {task_id}: could not persist `initiated` marker; "
+            f"a repeat Approved may launch a second prod deploy"
+        )
+    task_desc = (
+        f"Work item: {work_item_id}\nRepo: {repo}\nBranch: {branch}\n"
+        f"Stage: deploy\nNote: deploy-finalize poll (prod self-deploy initiated)."
+    )
+    # The first finalize poll is delayed so the hook has time to produce a verdict.
+    new_job = enqueue_job(
+        "deploy-finalizer", repo, task_desc, task_id=task_id,
+        available_at_delay_s=settings.deploy_finalize_delay_s,
+    )
+    result.enqueued_agent = "deploy-finalizer"
+    result.enqueued_job_id = new_job
+    result.note = "self-deploy-initiated"
+    if work_item_id:
+        plane_add_comment(
+            work_item_id,
+            "\U0001f680 Прод-деплой стартовал (detached host-процесс). "
+            "Вердикт будет зафиксирован после health-check.",
+            author="deployer",
+        )
+    send_telegram(f"\U0001f680 {work_item_id}: прод-деплой стартовал. Жду результат.")
+    logger.info(
+        f"Task {task_id}: self-deploy Phase B — detached deploy initiated, "
+        f"finalizer enqueued (job_id={new_job})"
+    )
+
+
+def _deploy_finalize_defer_count(task_id: int) -> int:
+    """How many times this task's finalizer has already deferred (restart-safe).
+
+    Counted from the persisted jobs queue by the defer marker in task_content
+    (mirrors _merge_defer_count), so a service restart never resets the budget.
+
+    Args:
+        task_id: task whose jobs rows are counted.
+
+    Returns:
+        Number of jobs rows for the task whose ``task_content`` contains
+        ``deploy-finalize defer``. The query has no time or status filter, so
+        every such row ever queued for the task is counted.
+    """
+    conn = get_db()
+    try:
+        n = conn.execute(
+            "SELECT COUNT(*) FROM jobs WHERE task_id=? AND task_content LIKE '%deploy-finalize defer%'",
+            (task_id,),
+        ).fetchone()[0]
+    finally:
+        # Close even when the query raises, so the connection is not leaked.
+        conn.close()
+    return n
+
+
+def run_deploy_finalizer(job: dict):
+    """Phase C — deterministic finalizer (reserved-agent `deploy-finalizer`, no LLM).
+
+    Claimed by the worker in the NEW container after the prod restart. Reads the
+    `result` sentinel (hook exit-code written by the host wrapper):
+      * not written yet & budget left -> DEFER (re-queue with a delay);
+      * budget exhausted              -> set_issue_blocked + Telegram (anti-livelock);
+      * present                       -> map exit-code -> deploy_status, write
+        14-deploy-log.md, then advance_stage(finished_agent="deployer") so the
+        EXISTING contracts fire: SUCCESS -> terminal-sync deploy->done + release
+        lease; FAILED -> БАГ-8 (bug 8) rollback deploy->development + set_issue_blocked.
+
+    Exceptions are NOT caught here; the launcher wraps this call and marks the
+    job done/failed.
+
+    Args:
+        job: the claimed jobs row; ``task_id`` and ``repo`` are read from it.
+
+    Returns:
+        None.
+    """
+    task_id = job.get("task_id")
+    repo = job.get("repo")
+    conn = get_db()
+    try:
+        row = conn.execute(
+            "SELECT work_item_id, branch FROM tasks WHERE id=?", (task_id,)
+        ).fetchone()
+    finally:
+        # Close even when the lookup raises, so the connection is not leaked.
+        conn.close()
+    if not row:
+        logger.error(f"deploy-finalizer: no task row for task_id={task_id}")
+        return
+    work_item_id, branch = row[0], row[1]
+
+    present, code = self_deploy.read_result(repo, work_item_id)
+    if not present:
+        defers = _deploy_finalize_defer_count(task_id)
+        if defers < settings.deploy_finalize_max_attempts:
+            # The "deploy-finalize defer" text below is what the defer counter
+            # matches on — keep the two in sync.
+            task_desc = (
+                f"Work item: {work_item_id}\nRepo: {repo}\nBranch: {branch}\n"
+                f"Stage: deploy\nNote: deploy-finalize defer "
+                f"(attempt {defers + 1}/{settings.deploy_finalize_max_attempts}) — "
+                f"deploy result not ready, retrying after {settings.deploy_finalize_delay_s}s."
+            )
+            new_job = enqueue_job(
+                "deploy-finalizer", repo, task_desc, task_id=task_id,
+                available_at_delay_s=settings.deploy_finalize_delay_s,
+            )
+            logger.info(
+                f"Task {task_id}: deploy result not ready, finalizer deferred "
+                f"(job_id={new_job}, attempt {defers + 1}/{settings.deploy_finalize_max_attempts})"
+            )
+        else:
+            if work_item_id:
+                set_issue_blocked(work_item_id)
+            send_telegram(
+                f"\U0001f6a8 {work_item_id}: deploy result не появился после "
+                f"{settings.deploy_finalize_max_attempts} попыток. Нужно ручное вмешательство."
+            )
+            logger.error(
+                f"Task {task_id}: deploy-finalize defer attempts exhausted "
+                f"({settings.deploy_finalize_max_attempts})"
+            )
+        return
+
+    # Result present -> deterministic verdict.
+    status = self_deploy.map_exit_code_to_status(code)
+    if not self_deploy.write_deploy_log(repo, work_item_id, branch, code, status):
+        # The gate below reads the verdict from this log; surface the failed
+        # write so an unexpected gate outcome can be traced back to it.
+        logger.error(
+            f"Task {task_id}: 14-deploy-log.md could not be written "
+            f"(hook exit={code}, deploy_status={status})"
+        )
+    logger.info(
+        f"Task {task_id}: deploy finalized, hook exit={code} -> deploy_status={status}"
+    )
+    if status == "SUCCESS" and work_item_id:
+        plane_add_comment(
+            work_item_id,
+            f"✅ Прод-деплой успешен (health-check OK, exit {code}).",
+            author="deployer",
+        )
+        send_telegram(f"✅ {work_item_id}: прод-деплой успешен (exit {code}).")
+
+    # Drive the EXISTING deploy contracts via the gate verdict we just wrote.
+    advance_stage(
+        task_id=task_id,
+        current_stage="deploy",
+        repo=repo,
+        work_item_id=work_item_id,
+        branch=branch,
+        finished_agent="deployer",
+    )