2026-06-07 00:23:41 +03:00
35 changed files with 2886 additions and 6 deletions
--- a/.env.example
+++ b/.env.example
@@ -36,6 +36,41 @@ ORCH_MERGE_RETEST_TARGET=tests/
 ORCH_MERGE_LOCK_TIMEOUT_S=300
 ORCH_MERGE_DEFER_DELAY_S=60
 ORCH_MERGE_DEFER_MAX_ATTEMPTS=5
+# ORCH-036: executable self-deploy of the `deploy` stage. For the self-hosting repo
+# (orchestrator) the stage REALLY restarts prod (8500) via a detached host hook;
+# deploy_status: SUCCESS means proven health-ok, not an LLM declaration. Three
+# deterministic phases (A: request approve, B: human Approved -> detached deploy,
+# C: finalizer maps hook exit-code -> deploy_status). Non-self repos: unchanged
+# synchronous ssh deploy. SECRETS / host paths live ONLY on the host — do NOT commit.
+#   SELF_DEPLOY_ENABLED -> global kill-switch (false -> legacy synchronous deploy for all).
+#   SELF_DEPLOY_REPOS   -> CSV of repos where Phase A/B/C is REAL; empty -> only the self-hosting
+#                          repo (orchestrator); all other repos keep the legacy synchronous deploy (mirrors ORCH-35).
+#   DEPLOY_REQUIRE_MANUAL_APPROVE -> require a human Plane "Approved" before the prod
+#                          deploy (true on rollout; full auto is ORCH-54).
+#   DEPLOY_FINALIZE_DELAY_S       -> delay before the first/each finalize poll (>= hook+health).
+#   DEPLOY_FINALIZE_MAX_ATTEMPTS  -> bounded finalize-defer budget (anti-livelock).
+#   DEPLOY_SSH_USER / DEPLOY_SSH_HOST -> ssh target for the host hook (DEPLOY_SSH_HOST
+#                          empty -> detached deploy will NOT launch; set on the host).
+#   DEPLOY_HOOK_SCRIPT            -> path to the hook ON THE HOST (relative to the repo).
+#   DEPLOY_HOST_REPO_PATH         -> orchestrator clone path on the host.
+#   DEPLOY_PROD_SOURCE_IMAGE      -> staging-validated image, retagged build-once (no rebuild).
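+#   DEPLOY_PROD_TARGET_SERVICE / DEPLOY_PROD_TARGET_PORT / DEPLOY_PROD_TARGET_IMAGE / DEPLOY_PROD_COMPOSE_PROFILE -> prod compose service, port, image tag and compose profile (empty -> no profile).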
+#   DEPLOY_PROD_PREV_IMAGE_FILE   -> prod prev-image snapshot (separate from staging's).
+ORCH_SELF_DEPLOY_ENABLED=true
+ORCH_SELF_DEPLOY_REPOS=
+ORCH_DEPLOY_REQUIRE_MANUAL_APPROVE=true
+ORCH_DEPLOY_FINALIZE_DELAY_S=90
+ORCH_DEPLOY_FINALIZE_MAX_ATTEMPTS=10
+ORCH_DEPLOY_SSH_USER=slin
+ORCH_DEPLOY_SSH_HOST=
+ORCH_DEPLOY_HOOK_SCRIPT=scripts/orchestrator-deploy-hook.sh
+ORCH_DEPLOY_HOST_REPO_PATH=/home/slin/repos/orchestrator
+ORCH_DEPLOY_PROD_SOURCE_IMAGE=orchestrator-orchestrator-staging
+ORCH_DEPLOY_PROD_TARGET_SERVICE=orchestrator
+ORCH_DEPLOY_PROD_TARGET_PORT=8500
+ORCH_DEPLOY_PROD_TARGET_IMAGE=orchestrator-orchestrator
+ORCH_DEPLOY_PROD_COMPOSE_PROFILE=
+ORCH_DEPLOY_PROD_PREV_IMAGE_FILE=.deploy-prev-image-prod

 # ORCH-053: stuck-task reconciler (sweeper for lost webhooks). A background daemon
 # replays a missed stage transition through the SAME gates/handlers a webhook would,
--- a/.openclaw/agents/deployer.md
+++ b/.openclaw/agents/deployer.md
@@ -73,13 +73,39 @@ On stage `deploy-staging` your job is to run the staging test suite and write a

 ---

-## Stage: `deploy` (Production Deploy — ORCH-36, future)
-
-On stage `deploy` your job is to perform (or simulate) the production deployment and write a machine-readable verdict to `docs/work-items/<work_item_id>/14-deploy-log.md` with frontmatter field `deploy_status: SUCCESS|FAILED`.
+## Stage: `deploy` (Production Deploy — ORCH-36, executable self-deploy)

 This stage is only reached if the staging gate (`deploy-staging`) passed with `staging_status: SUCCESS`.
+The verdict contract is unchanged: `docs/work-items/<work_item_id>/14-deploy-log.md` with
+frontmatter field `deploy_status: SUCCESS|FAILED` (the gate `check_deploy_status` parses ONLY this).
+**What changed (ORCH-36): WHO writes that verdict, and WHEN, for the self-hosting repo.**

-⚠️ **CRITICAL**: Do NOT trigger real production deploys unless explicitly instructed. Real docker/SSH deploys are handled by `scripts/orchestrator-deploy-hook.sh` (ORCH-36).
+### Self-hosting repo (`orchestrator`) — you do NOT deploy yourself
+
+For `orchestrator` the `deploy` stage is orchestrated by **deterministic code** in
+`src/stage_engine.py` + `src/self_deploy.py`, NOT by you, and NOT by a "paper" `SUCCESS`:
+
+- **Phase A** (entering `deploy`): the pipeline does NOT launch you. It sets the issue to an
+  approval-pending state and asks a human to flip the Plane status to **Approved**.
+- **Phase B** (human Approved): the code launches a **detached host process**
+  (`ssh + setsid` → `scripts/orchestrator-deploy-hook.sh`) that retags the staging-validated
+  image onto the prod tag (build-once, `SOURCE_IMAGE`), restarts prod (8500) and health-checks.
+  The orchestrator NEVER restarts its own 8500 container from inside — that would kill the
+  worker mid-call.
+- **Phase C** (finalizer): a deterministic finalizer-job in the NEW container reads the hook
+  exit-code, maps `0 → SUCCESS`, `1|2|other → FAILED`, writes `14-deploy-log.md` and drives the
+  existing contracts (`SUCCESS → done`, `FAILED → rollback to development`).
+
+⚠️ **CRITICAL for self-hosting**: NEVER run `docker compose up -d orchestrator`, `--build`, or any
+restart of 8500 from inside the agent. `deploy_status: SUCCESS` must reflect a REAL host health-ok,
+never an LLM declaration. If you are ever launched on `deploy` for `orchestrator`, do nothing that
+restarts prod — the host hook owns the restart.
+
+### Non-self repos (e.g. `enduro-trails`) — unchanged synchronous ssh deploy
+
+For non-self repos behaviour is unchanged: perform the production deployment (ssh to the project
+host) and write the machine-readable verdict (`deploy_status: SUCCESS|FAILED`). Real docker/SSH
+deploys go through `scripts/orchestrator-deploy-hook.sh` (parametrised; defaults are STAGING-safe).

 ---

--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,7 @@
 ## [Unreleased]

 ### Added
+- **Исполняемый самодеплой стадии `deploy` (стадия дёргает хост-хук, manual-approve)** (ORCH-036): стадия `deploy` перестаёт быть «бумажной» — для self-hosting репозитория `orchestrator` `deploy_status: SUCCESS` означает ДОКАЗАННЫЙ health-ok реального рестарта прод-контейнера (8500), а не декларацию LLM. Критический путь self-restart детерминирован (без LLM), по образцу merge-gate ORCH-043, и разбит на три фазы (`src/stage_engine.py` + новый модуль `src/self_deploy.py`): **Фаза A** (вход в `deploy`) — вместо запуска прод-deployer'а при `deploy_require_manual_approve=true` задача переводится в approval-pending (`set_issue_in_review`) и ждёт ручного approve; restart-safe маркер `approve-requested`. **Фаза B** (человек ставит статус Plane → `Approved`; `advance_stage(deploy, finished_agent=None)`) — запускается **detached host-процесс** (`ssh + setsid` → `scripts/orchestrator-deploy-hook.sh`, чтобы рестарт 8500 пережил гибель контейнера; орк НЕ убивает себя из docker.sock) с build-once retag staging-образа (`SOURCE_IMAGE`), ставится детерминированный **finalizer-job**; маркер `initiated` — идемпотентность повторного Approved. **Фаза C** (`run_deploy_finalizer`, reserved-agent `deploy-finalizer`, claim'ится новым контейнером после рестарта) — читает sentinel `result` (exit-code хука, записан host-обёрткой), `not-ready` → defer (бюджет `deploy_finalize_max_attempts`, restart-safe по `task_content`), маппит `0→SUCCESS / 1|2|иное→FAILED` (чистая функция `map_exit_code_to_status`, unit-тест), пишет `14-deploy-log.md` и вызывает `advance_stage(deploy, finished_agent="deployer")` → существующие контракты: `SUCCESS → done` + release merge-lease, `FAILED → откат БАГ-8 на development` + `set_issue_blocked`. Уведомления Plane+Telegram на approve-request / initiate / success / rollback (BR-5, ни одного «молчаливого» деплоя). 
Хост-хук `scripts/orchestrator-deploy-hook.sh` расширен **обратно-совместимым** `SOURCE_IMAGE`: при заданном — `docker tag $SOURCE_IMAGE $TARGET_IMAGE` перед `up -d --no-build` (деплой РОВНО протестированного образа, без `docker build`); не задан → прежнее поведение; exit-code-контракт (0/1/2) и health-loop (10×6с, авто-rollback) не тронуты. Restart-safe состояние — sentinel-файлы (`<repos_dir>/.deploy-state-<repo>/<work_item_id>/`), без миграции БД. Условность как ORCH-35: реальный самодеплой только для `is_self_hosting_repo("orchestrator")`; прочие репо (enduro-trails) — прежний синхронный ssh-путь агентом. Контракты НЕ менялись: `STAGE_TRANSITIONS`, реестр `QG_CHECKS`, `check_deploy_status`/`_parse_deploy_status` (frontmatter-only), terminal-sync `deploy→done`, merge-gate (ORCH-43), БАГ-8. Флаг `DEPLOY_REQUIRE_MANUAL_APPROVE` остаётся `true` (полный авто — отдельная задача ORCH-54). Новые настройки: `ORCH_DEPLOY_REQUIRE_MANUAL_APPROVE` (true), `ORCH_DEPLOY_SSH_USER`, `ORCH_DEPLOY_SSH_HOST`, `ORCH_DEPLOY_HOOK_SCRIPT`, `ORCH_DEPLOY_PROD_SOURCE_IMAGE`, `ORCH_DEPLOY_PROD_TARGET_SERVICE/PORT/IMAGE`, `ORCH_DEPLOY_FINALIZE_DELAY_S`, `ORCH_DEPLOY_FINALIZE_MAX_ATTEMPTS`. ADR `docs/work-items/ORCH-036/06-adr/ADR-001-executable-self-deploy.md`, глобальный `docs/architecture/adr/adr-0007-executable-self-deploy.md`. Документация: `.openclaw/agents/deployer.md` (стадия `deploy` = вызов хука, запрет self-restart), `docs/operations/INFRA.md`, `docs/operations/DEPLOY_HOOK.md`. Тесты: `tests/test_deploy_hook_mapping.py`, `tests/test_deploy_approve.py`, `tests/test_deploy_routing.py`, `tests/test_deploy_rollback.py`, `tests/test_deploy_notifications.py`, `tests/test_deploy_build_once.py`, `tests/test_deploy_terminal_sync.py`, `tests/test_staging_precondition.py`, `tests/test_deploy_hook_rollback_sim.py`.
 - **Sweeper потерянных webhook (реконсиляция застрявших стадий)** (ORCH-053): фоновый daemon-поток `src/reconciler.py` (паттерн `queue_worker`), который устраняет тихое застревание задач, когда конвейер не двигается из-за потерянного события (502 на ребилде инстанса, отсутствие ретраев у Plane/Gitea, неразрезолвленный `sha→branch` — класс инцидента ORCH-044). Реконсилятор периодически (`reconcile_interval_s`) доигрывает пропущенный переход **через те же штатные гейты/обработчики**, что и webhook, не дублируя логику конвейера: **F-1 gate-side** (`reconcile_gate_once`) — для задач `stage≠done`, без активного job и `age(updated_at) ≥ grace_for_stage(stage)` делает read-only пред-оценку канонического QG стадии; зелёный → продвижение строго через неизменный `stage_engine.advance_stage(..., finished_agent=None)`; красный → тишина (спам нотификаций структурно невозможен — `advance_stage` на красном гейте не вызывается вовсе); `analysis` F-1 не трогает (человеческий гейт). **F-2 plane-side** (`reconcile_plane_once`) — опрос Plane API per-project (новый `plane_sync.list_issues_by_state`, курсорная пагинация, never-raise) и реплей In Progress / Approved / Rejected через существующие `webhooks.plane.handle_status_start` / `handle_verdict` (async-обработчики вызываются из sync-потока через `asyncio.run`). **F-3** — усиление `sha→branch` в `handle_ci_status`: при неразрезолвленном sha — БД-fallback по единственной development-задаче repo (`db.get_development_tasks_by_repo`; неоднозначность → не резолвим, ложного матча нет), `logger.debug`→`logger.info` для видимости потерянного CI-события. Анти-дубль на создании задачи (`db.create_task_atomic` под process-wide `threading.Lock`: SELECT-exists→INSERT, проигравший в гонке reconcile↔webhook не плодит второй task/branch/worktree/стартовый analyst-job). Старт/стоп в `main.lifespan` (после `worker.start()` / перед `worker.stop()`), restart-safe, never-raise на единицу работы. 
Наблюдаемость (F-4): при разблокировке — лог-строка `reconciler: <wi> <stage> разблокирована (потерян webhook)` + Telegram (`reconcile_notify_unblock`) и блок `reconcile` в `GET /queue`. Kill-switches: `ORCH_RECONCILE_ENABLED` (глобально), `ORCH_RECONCILE_PLANE_ENABLED` (гасит только F-2), `ORCH_RECONCILE_INTERVAL_S` (120), `ORCH_RECONCILE_GRACE_DEFAULT_S` (600), `ORCH_RECONCILE_GRACE_OVERRIDES_JSON` (per-stage), `ORCH_RECONCILE_NOTIFY_UNBLOCK` (true). Схема БД и реестры (`STAGE_TRANSITIONS`/`QG_CHECKS`) НЕ менялись. ADR `docs/work-items/ORCH-053/06-adr/ADR-001-stuck-task-reconciler.md`, глобальный `docs/architecture/adr/adr-0007-reconciler.md`. Тесты: `tests/test_reconciler.py`, `tests/test_reconciler_plane.py`, `tests/test_gitea_sha_resolve.py`, `tests/test_config.py`.
 - **Merge-gate: авто-rebase на текущий `origin/main` + повторный прогон тестов + сериализация мержей** (ORCH-043): детерминированный (без LLM) суб-гейт на ребре `deploy-staging → deploy`, выполняемый ПЕРЕД мержем PR деплоером. Закрывает класс гонок «две зелёные ветки в одном репо ломают `main`»: пайплайн валидирует ветку против того `main`, от которого она ответвилась, а не против `main` в момент мержа — между «ветка зелёная» и «ветка смержена» параллельная задача может сдвинуть `main` (семантический конфликт: git мержит без текстового конфликта, но совмещённый `main` красный). Для self-hosting репозитория `orchestrator` это означало бы красный `main` инструмента, обслуживающего ВСЕ проекты. Новый модуль `src/merge_gate.py` (контракт «never raise», все git-операции — в per-branch worktree, ORCH-2/S-4): `branch_is_behind_main` (`git merge-base --is-ancestor origin/main HEAD`), `auto_rebase_onto_main` (rebase + `git push --force-with-lease` ТОЛЬКО ветки задачи — `main` НИКОГДА не пушится; текстовый конфликт → `rebase --abort` + чистый worktree), `retest_branch` (`python -m pytest <target>` в догнанном worktree, бюджет `merge_retest_timeout_s`), файловый merge-lease (`acquire_merge_lease`/`release_merge_lease`, атомарный `O_CREAT|O_EXCL`, holder-aware release, реклейм протухшего/битого лиза — без изменения схемы БД). Новый quality-gate `check_branch_mergeable` (`src/qg/checks.py`, зарегистрирован в `QG_CHECKS`) композирует примитивы под лизом: kill-switch/вне-области → no-op pass; lock занят → `(False, "merge-lock busy")` (сигнал DEFER, не код-фолт); ветка свежая → pass (лиз ДЕРЖИТСЯ до мержа); отстала → rebase → конфликт = fail+release, чисто → retest → зелёный = pass (лиз держится) / красный|timeout = fail+release. 
Интеграция в `src/stage_engine.py` (суб-гейт на `deploy-staging`, БЕЗ новой стадии в `STAGE_TRANSITIONS`): pass → advance на `deploy`; «merge-lock busy» → DEFER (повторная постановка деплоера на `deploy-staging` с задержкой `available_at`, анти-дедлок при `max_concurrency=1`, restart-safe счётчик по `task_content`, лимит `merge_defer_max_attempts` → block+Telegram); конфликт/красный retest → ROLLBACK на `development` + ретрай developer-а (кап `MAX_DEVELOPER_RETRIES`, без бесконечного баунса). Лиз освобождается на `deploy→done`, на rollback и по webhook смерженного PR (`src/webhooks/gitea.py`). Новый параметр `enqueue_job(..., available_at_delay_s=...)` (`src/db.py`) — отложенная постановка без изменения схемы. Условность раскатки (зеркало ORCH-35): `merge_gate_repos` (CSV) или по умолчанию только self-hosting `orchestrator`; глобальный kill-switch `merge_gate_enabled`. Новые настройки `ORCH_MERGE_GATE_ENABLED` (true), `ORCH_MERGE_GATE_REPOS` (""), `ORCH_MERGE_RETEST_TIMEOUT_S` (600), `ORCH_MERGE_RETEST_TARGET` (tests/), `ORCH_MERGE_LOCK_TIMEOUT_S` (300), `ORCH_MERGE_DEFER_DELAY_S` (60), `ORCH_MERGE_DEFER_MAX_ATTEMPTS` (5). ADR `docs/work-items/ORCH-043/06-adr/ADR-001-merge-gate.md`, глобальный `docs/architecture/adr/adr-0006-merge-gate.md`. Тесты: `tests/test_merge_gate.py`, `tests/test_qg_merge_gate.py`, `tests/test_merge_gate_race.py`, `tests/test_stage_engine.py::TestMergeGate`, `tests/test_config.py`.
 - **Режим `bump` live-трекера Telegram** (ORCH-042): новый `ORCH_TRACKER_MODE` (`Settings.tracker_mode`, дефолт `edit`) выбирает поведение карточки задачи. `edit` (как было) — карточка редактируется на месте (`editMessageText`). `bump` — на каждом обновлении старое сообщение удаляется и карточка отправляется заново вниз чата (best-effort `delete_telegram(старый_id)` → `send_telegram(text, disable_notification=True)` → `set_tracker_message_id(new_id)`), чтобы актуальный статус всегда был последним в чате при активной переписке. Инвариант «одна карточка на задачу» сохранён в обоих режимах: за один вызов `update_task_tracker` шлётся ≤1 нового сообщения; `set_tracker_message_id` вызывается ТОЛЬКО при успешном send (транзиентный `None` не затирает указатель); результат delete НЕ блокирует отправку новой карточки (delete-fail у сообщения >48ч → всё равно шлём новое). Резолюция режима в `notifications` (case-insensitive, trim): всё, что ≠ `"bump"` (включая пустое/мусор) → `edit` → нулевая регрессия и оркестратор не падает на любом значении флага. Новый low-level helper `delete_telegram(message_id) -> bool` (контракт «never raises», маркеры `_DELETE_GONE_MARKERS`): `ok:true` или «уже нет / нельзя удалить» → `True`; неизвестный `ok:false`/5xx/исключение → `False`; нет кредов → `False` без HTTP. Сигнатуры `send_telegram`/`edit_telegram`/`update_task_tracker` и схема БД (`tasks.tracker_message_id`) не менялись. ADR `docs/work-items/ORCH-042/06-adr/ADR-001-tracker-bump-mode.md`. Тесты: `tests/test_tracker_bump.py`, `tests/test_config.py`.
@@ -27,6 +28,7 @@
 - Цепочка стадий: `... testing → deploy-staging → deploy → done` (была без `deploy-staging`).

 ### Fixed
+- **Re-deploy после отката больше не зависает на `deploy`; `.env.example` дополнен** (ORCH-036, review-fix): sentinel-маркеры самодеплоя (`approve-requested`/`initiated`/`result`) ключуются по стабильному `work_item_id`, поэтому при FAILED-деплое и откате БАГ-8 (`deploy → development`) они оставались на диске — после фикса developer-ом и повторного захода задачи на `deploy` Фаза B по idempotency-guard видела STALE `initiated` и становилась no-op: detached-хук не перезапускался, finalizer не ставился, задача висела на `deploy` навсегда (нарушался retry-контракт стадии, AC-4/AC-10; устаревший `result` к тому же был бы перечитан новым finalizer'ом). Добавлен `self_deploy.clear_state(repo, work_item_id)` (never-raise, idempotent, рекурсивное удаление `<repos_dir>/.deploy-state-<repo>/<wi>/`), вызывается в ветке БАГ-8-отката `check_deploy_status` FAILED (`src/stage_engine.py`) и дополнительно в начале Фазы A (`_handle_self_deploy_phase_a`) — каждый новый прод-деплой-проход стартует с чистого состояния. Отдельно: канонический `.env.example` (CLAUDE.md правило №8, ТЗ §2.6) дополнен полным блоком новых дескрипторов `ORCH_SELF_DEPLOY_*` / `ORCH_DEPLOY_*` (плейсхолдеры, секреты не коммитятся) по образцу merge-gate ORCH-043. Контракты `STAGE_TRANSITIONS` / `QG_CHECKS` / `_parse_deploy_status` / БАГ-8 / merge-gate не тронуты. Тесты: `tests/test_deploy_rollback.py::test_tc11_re_deploy_after_rollback_not_wedged`, `tests/test_deploy_hook_mapping.py::test_clear_state_removes_all_markers_and_is_idempotent`.
 - **Контейнер и агенты бегут под uid хоста (1000:1000), не root** (ORCH-040): оба сервиса в `docker-compose.yml` (`orchestrator`, `orchestrator-staging`) получили `user: "1000:1000"` (slin) — устраняет корень проблемы, при которой Claude-CLI агенты, запускаемые через `subprocess.Popen` внутри root-контейнера, создавали все артефакты конвейера (git worktree `/repos/_wt/...`, коммиты в `docs/work-items/...`) с владельцем `root:root` на хосте, из-за чего `git pull`/`git reset` под slin падали с `insufficient permission for adding an object` и каждый деплой требовал ручного `chown`. Теперь файлы сразу `slin:slin`. Доступ к docker.sock сохранён через `group_add: ["999"]` (МИНА 1 — НЕ удалена). SSH-маунт приведён к единому HOME агента: target `/root/.ssh` → `/home/slin/.ssh` (`/home/slin/.orchestrator-ssh:/home/slin/.ssh:ro`), синхронно с `HOME=/home/slin`, который launcher форсит в env Popen и git_env — устранён скрытый рассинхрон SSH-маунта с форсимым HOME. `src/agents/launcher.py` и `Dockerfile` НЕ менялись (numeric uid работает без записи в `/etc/passwd`; `safe.directory '*'` уже покрывает git над bind-mount). Требует host-prerequisites Owner (P-1…P-4, вне кода): блокер P-1 — `chown -R 1000:1000 /home/slin/.claude` для доступа uid 1000 к claude creds (иначе preflight заворачивает конвейер); прод-рестарт self — только в окно тишины (общий инстанс с enduro-trails), страховка — staging-гейт (adr-0003). ADR `docs/work-items/ORCH-040/06-adr/ADR-001-run-agents-as-host-uid.md`, глобальный `docs/architecture/adr/adr-0005-container-runs-as-host-uid.md`; INFRA.md обновлён (рантайм-uid, volumes/SSH target, host-prerequisites). Тесты: `tests/test_orch040_compose.py`.
 - **Staging-чек B6 читает реестр из окружения работающего staging-инстанса** (ORCH-048): блок B6 «Registry: sandbox present, prod ET/ORCH absent» в `scripts/staging_check.py` давал **ложный FAIL** (`prod-ET=YES(BAD!)`, `prod-ORCH=YES(BAD!)`) при фактически исправной изоляции — единственный чек suite, который не ходил к инстансу по HTTP, а импортировал `src.projects` локально через host-path хак `sys.path.insert(0, "/repos/orchestrator")` + `importlib.reload`, строя реестр из `ORCH_PROJECTS_JSON` **process-env запускающего процесса**. При фактическом запуске деплоером с хоста переменная не задана → дефолт `_DEFAULT_PROJECTS` (ET+ORCH) → ложный FAIL → лишний откат `deploy-staging → development`. Решение (вариант «в», ADR-001): host-path хак удалён; suite канонически запускается ВНУТРИ контейнера `orchestrator-staging` через `docker exec … python3 /repos/orchestrator/scripts/staging_check.py` (`scripts/` доступен только через bind-mount, `import src.projects` резолвится через `PYTHONPATH=/app` из кода контейнера, env — `.env.staging`) → B6 читает реестр именно работающего инстанса, без HTTP-bootstrap и «курицы-яйца». Логика вердикта вынесена в чистую `_evaluate_b6(known) -> (passed, detail)` (инвариант `passed ⟺ SANDBOX ∈ known ∧ PROD_ET ∉ known ∧ PROD_ORCH ∉ known`, формат detail сохранён) + `_known_project_ids_from_registry()` / `_run_b6()` с детерминированным FAIL при недоступности источника (не ложный PASS, не необработанное исключение). Синхронно обновлены `.openclaw/agents/deployer.md` (команда стадии через `docker exec`) и `docs/operations/STAGING_CHECK.md`. `src/projects.py`, `.env*` и прочие чеки A/B4/B5/C не тронуты; реестр `QG_CHECKS` и `check_staging_status` (ADR-0003) не менялись. ADR `docs/work-items/ORCH-048/06-adr/ADR-001-b6-registry-via-in-container-run.md`. Тесты: `tests/test_staging_check_b6.py`.
 - **Testing-гейт `check_tests_passed` читает `result:` наравне с `verdict:`/`status:`** (ORCH-047): парсер `_parse_tests_verdict` (`src/qg/checks.py`) теперь принимает три равноправных машиночитаемых поля frontmatter `13-test-report.md` — `result:` (канон промпта тестера `.openclaw/agents/tester.md`, `result: PASS|FAIL`), плюс легаси `verdict:` и `status:` (enduro-trails ET-001..ET-014); достаточно любого одного непустого. Устраняет рассинхрон контракта: тестер честно эмитил `result: PASS` без `verdict:`/`status:`, парсер попадал в ветку «нет машинного вердикта» → откат `testing → development` в петлю до исчерпания `MAX_DEVELOPER_RETRIES` (наблюдалось на ORCH-17; ORCH-016 прошёл лишь из-за избыточного дублирования полей). Семантика приоритетов сохранена и распространена на все три поля через объединённую строку: negative-токен в любом поле авторитетен (перебивает positive), наборы токенов заморожены (обратная совместимость). Сигнатура гейта, имя и реестр `QG_CHECKS` не менялись. ADR `docs/work-items/ORCH-047/06-adr/ADR-001-result-field-in-tests-gate.md`. Тесты: `tests/test_qg.py::TestCheckTestsPassed`.
--- a/docs/architecture/README.md
+++ b/docs/architecture/README.md
@@ -53,6 +53,33 @@ created → analysis → architecture → development → review → testing →

 Подробнее: [adr-0006](adr/adr-0006-merge-gate.md), детально — `docs/work-items/ORCH-043/06-adr/ADR-001-merge-gate.md`.

+### Исполняемый самодеплой стадии `deploy` (ORCH-36)
+`deploy` перестаёт быть «бумажной»: для self-hosting (`is_self_hosting_repo`) стадия
+РЕАЛЬНО деплоит прод (8500) через хост-хук `scripts/orchestrator-deploy-hook.sh`,
+а `deploy_status: SUCCESS` означает доказанный health-ok, не декларацию LLM. Три фазы
+(детерминированно, без LLM в критическом пути self-restart):
+- **Фаза A (вход в `deploy`)** — при `deploy_require_manual_approve=true` вместо запуска
+  прод-deployer выставляется approval-pending статус Plane + запрос approve
+  (Plane-коммент + Telegram). Перехват в `advance_stage` ПОСЛЕ `check_staging_status`
+  и merge-gate.
+- **Фаза B (Plane → `Approved`)** — `advance_stage(deploy, finished_agent=None)`
+  запускает **detached host-процесс** (ssh + setsid → хук с прод-параметрами +
+  build-once retag `SOURCE_IMAGE`) и ставит детерминированный **finalizer-job**;
+  маркер `initiated` — идемпотентность. Возврат БЕЗ advance (вердикта ещё нет).
+- **Фаза C (finalizer)** — новый контейнер после рестарта читает sentinel `result`
+  (exit-code хука), маппит `0→SUCCESS / иначе→FAILED`, пишет `14-deploy-log.md`,
+  вызывает `advance_stage(deploy, finished_agent="deployer")` → существующие контракты:
+  `SUCCESS → done`, `FAILED → откат БАГ-8 на development`.
+
+Approve = смена статуса Plane на `Approved` (status-only verdict model; комментарии
+не управляют конвейером). На старте — обязательный ручной approve (флаг `true`); полный
+авто — отдельная задача (ORCH-54). Условность как ORCH-35: реально для `orchestrator`,
+прочие репо — прежний синхронный ssh-деплой агентом. Контракты не меняются:
+`STAGE_TRANSITIONS`, реестр QG, `check_deploy_status`/`_parse_deploy_status`, БАГ-8,
+terminal-sync, merge-gate, exit-code-контракт хука. Restart-safe состояние —
+sentinel-файлы (`<repos_dir>/.deploy-state-<repo>/<wi>/`), без миграции БД.
+Подробнее: [adr-0007](adr/adr-0007-executable-self-deploy.md), детально —
+`docs/work-items/ORCH-036/06-adr/ADR-001-executable-self-deploy.md`.
 ### Reconciler: реконсиляция потерянных webhook (ORCH-053 — реализовано)
 Конвейер продвигается только входящими webhook; потерянное событие (502 на ребилде,
 нет ретраев у Plane/Gitea, неразрезолвленный `sha→branch`) → задача застревает молча
@@ -139,4 +166,5 @@ never-raise на единицу работы; тишина при синхрон
 Схема БД, потоки данных, resilience-слой, детали Dockerfile — [internals.md](internals.md).

 ---
+*ORCH-036: исполняемый самодеплой стадии `deploy` — design (см. adr-0007-executable-self-deploy), реализация в ветке feature/ORCH-036.*
 *Актуально на 2026-06-06. Обновлять при изменении src/stages.py, src/qg/checks.py, src/main.py. ORCH-043: merge-gate — design (см. adr-0006), реализация в ветке feature/ORCH-043. ORCH-053: reconciler — реализовано (см. adr-0007, src/reconciler.py).*
--- a/docs/architecture/adr/adr-0007-executable-self-deploy.md
+++ b/docs/architecture/adr/adr-0007-executable-self-deploy.md
@@ -0,0 +1,64 @@
+# ADR-0007: Исполняемый самодеплой стадии `deploy` (Вариант B, ORCH-36)
+
+## Статус
+Accepted (design) — реализация в ветке `feature/ORCH-036`.
+
+## Контекст
+Стадия `deploy` была «бумажной»: deployer-агент писал `deploy_status:` в
+`14-deploy-log.md`, гейт `check_deploy_status` парсил вердикт и двигал
+`deploy → done`. Реального деплоя не было. ORCH-36 делает стадию исполняемой для
+self-hosting (`orchestrator`), сохраняя прежний ssh-путь для остальных репо.
+
+Три ограничения формируют дизайн (детально — `docs/work-items/ORCH-036/06-adr/ADR-001`):
+1. **Self-restart**: рестарт прод-контейнера 8500 убивает in-container процесс →
+   рестарт делает ВНЕШНИЙ host-процесс.
+2. **Status-only verdict model**: approve = смена статуса Plane на `Approved`
+   (комментарии не управляют конвейером).
+3. **Гонка гейта**: вердикт нельзя читать до завершения асинхронного хука.
+
+## Решение
+Для self-hosting стадия `deploy` исполняется в три фазы детерминированным кодом
+(без LLM в критическом пути self-restart):
+
+- **Фаза A (вход в `deploy`)** — для self + `deploy_require_manual_approve=true`
+  вместо запуска прод-deployer выставляется approval-pending статус Plane + запрос
+  approve (Plane-коммент + Telegram). Перехват в `advance_stage` на ребре
+  `deploy-staging → deploy` (после `check_staging_status` и merge-gate).
+- **Фаза B (Plane → Approved)** — `advance_stage(deploy, finished_agent=None)`
+  запускает **detached host-процесс** (ssh + setsid → `orchestrator-deploy-hook.sh`
+  с прод-параметрами и build-once retag) и ставит **детерминированный finalizer-job**
+  с задержкой; маркер `initiated` — идемпотентность. Возврат БЕЗ advance.
+- **Фаза C (finalizer)** — после рестарта новый контейнер дочитывает sentinel
+  `result` (exit-code хука), маппит `0→SUCCESS / иначе→FAILED`, пишет
+  `14-deploy-log.md`, вызывает `advance_stage(deploy, finished_agent="deployer")`
+  → существующие контракты: `SUCCESS → done`, `FAILED → откат БАГ-8 на development`.
+
+### Ключевые инварианты (НЕ меняются)
+`STAGE_TRANSITIONS`, реестр QG, `check_deploy_status` / `_parse_deploy_status`
+(frontmatter only), откат БАГ-8, terminal-sync `deploy → done`, merge-gate (ORCH-43),
+exit-code-контракт хука (0/1/2).
+
+### Новое (сквозное)
+- **Детерминированный job-kind** `deploy-finalizer` в очереди (reserved-agent, не
+  LLM): read-result | defer | map+write+advance. Зеркалит детерминизм merge-gate.
+- **Approve-флаг** `deploy_require_manual_approve` (дефолт `true`; полный авто —
+  отдельная задача после набора метрик доверия, ORCH-54).
+- **Build-once**: опциональный `SOURCE_IMAGE` retag в хуке (обратно совместимо).
+- **Restart-safe состояние** деплоя — sentinel-файлы под
+  `<repos_dir>/.deploy-state-<repo>/<wi>/` (как merge-lease), БЕЗ миграции БД.
+
+### Условность
+Вся логика — только для `is_self_hosting_repo(repo)` (как ORCH-35). Прочие репо
+деплоятся прежним синхронным ssh-путём агентом.
+
+## Последствия
+- `deploy_status: SUCCESS` доказан реальным health-ok; критический путь self-restart
+  детерминирован.
+- Вводится новая под-компонента (finalizer job-handler) → изменение помечено
+  `arch:major-change`.
+- Approve вписан в status-only модель: restart-safe, аудируемо, идемпотентно.
+- На старте — обязательный ручной approve; молчаливых деплоев нет (Plane+Telegram).
+
+## Связанные ADR
+`adr-0003` (staging-gate), `adr-0006` (merge-gate), `adr-0005` (run-as-host-uid).
+Детальный per-work-item: `docs/work-items/ORCH-036/06-adr/ADR-001-executable-self-deploy.md`.
--- a/docs/operations/DEPLOY_HOOK.md
+++ b/docs/operations/DEPLOY_HOOK.md
@@ -8,6 +8,7 @@

 1. **Захват текущего образа** — до рестарта записывает ID образа работающего контейнера в `$PREV_IMAGE_FILE` (best-effort, не падает если сервис не запущен).
 2. **git pull** — обновляет код репозитория.
+   - **2b. Build-once retag** (ORCH-036, BR-6) — если задан `$SOURCE_IMAGE`, хук перед рестартом ретегает его на `$TARGET_IMAGE` (`docker tag $SOURCE_IMAGE $TARGET_IMAGE`), и шаг 3 поднимает контейнер на этом образе (`up -d --no-build`). Это деплой РОВНО того образа, что прошёл staging, **без `docker build`**. Если `$SOURCE_IMAGE` не задан (дефолт) — шаг пропускается (обратная совместимость).
 3. **Рестарт контейнера** — `docker compose --profile $COMPOSE_PROFILE up -d --no-build $TARGET_SERVICE`.
 4. **Health-цикл** — 10 попыток × 6с = до 60с. Критерий: HTTP 200 + тело содержит `"status":"ok"`.
   - **Успех** → `exit 0`, лог "Deploy SUCCESS".
@@ -29,6 +30,7 @@
 | `TARGET_IMAGE`   | `orchestrator-orchestrator-staging` | Имя образа для retag при rollback           |
 | `COMPOSE_PROFILE`| `staging`                         | Docker compose profile (пусто = без профиля) |
 | `PREV_IMAGE_FILE`| `$REPO/.deploy-prev-image-staging`| Файл для сохранения предыдущего образа        |
+| `SOURCE_IMAGE`   | _(unset)_                         | Build-once (ORCH-036): провалидированный образ для retag на `$TARGET_IMAGE` перед рестартом (без rebuild). Не задан → шаг пропущен. |
 | `LOG`            | `/var/log/orchestrator/deploy-hook.log` | Лог-файл (fallback: `$REPO/deploy-hook.log`) |

 > ⚠️ **Дефолт — всегда STAGING**. Прод активируется только явным переопределением env.
@@ -55,6 +57,20 @@ PREV_IMAGE_FILE=/home/slin/repos/orchestrator/.deploy-prev-image-prod \
 bash scripts/orchestrator-deploy-hook.sh --deploy
 ```

+### Прод build-once (ORCH-036) — ретег staging-образа, без rebuild
+
+Так прод-деплой запускается **автоматически** исполняемым самодеплоем (Фаза B: `ssh + setsid`, см. `INFRA.md`). Ключевое отличие — `SOURCE_IMAGE` указывает на провалидированный staging-образ, который ретегается на прод-тег:
+
+```bash
+SOURCE_IMAGE=orchestrator-orchestrator-staging \
+TARGET_SERVICE=orchestrator \
+TARGET_PORT=8500 \
+TARGET_IMAGE=orchestrator-orchestrator \
+COMPOSE_PROFILE="" \
+PREV_IMAGE_FILE=/home/slin/repos/orchestrator/.deploy-prev-image-prod \
+bash scripts/orchestrator-deploy-hook.sh --deploy
+```
+
 ### Ручной rollback staging

 ```bash
--- a/docs/operations/INFRA.md
+++ b/docs/operations/INFRA.md
@@ -75,6 +75,14 @@ ADR `docs/work-items/ORCH-040/06-adr/ADR-001-run-agents-as-host-uid.md` и гл
 | `ORCH_AGENT_EFFORT_DEFAULT` | режим работы `--effort` по умолчанию (ORCH-41): low\|medium\|high\|xhigh\|max; дефолт `high` |
 | `ORCH_AGENT_EFFORT_<AGENT>` | per-agent effort; дефолт: думающие → high, tester/deployer → medium |
 | `ORCH_AGENT_FALLBACK_MODEL` | опц. фолбэк-модель при overloaded (`--fallback-model`); пусто → без флага |
+| `ORCH_SELF_DEPLOY_ENABLED` | ORCH-036 kill-switch исполняемого самодеплоя (true); false → legacy-путь для всех |
+| `ORCH_SELF_DEPLOY_REPOS` | CSV репозиториев с реальным самодеплоем; пусто → только self-hosting `orchestrator` |
+| `ORCH_DEPLOY_REQUIRE_MANUAL_APPROVE` | требовать человеческий Plane «Approved» для прод-деплоя (true, безопасно) |
+| `ORCH_DEPLOY_FINALIZE_DELAY_S` / `_FINALIZE_MAX_ATTEMPTS` | задержка перед опросом finalizer'а и бюджет его defer'ов (Фаза C; 90 с / 10 попыток) |
+| `ORCH_DEPLOY_SSH_USER` / `_SSH_HOST` | куда запускается detached хост-деплой (Фаза B, `ssh user@host`) |
+| `ORCH_DEPLOY_HOOK_SCRIPT` / `_HOST_REPO_PATH` | путь к хук-скрипту (отн. репо) и чекаут orchestrator на хосте |
+| `ORCH_DEPLOY_PROD_SOURCE_IMAGE` | staging-образ для build-once retag на прод-тег (без rebuild) |
+| `ORCH_DEPLOY_PROD_TARGET_SERVICE` / `_TARGET_PORT` / `_TARGET_IMAGE` / `_COMPOSE_PROFILE` / `_PREV_IMAGE_FILE` | прод-цель хука + снапшот для авто-rollback |
 | `ORCH_RECONCILE_ENABLED` | kill-switch sweeper потерянных webhook (ORCH-053); дефолт `true`. **При инциденте/раскатке** — `false` глушит весь фоновый reconciler |
 | `ORCH_RECONCILE_PLANE_ENABLED` | отдельный флаг F-2 (опрос Plane API); `false` гасит только plane-ветку, F-1 продолжает работать; дефолт `true` |
 | `ORCH_RECONCILE_INTERVAL_S` | период фонового прохода reconciler, сек; дефолт `120` |
--- a/docs/work-items/ORCH-036/00-business-request.md
+++ b/docs/work-items/ORCH-036/00-business-request.md
@@ -0,0 +1,7 @@
+# Business Request: ORCH-36: Исполняемый самодеплой — стадия deploy дёргает хост-хук (Вариант B)
+
+Work Item ID: ORCH-036
+
+## Description
+
+TBD
--- a/docs/work-items/ORCH-036/01-brd.md
+++ b/docs/work-items/ORCH-036/01-brd.md
@@ -0,0 +1,109 @@
+# BRD — ORCH-36: Исполняемый самодеплой (стадия deploy дёргает хост-хук, Вариант B)
+
+Work Item: ORCH-036
+Stage: analysis
+Автор: analyst
+Дата: 2026-06-06
+
+## 1. Контекст и проблема
+
+Стадия `deploy` конвейера сейчас **«бумажная»**. На ней deployer-агент (LLM) только
+пишет `docs/work-items/<wi>/14-deploy-log.md` с `deploy_status: SUCCESS|FAILED`, а QG
+`check_deploy_status` (`src/qg/checks.py:464`) парсит этот вердикт и пускает `deploy → done`.
+**Реального docker-деплоя нет** — продакшен орка катается руками (Стрим).
+
+Хост-хук `scripts/orchestrator-deploy-hook.sh` **уже существует** (ORCH-34) и умеет:
+захват PREV_IMG → `git pull` → рестарт сервиса → health-check (10×6с = 60с) →
+авто-rollback при провале health, с корректным exit-code. Дефолты — STAGING-безопасные;
+прод включается через override env (`TARGET_SERVICE`, `TARGET_PORT`, `TARGET_IMAGE`,
+`COMPOSE_PROFILE`).
+
+**Главная мина (self-hosting):** прод-контейнер `orchestrator` (8500) — ОДИН на все
+проекты, и в нём же бежит сам deployer-агент. Deployer не может синхронно рестартить
+контейнер, в котором живёт (`docker compose up -d orchestrator` убьёт его процесс на
+середине). Реальный рестарт self-репо обязан делать ВНЕШНИЙ хост-хук (вне контейнера),
+который срабатывает ПОСЛЕ выхода агента. Рубильник — снаружи; орк только ИНИЦИИРУЕТ.
+
+## 2. Цель
+
+Превратить стадию `deploy` в РЕАЛЬНЫЙ самодеплой: после зелёного `deploy-staging`-гейта
+конвейер вызывает хост-хук с прод-параметрами, хук промоутит образ в прод (8500) с
+health-чеком и авто-rollback. Результат хука (exit-code) маппится в `deploy_status`.
+**На старте — с ОБЯЗАТЕЛЬНЫМ ручным approve** (`DEPLOY_REQUIRE_MANUAL_APPROVE=true`):
+прод не трогается без явного «go» Владельца.
+
+## 3. Ценность для бизнеса
+
+- Уходит последний ручной шаг конвейера (прод-деплой Стрим) → шаг к автономному внедрению (эпик ORCH-54).
+- `deploy_status: SUCCESS` становится **доказанным** (реальный health-ok), а не декларацией LLM.
+- Гарантия build-once: «что протестировали на staging — то и в проде» (тот же образ, без пересборки).
+- Прод никогда не остаётся в нерабочем состоянии: авто-rollback + health-таймаут.
+
+## 4. Заинтересованные стороны
+
+| Роль | Интерес |
+|------|---------|
+| Владелец (Слава/Стрим) | Контроль через ручной approve; уведомления о каждом промоуте/откате |
+| Проект enduro-trails | Прод-орк не должен падать (общий инстанс) — групповой риск |
+| Конвейер ORCH | Стадия `deploy` исполняемая, гейты не сломаны |
+
+## 5. Объём (scope)
+
+### В объёме
+1. Исполнение реального прод-деплоя из стадии `deploy` через хост-хук (ssh / detached на хосте).
+2. Обязательный ручной approve-гейт ПОСЛЕ зелёного staging и ДО прод-рестарта (флаг включён).
+3. Маппинг exit-code хука → `deploy_status: SUCCESS|FAILED` (реальный, не бумажный).
+4. Уведомления (Plane-коммент + Telegram) на каждый промоут и откат.
+5. Build-once: перетегирование образа, прошедшего staging, без пересборки.
+6. Обновление `deployer.md` и `INFRA.md` (документация = golden source).
+7. Для НЕ-self репо (enduro-trails и др.) — деплой по ssh на их хост (поведение не ломается).
+
+### Вне объёма (явно)
+- **Включение полного авто** (`DEPLOY_REQUIRE_MANUAL_APPROVE: true → false`) — отдельная задача,
+  ТОЛЬКО после набора метрик доверия (см. §7). В этой задаче флаг НЕ выключается.
+- Изменение `docker-compose.yml` без явной необходимости.
+- Изменение стадий `STAGE_TRANSITIONS`, реестра QG, terminal-sync `deploy → done`.
+- Прод-деплой в реальный бой во время разработки задачи (отладка — только на staging-цели хука).
+
+## 6. Бизнес-требования
+
+- **BR-1.** После зелёного `deploy-staging`-гейта стадия `deploy` РЕАЛЬНО перетегирует
+  образ (build-once, без пересборки — см. BR-6), рестартит целевой сервис и проверяет
+  health — не пишет бумажный SUCCESS.
+- **BR-2.** Для self-репо `orchestrator` рестарт 8500 выполняется ВНЕШНИМ (detached/host)
+  процессом; deployer-агент НЕ убивает контейнер, в котором работает.
+- **BR-3.** `deploy_status: SUCCESS` пишется ТОЛЬКО при health-ok хука; провал/health-fail →
+  `deploy_status: FAILED` → откат на `development` (как ORCH-35 staging-rollback, БАГ-8).
+- **BR-4.** Ручной approve обязателен (флаг `true`): без явного «go» прод НЕ трогается.
+- **BR-5.** Каждый промоут и откат уведомляет Владельца: Plane-коммент в задачу + Telegram.
+  «Молчаливых» деплоев нет.
+- **BR-6.** Build-once: в прод идёт тот образ, что прошёл staging-гейт (перетег, не пересборка).
+- **BR-7.** Staging-гейт (`check_staging_status`) остаётся обязательным предусловием прод-деплоя.
+- **BR-8.** Прод никогда не остаётся в нерабочем состоянии — авто-rollback при провале health.
+- **BR-9.** Существующие гейты и инварианты не ломаются: `check_deploy_status`,
+  `_parse_deploy_status`, rollback `deploy → development` (БАГ-8), terminal-sync `deploy → done`,
+  merge-gate (ORCH-43).
+- **BR-10.** Документация (`deployer.md`, `INFRA.md`, `CHANGELOG.md`) обновлена в том же PR.
+
+## 7. Критерии готовности к включению ПОЛНОГО авто (вне этой задачи)
+
+Переключать `DEPLOY_REQUIRE_MANUAL_APPROVE: true → false` можно ТОЛЬКО когда закрыты ВСЕ 5:
+1. ≥10 успешных промоутов подряд (staging зелёный → approve → прод поднялся, откат не нужен).
+2. Zero false-negative: staging-гейт ни разу не пропустил битый деплой как «зелёный».
+3. Авто-rollback проверен в бою (≥2 реальных срабатываний), recovery 100%, MTTR < 60с.
+4. Ни одного «молчаливого» деплоя (каждый промоут/откат уведомил Владельца).
+5. Период наблюдения ≥10 деплоев ИЛИ ≥2 недели без инцидентов в режиме manual-approve.
+
+## 8. Риски
+
+| Риск | Влияние | Митигация |
+|------|---------|-----------|
+| Падение прод-орка 8500 при self-деплое | Встаёт конвейер ВСЕХ проектов | Detached host-хук + health + авто-rollback; отладка на staging-цели |
+| Deployer рестартит сам себя синхронно | Процесс агента убит на середине | BR-2: рестарт только внешним detached-процессом |
+| Преждевременный `deploy_status: SUCCESS` (хук ещё не закончил) | Задача уходит в done при незавершённом деплое | Гейт читает РЕАЛЬНЫЙ исход хука (механизм — на дизайне) |
+| Деплой без approve | Неконтролируемый прод-деплой | BR-4: approve-гейт блокирует до «go» |
+| Пересборка вместо перетега | В прод уезжает не то, что тестировали | BR-6: build-once, `--no-build` + retag |
+
+## 9. Связанные задачи
+ORCH-7 (self-hosting), ORCH-21 (auto-rollback), ORCH-34 (хук готов), ORCH-35 (staging-гейт),
+ORCH-43 (merge-gate в проде), ORCH-54 (эпик автономного внедрения).
+Дизайн-референс: `tasks/orchestrator/DESIGN_STAGING_ENV.md §4/§7`.
--- a/docs/work-items/ORCH-036/02-trz.md
+++ b/docs/work-items/ORCH-036/02-trz.md
@@ -0,0 +1,136 @@
+# ТЗ — ORCH-36: Исполняемый самодеплой (стадия deploy дёргает хост-хук, Вариант B)
+
+Work Item: ORCH-036
+Stage: analysis
+Автор: analyst
+Дата: 2026-06-06
+
+> Документ фиксирует ТРЕБОВАНИЯ к изменениям (что и где). Конкретный механизм
+> (ssh vs docker.sock vs detached nohup/systemd-run; механизм approve) выбирает
+> архитектор в ADR (`06-adr/`). ТЗ задаёт границы и контракты, не реализацию.
+
+## 1. Текущее устройство (as-is, разведано в коде)
+
+- **Стадии** (`src/stages.py`): `… testing → deploy-staging → deploy → done`.
+  - `deploy-staging`: `agent=deployer`, `qg=check_staging_status` (deployer запускается при
+    выходе из `deploy-staging`, то есть на входе в `deploy`).
+  - `deploy`: `agent=None`, `qg=check_deploy_status` (агент НЕ запускается при выходе из `deploy`).
+  - **Вывод:** реальную работу стадии `deploy` делает deployer-агент, запущенный на переходе
+    `deploy-staging → deploy`. Он пишет `14-deploy-log.md`. Когда он завершается, `advance_stage`
+    с `current_stage=deploy` прогоняет `check_deploy_status` и двигает `deploy → done`.
+- **QG** (`src/qg/checks.py`):
+  - `check_deploy_status:464` → `_parse_deploy_status:406` читает ТОЛЬКО `deploy_status:` из
+    YAML-frontmatter `14-deploy-log.md` (worktree → origin/main fallback → not found).
+  - `check_staging_status:580` — условный (реален только для self-hosting `orchestrator`).
+  - `is_self_hosting_repo()` (`:511`) — детектор self-репо.
+- **Откаты/диспетчеризация** (`src/stage_engine.py`):
+  - `_handle_qg_failure_rollbacks:585` — ветка `deployer` + `check_deploy_status` FAILED →
+    откат `deploy → development`, `set_issue_blocked`, release merge-lease, Plane+Telegram.
+  - Terminal-sync `deploy → done` (`:281`) → `set_issue_done`, release merge-lease.
+  - merge-gate (ORCH-43) на ребре `deploy-staging → deploy` — НЕ трогать.
+- **Launcher** (`src/agents/launcher.py`):
+  - deployer-агент конфиг: `.task-deploy.md` / `.openclaw/agents/deployer.md` (`:180`).
+  - Пост-обработка: commit+push артефактов в worktree (`:506-558`).
+  - `exit_code != 0 && agent == deployer` → откат `deploy → development` (`:560-581`).
+- **Хост-хук** (`scripts/orchestrator-deploy-hook.sh`, ORCH-34) — ГОТОВ: `--deploy`/`--rollback`,
+  параметризован env, дефолты STAGING; health 10×6с; авто-rollback; exit 0/1/2.
+- **Agent (deployer.md)**: на стадии `deploy` сейчас пишет «бумажный» вердикт; в промпте маркер
+  «Real docker/SSH deploys are handled by scripts/orchestrator-deploy-hook.sh (ORCH-36)».
+- **Топология** (`docs/operations/INFRA.md`): prod=8500 (`.env`), staging=8501 (`.env.staging`,
+  profile staging). Контейнер под uid 1000, доступ к docker.sock через gid 999.
+
+## 2. Изменения по модулям (to-be)
+
+### 2.1 `scripts/orchestrator-deploy-hook.sh` (донастройка прод-режима)
+- Хук уже параметризован; требуется обеспечить **корректный прод-профиль вызова**:
+  `TARGET_SERVICE=orchestrator`, `TARGET_PORT=8500`, `TARGET_IMAGE=orchestrator-orchestrator`,
+  `COMPOSE_PROFILE` (для прод-сервиса — пустой/дефолтный, т.к. prod стартует без profile).
+- **Build-once (BR-6):** деплой должен использовать образ, прошедший staging (перетег
+  staging-образа → прод-тег + `docker compose up -d --no-build`), а НЕ пересобирать. Если
+  текущий хук всегда `--no-build` и тянет `git pull` — уточнить в ADR, как гарантируется
+  идентичность артефакта staging↔prod (retag staging image, либо общий build-once шаг).
+- `PREV_IMAGE_FILE` для прод — отдельный путь (например `.deploy-prev-image-prod` вместо
+  `.deploy-prev-image-staging`), чтобы не путать снапшоты prod/staging.
+- Поведение `--rollback`, health-loop, exit-code (0=ok, 1=rolled back, 2=rollback тоже упал) —
+  НЕ менять контракт.
+
+### 2.2 Approve-гейт (новое; место — на дизайне)
+- Ввести флаг конфигурации `DEPLOY_REQUIRE_MANUAL_APPROVE` (bool, дефолт `true`).
+- При `true`: перед вызовом прод-хука (после зелёного `deploy-staging`) конвейер ОСТАНАВЛИВАЕТСЯ
+  и ждёт явного «go» Владельца. Без «go» прод-хук НЕ вызывается.
+- Механизм approve (выбрать ОДИН в ADR): Plane-коммент-триггер (по образцу `:approved:`
+  в `check_analysis_approved`) / Telegram-кнопка / signal-файл. Требование к механизму:
+  рестарт-safe (переживает перезапуск инстанса), идемпотентный, аудируемый.
+- При `false` (вне этой задачи): approve-шаг пропускается — НЕ реализовывать выключение здесь,
+  только заложить ветку по флагу.
+
+### 2.3 Триггер реального деплоя из стадии `deploy`
+- На стадии `deploy` (для self-репо `orchestrator`) вместо/в дополнение к записи вердикта
+  агентом — ИНИЦИИРОВАТЬ внешний detached-процесс (host-хук), который выполнит
+  build-once+restart+health ПОСЛЕ выхода агента (BR-2: агент не рестартит сам себя).
+- Маршрут вызова (на дизайне): ssh на хост (`DEPLOY_SSH_USER`/`DEPLOY_HOOK_SCRIPT`) ИЛИ
+  detached через docker.sock/nohup/systemd-run. Требование: процесс хука переживает выход
+  агента и завершение его сессии.
+- Для **не-self** репо (enduro-trails): деплой по ssh на их хост (как раньше) — поведение не ломать.
+
+### 2.4 Маппинг результата хука → `deploy_status`
+- `deploy_status: SUCCESS` пишется в `14-deploy-log.md` ТОЛЬКО при exit-code хука = 0 (health-ok).
+- exit-code ≠ 0 (1 = rolled back; 2 = rollback тоже упал) → `deploy_status: FAILED`.
+- **Контракт `_parse_deploy_status` НЕ меняется** (читает `deploy_status: SUCCESS|FAILED` из
+  frontmatter). Меняется только КТО и КОГДА пишет этот вердикт — на основе реального исхода.
+- **Гонка чтения гейта:** т.к. self-рестарт асинхронный (detached), гейт `check_deploy_status`
+  не должен прочитать вердикт ДО завершения хука. Механизм синхронизации (post-factum запись
+  лога/мердж в main / отложенный гейт) — спроектировать в ADR так, чтобы гейт читал РЕАЛЬНЫЙ
+  итог. Контракт чтения из worktree→origin/main (`_deploy_log_from_main`) можно переиспользовать.
+
+### 2.5 Уведомления (BR-5)
+- На промоут (старт прод-деплоя + успех) и на откат → `plane_add_comment(work_item_id, …)` +
+  `send_telegram(…)`. Переиспользовать существующие хелперы (`src/notifications.py`,
+  `src/plane_sync.py`). Никаких «молчаливых» деплоев.
+
+### 2.6 Конфигурация (`src/config.py` / `.env.example` / `.env.staging.example`)
+- Новый: `deploy_require_manual_approve: bool = True` (env `ORCH_DEPLOY_REQUIRE_MANUAL_APPROVE`).
+- Прод-параметры хука: `DEPLOY_SSH_USER`, `DEPLOY_SSH_HOST`, `DEPLOY_HOOK_SCRIPT` (уже есть в
+  INFRA-карте) + прод-override `TARGET_SERVICE/PORT/IMAGE`. Прописать дескрипторы в `.env.example`
+  (значения — только на хосте, не коммитить).
+- Условность по репо: реальный прод-деплой — только для self-hosting (`is_self_hosting_repo`),
+  как ORCH-35; прочие репо идут прежним ssh-путём.
+
+### 2.7 Документация (BR-10, golden source)
+- `.openclaw/agents/deployer.md` — раздел «Stage: deploy»: переписать с «бумажного SUCCESS» на
+  «стадия ВЫЗЫВАЕТ хук»; зафиксировать запрет синхронного рестарта 8500 и detached-путь self.
+- `docs/operations/INFRA.md` — процедура прод-деплоя орка через хук + approve.
+- `docs/operations/DEPLOY_HOOK.md` — обновить, если затронут контракт хука.
+- `CHANGELOG.md` — запись о включении исполняемого деплоя (manual-approve).
+- ADR в `docs/work-items/ORCH-036/06-adr/ADR-NNN-*.md` (создаёт архитектор).
+
+## 3. API
+- Изменений публичного HTTP API (`/health`, `/status`, `/queue`, `/webhook/*`) **не требуется**.
+- Если approve реализуется через Plane-коммент — переиспользуется существующий webhook-путь
+  (`POST /webhook/plane`), новый endpoint не вводится. Если через signal-файл/Telegram —
+  внешний по отношению к HTTP API механизм. Решение — ADR.
+
+## 4. Схема БД
+- Изменения схемы **не требуются** для базового сценария (вердикт — в `14-deploy-log.md`;
+  approve-состояние желательно хранить рестарт-safe — допустимо через jobs/task_content или
+  signal-файл, без новой таблицы). Если архитектор сочтёт нужным поле статуса approve —
+  обосновать в ADR; по умолчанию — без миграции.
+
+## 5. Требования к Quality Gates
+- `check_deploy_status` и `_parse_deploy_status` — контракт чтения НЕ менять (frontmatter only).
+- Откат `deploy → development` при `deploy_status: FAILED` (`stage_engine` БАГ-8) — сохранить.
+- Terminal-sync `deploy → done` и release merge-lease — сохранить.
+- merge-gate (`check_branch_mergeable`) на ребре `deploy-staging → deploy` — не затрагивать.
+- `check_staging_status` остаётся обязательным предусловием (BR-7).
+
+## 6. Артефакты pipeline
+- Создаётся/обновляется: `docs/work-items/ORCH-036/14-deploy-log.md` (с РЕАЛЬНЫМ `deploy_status`).
+- Обновляются по pipeline: `06-adr/ADR-NNN-*.md`, `12-review.md`, `13-test-report.md`,
+  `15-staging-log.md` (последующими агентами).
+
+## 7. Нефункциональные требования
+- **Безопасность self-deploy:** рестарт 8500 — только внешним рубильником; орк не может
+  необратимо убить себя.
+- **Идемпотентность** хука и approve-механизма; **рестарт-safe** approve-состояние.
+- **MTTR < 60с** при авто-rollback (health-loop хука 10×6с уже укладывается).
+- **Отладка только на staging-цели** хука; реальный прод — лишь после approve.
--- a/docs/work-items/ORCH-036/03-acceptance-criteria.md
+++ b/docs/work-items/ORCH-036/03-acceptance-criteria.md
@@ -0,0 +1,97 @@
+# Критерии приёмки — ORCH-36: Исполняемый самодеплой (Вариант B)
+
+Work Item: ORCH-036
+Stage: analysis
+Автор: analyst
+Дата: 2026-06-06
+
+Формат: каждый критерий — проверяемое условие PASS/FAIL. Отладка и проверки
+выполняются на **staging-цели хука** (8501); реальный прод (8500) — только после approve.
+
+---
+
+## AC-1. Стадия deploy исполняет реальный деплой (не бумажный)
+- **PASS:** на стадии `deploy` (после зелёного `deploy-staging`) вызывается хост-хук,
+  который реально перетегирует образ, рестартит целевой сервис и выполняет health-check;
+  `deploy_status` отражает РЕАЛЬНЫЙ исход хука.
+- **FAIL:** `deploy_status: SUCCESS` пишется без фактического рестарта/health (бумажный лог).
+- **Проверка:** прогон на staging-цели хука; в логе хука видны retag + `up -d` + health-loop;
+  exit-code хука соответствует записанному `deploy_status`.
+
+## AC-2. Self-репо: рестарт 8500 — внешним detached-процессом, агент себя не убивает
+- **PASS:** для `orchestrator` рестарт 8500 выполняет процесс ВНЕ контейнера агента; deployer-агент
+  завершается штатно (exit 0), его процесс не убит рестартом контейнера.
+- **FAIL:** deployer синхронно делает `docker compose up -d orchestrator` из контейнера и/или
+  агент падает/обрывается на середине из-за рестарта собственного контейнера.
+- **Проверка:** симуляция на staging-цели; убедиться, что detached-процесс переживает выход агента.
+
+## AC-3. deploy_status маппится из exit-code хука
+- **PASS:** exit-code хука 0 → `deploy_status: SUCCESS`; exit-code ≠ 0 (1/2) → `deploy_status: FAILED`.
+- **FAIL:** любой иной маппинг (например SUCCESS при exit 1).
+- **Проверка:** unit-тест маппинга exit-code → вердикт; интеграционный прогон с искусственным
+  кодом возврата хука.
+
+## AC-4. Провал деплоя → откат на development
+- **PASS:** при `deploy_status: FAILED` задача откатывается `deploy → development`
+  (`set_issue_blocked`, Plane+Telegram), как в существующей ветке БАГ-8.
+- **FAIL:** при FAILED задача уходит в `done` или зависает.
+- **Проверка:** существующий контракт `stage_engine._handle_qg_failure_rollbacks` для
+  `deployer`+`check_deploy_status` сохранён и срабатывает.
+
+## AC-5. Ручной approve обязателен и реально тормозит прод
+- **PASS:** при `DEPLOY_REQUIRE_MANUAL_APPROVE=true` прод-хук НЕ вызывается до явного «go»;
+  после «go» — вызывается.
+- **FAIL:** прод-хук дёргается без approve.
+- **Проверка:** прогон без «go» — целевой сервис НЕ перезапущен (нет записи рестарта в логе хука,
+  не сменился uptime/контейнер); прогон с «go» — рестарт состоялся.
+
+## AC-6. Уведомления о каждом промоуте и откате
+- **PASS:** на старт/успех прод-деплоя и на откат приходят и Plane-коммент в задачу, и Telegram.
+- **FAIL:** хотя бы один промоут/откат прошёл «молчаливо».
+- **Проверка:** в Plane-задаче и в Telegram-чате присутствуют сообщения для каждого исхода.
+
+## AC-7. Build-once: в прод идёт образ, прошедший staging
+- **PASS:** прод-деплой использует тот же образ, что прошёл staging-гейт (retag + `--no-build`),
+  без пересборки.
+- **FAIL:** прод-деплой пересобирает образ заново (артефакт может отличаться от протестированного).
+- **Проверка:** sha/тег образа прод == образ, валидированный на staging; в логе нет `build`.
+
+## AC-8. Staging-гейт остаётся обязательным предусловием
+- **PASS:** прод-деплой недостижим без зелёного `check_staging_status` (`staging_status: SUCCESS`).
+- **FAIL:** прод-хук можно вызвать при FAILED/отсутствующем staging-вердикте.
+- **Проверка:** при `staging_status: FAILED` задача откатывается на development, до `deploy` не доходит.
+
+## AC-9. Авто-rollback восстанавливает прод (симуляция битого деплоя)
+- **PASS:** при симуляции битого деплоя на staging-цели health не проходит → хук авто-откатывает
+  на предыдущий образ → сервис снова healthy; exit-code = 1 (rolled back); MTTR < 60с.
+- **FAIL:** сервис остаётся нерабочим после провала деплоя.
+- **Проверка:** искусственно сломать health, прогнать хук, убедиться в восстановлении и exit 1.
+
+## AC-10. Существующие инварианты не сломаны
+- **PASS:** не изменены контракты `check_deploy_status` / `_parse_deploy_status`,
+  `STAGE_TRANSITIONS`, terminal-sync `deploy → done`, merge-gate (ORCH-43), rollback БАГ-8.
+- **FAIL:** любой из перечисленных контрактов изменён/сломан.
+- **Проверка:** существующие тесты deploy/staging/merge-gate зелёные; регресс-прогон `pytest tests/`.
+
+## AC-11. Условность по репо (не-self не ломается)
+- **PASS:** для не-self репо (enduro-trails) деплой идёт прежним ssh-путём; self-логика (detached,
+  approve, 8500) применяется только для `orchestrator`.
+- **FAIL:** не-self репо затронуты self-специфичной логикой и ломаются.
+- **Проверка:** `is_self_hosting_repo` корректно разводит пути; тест на не-self репо.
+
+## AC-12. Флаг полного авто НЕ выключен в этой задаче
+- **PASS:** `DEPLOY_REQUIRE_MANUAL_APPROVE` остаётся `true`; переключение в `false` не делается.
+- **FAIL:** флаг выставлен в `false` в рамках задачи.
+- **Проверка:** дефолт конфигурации = `true`; в коде/`.env.example` нет принудительного `false`.
+
+## AC-13. Документация обновлена (golden source)
+- **PASS:** обновлены `deployer.md` (стадия deploy = вызов хука), `INFRA.md` (процедура),
+  `CHANGELOG.md`; заведён ADR в `06-adr/`.
+- **FAIL:** функционал изменён, документация — нет (Reviewer обязан вернуть REQUEST_CHANGES).
+- **Проверка:** диффы документации присутствуют в том же PR.
+
+---
+
+## Definition of Done
+Все AC-1…AC-13 в статусе PASS; `pytest tests/` зелёный; артефакты pipeline на месте;
+прод (8500) во время разработки НЕ тронут (вся проверка — на staging-цели хука).
--- a/docs/work-items/ORCH-036/04-test-plan.yaml
+++ b/docs/work-items/ORCH-036/04-test-plan.yaml
@@ -0,0 +1,122 @@
+work_item: ORCH-036
+title: "Исполняемый самодеплой — стадия deploy дёргает хост-хук (Вариант B)"
+stage: analysis
+notes: >
+  Все тесты — на изолированном уровне (unit/integration с моками subprocess/ssh
+  и хука). Реальный прод (8500) НЕ трогается. Интеграционные прогоны хука — на
+  staging-цели. Хост-хук (bash) проверяется отдельным интеграционным сценарием с
+  поддельным health/exit-code; в pytest вызов хука мокается.
+
+tests:
+  # --- exit-code -> deploy_status mapping (AC-1, AC-3) ---
+  - id: TC-01
+    type: unit
+    description: "Маппинг exit-code хука 0 -> deploy_status: SUCCESS"
+    module: tests/test_deploy_hook_mapping.py
+    expected: PASS
+  - id: TC-02
+    type: unit
+    description: "Маппинг exit-code хука 1 (rolled back) -> deploy_status: FAILED"
+    module: tests/test_deploy_hook_mapping.py
+    expected: PASS
+  - id: TC-03
+    type: unit
+    description: "Маппинг exit-code хука 2 (rollback тоже упал) -> deploy_status: FAILED"
+    module: tests/test_deploy_hook_mapping.py
+    expected: PASS
+
+  # --- approve gate (AC-5, AC-12) ---
+  - id: TC-04
+    type: unit
+    description: "DEPLOY_REQUIRE_MANUAL_APPROVE дефолт == true в settings"
+    module: tests/test_deploy_approve.py
+    expected: PASS
+  - id: TC-05
+    type: integration
+    description: "Флаг true и нет 'go' -> прод-хук НЕ вызывается (subprocess/ssh не дёрнут)"
+    module: tests/test_deploy_approve.py
+    expected: PASS
+  - id: TC-06
+    type: integration
+    description: "Флаг true и есть 'go' -> прод-хук вызывается ровно один раз"
+    module: tests/test_deploy_approve.py
+    expected: PASS
+
+  # --- self vs non-self routing (AC-2, AC-11) ---
+  - id: TC-07
+    type: unit
+    description: "is_self_hosting_repo('orchestrator') == True; иной репо -> False (не регрессировал)"
+    module: tests/test_deploy_routing.py
+    expected: PASS
+  - id: TC-08
+    type: integration
+    description: "self-репо orchestrator: рестарт инициируется detached/host-процессом, не синхронно из агента"
+    module: tests/test_deploy_routing.py
+    expected: PASS
+  - id: TC-09
+    type: integration
+    description: "не-self репо (enduro-trails): деплой идёт прежним ssh-путём, self-логика не применяется"
+    module: tests/test_deploy_routing.py
+    expected: PASS
+
+  # --- rollback on FAILED (AC-4) ---
+  - id: TC-10
+    type: integration
+    description: "deploy_status: FAILED -> откат deploy->development, set_issue_blocked, release merge-lease"
+    module: tests/test_deploy_rollback.py
+    expected: PASS
+
+  # --- staging precondition preserved (AC-8) ---
+  - id: TC-11
+    type: integration
+    description: "staging_status: FAILED -> до стадии deploy не доходит (откат на development)"
+    module: tests/test_staging_precondition.py
+    expected: PASS
+
+  # --- notifications (AC-6) ---
+  - id: TC-12
+    type: integration
+    description: "Успешный промоут -> и Plane-коммент, и Telegram отправлены"
+    module: tests/test_deploy_notifications.py
+    expected: PASS
+  - id: TC-13
+    type: integration
+    description: "Откат -> и Plane-коммент, и Telegram отправлены (нет молчаливого деплоя)"
+    module: tests/test_deploy_notifications.py
+    expected: PASS
+
+  # --- build-once (AC-7) ---
+  - id: TC-14
+    type: integration
+    description: "Прод-деплой использует образ staging (retag, без build) — нет шага docker build"
+    module: tests/test_deploy_build_once.py
+    expected: PASS
+
+  # --- regression: unchanged gate contracts (AC-10) ---
+  - id: TC-15
+    type: unit
+    description: "_parse_deploy_status: SUCCESS->(True), FAILED->(False), нет frontmatter->(False) — контракт цел"
+    module: tests/test_qg_checks.py
+    expected: PASS
+  - id: TC-16
+    type: unit
+    description: "STAGE_TRANSITIONS deploy->done и agent/qg deploy не изменены"
+    module: tests/test_stages.py
+    expected: PASS
+  - id: TC-17
+    type: integration
+    description: "terminal-sync deploy->done (set_issue_done + release merge-lease) сохранён"
+    module: tests/test_deploy_terminal_sync.py
+    expected: PASS
+  - id: TC-18
+    type: integration
+    description: "merge-gate на ребре deploy-staging->deploy не затронут (регресс ORCH-43 зелёный)"
+    module: tests/test_merge_gate.py
+    expected: PASS
+
+  # --- auto-rollback hook behavior (AC-9) ---
+  - id: TC-19
+    type: integration
+    description: "Симуляция битого деплоя на staging-цели: health fail -> авто-rollback -> healthy, exit 1, MTTR<60с"
+    module: tests/test_deploy_hook_rollback_sim.py
+    expected: PASS
--- a/docs/work-items/ORCH-036/06-adr/ADR-001-executable-self-deploy.md
+++ b/docs/work-items/ORCH-036/06-adr/ADR-001-executable-self-deploy.md
@@ -0,0 +1,184 @@
+# ADR-001: Исполняемый самодеплой — стадия `deploy` дёргает хост-хук (Вариант B)
+
+Work Item: ORCH-036
+Stage: architecture
+Автор: architect
+Дата: 2026-06-06
+
+## Статус
+Accepted
+
+## Контекст
+
+Стадия `deploy` сейчас «бумажная»: deployer-агент (LLM) пишет в `14-deploy-log.md`
+`deploy_status: SUCCESS|FAILED`, а гейт `check_deploy_status` (`src/qg/checks.py:464`)
+парсит этот вердикт и двигает `deploy → done`. Реального docker-деплоя нет (прод
+катается руками). BRD ORCH-36 требует превратить стадию в РЕАЛЬНЫЙ самодеплой с
+обязательным ручным approve, build-once и авто-rollback (BR-1…BR-10).
+
+Три твёрдых ограничения, разведанных в коде, определяют дизайн:
+
+1. **Self-restart (BR-2).** Прод-контейнер `orchestrator` (8500) — ОДИН на все
+   проекты, и в нём же исполняется deployer. `docker compose up -d orchestrator`
+   из контейнера убьёт процесс агента/воркера на середине. Реальный рестарт обязан
+   делать ВНЕШНИЙ процесс на хосте, переживающий гибель контейнера.
+2. **Status-only verdict model.** Комментарии Plane НЕ управляют конвейером —
+   механизм `:approved:`/`:rejected:` был удалён (`src/webhooks/plane.py:544`,
+   bug-3 «echo self-hit»). Единственный человеческий гейт — **смена статуса Plane
+   на `Approved`** (`handle_verdict` → `_try_advance_stage` → `advance_stage`).
+3. **Гонка чтения гейта.** Так как реальный рестарт асинхронный и убивает контейнер,
+   `check_deploy_status` нельзя выполнять на выходе агента — вердикта ещё нет; его
+   преждевременное чтение → ложный FAILED → ложный откат.
+
+Контракты, которые НЕ меняются (BR-9, AC-10): `STAGE_TRANSITIONS`,
+`check_deploy_status` / `_parse_deploy_status` (frontmatter only), откат БАГ-8
+(`deploy → development`), terminal-sync `deploy → done`, merge-gate (ORCH-43),
+exit-code-контракт хука (0/1/2).
+
+## Решение
+
+Деплой стадии `deploy` для self-hosting (`orchestrator`) разбивается на **три фазы**,
+оркеструемые детерминированным кодом (без LLM в критическом пути self-restart). Для
+НЕ-self репо (enduro-trails и пр.) поведение НЕ меняется — прежний синхронный
+ssh-деплой агентом.
+
+### Условность по репо
+Вся новая логика гейтится `is_self_hosting_repo(repo)` (как ORCH-35). Не-self репо
+идут существующим путём: deployer-агент на стадии `deploy` делает ssh-деплой
+синхронно, пишет `14-deploy-log.md`, гейт срабатывает на выходе агента.
+
+### Фаза A — запрос approve (вход в `deploy`)
+В `advance_stage` на ребре `deploy-staging → deploy` (ПОСЛЕ зелёного
+`check_staging_status` и merge-gate ORCH-43), для self-hosting +
+`deploy_require_manual_approve=true`:
+- **НЕ** ставить в очередь прод-deployer (перехватить штатный
+  `enqueue_job(get_agent_for_stage("deploy-staging"))`);
+- выставить issue в approval-pending статус (паттерн `set_issue_in_review`),
+  написать Plane-коммент «approve для прод-деплоя» + Telegram (BR-5);
+- записать restart-safe маркер `approve-requested` (sentinel-файл, см. ниже).
+
+Задача остаётся на стадии `deploy` и ждёт человека. `STAGE_TRANSITIONS` не меняется.
+
+При `deploy_require_manual_approve=false` (вне объёма, флаг НЕ выключается в ORCH-36 —
+AC-12) Фаза A сразу переходит к Фазе B без человеческого гейта. Структурная ветка
+закладывается, но дефолт `true`.
+
+### Фаза B — инициация деплоя (смена статуса Plane → Approved)
+Человек ставит issue в `Approved`. `handle_verdict(approved=True)` →
+`_try_advance_stage` → `advance_stage(current_stage="deploy", finished_agent=None)`.
+Новая ветка-перехват в `advance_stage`:
+- условие: `current_stage=="deploy"` И `finished_agent is None` (человеческий путь)
+  И self-hosting И approve-флаг И маркер `initiated` ОТСУТСТВУЕТ;
+- действие: запустить **внешний detached host-процесс** (см. ниже) и поставить в
+  очередь детерминированный **finalizer-job** с задержкой; записать маркер
+  `initiated` (идемпотентность: повторный Approved не запускает деплой дважды);
+  Plane-коммент «прод-деплой стартовал» + Telegram (BR-5);
+- **вернуться БЕЗ advance** (НЕ запускать `check_deploy_status` — вердикта ещё нет).
+
+Дискриминатор `finished_agent` разводит Фазу B (человек, `None`) и Фазу C
+(finalizer, `"deployer"`), поэтому повторное использование `advance_stage` безопасно.
+
+### Фаза C — фиксация вердикта (детерминированный finalizer)
+Finalizer-job (claim'ится воркером уже в НОВОМ контейнере после рестарта):
+- читает sentinel `result` (exit-code хука, записан host-процессом);
+- если `result` ещё нет и бюджет попыток не исчерпан → **defer** (повторный
+  finalizer-job с `available_at_delay_s`, как merge-gate defer); бюджет считается
+  из `jobs` (`LIKE '%deploy-finalize%'`, restart-safe);
+- если `result` есть → **маппинг exit-code → deploy_status** (детерминированный,
+  unit-тестируемый): `0 → SUCCESS`, `1|2|иное → FAILED`; записать
+  `14-deploy-log.md` (frontmatter `deploy_status:`), смержить в `main` (паттерн
+  лога), затем вызвать `advance_stage(current_stage="deploy", finished_agent="deployer")`;
+- далее срабатывают СУЩЕСТВУЮЩИЕ контракты: `SUCCESS` → terminal-sync `deploy → done`
+  + release merge-lease; `FAILED` → откат БАГ-8 `deploy → development` +
+  `set_issue_blocked` + Plane/Telegram (BR-3, AC-4). `_parse_deploy_status` НЕ меняется.
+
+### Механизм detached-запуска: ssh + setsid
+Выбор: **ssh на хост (`slin@DEPLOY_SSH_HOST`) с setsid-detached исполнением** хука.
+Обоснование: ssh-ключи уже смонтированы (INFRA P-2), не-self репо уже деплоятся по
+ssh (единый путь), хук живёт на хосте и под `slin` имеет полный доступ к docker вне
+контейнера → переживает рестарт 8500 (BR-2). `setsid`/`nohup` + redirect отвязывает
+удалённый процесс от ssh-канала, чтобы он пережил гибель ssh-клиента при рестарте
+контейнера. Отвергнуто: вызов через docker.sock изнутри контейнера = ровно мина
+«убей себя на середине вызова».
+
+Эскиз (точная сборка — за разработчиком):
+```
+ssh -o StrictHostKeyChecking=no slin@$DEPLOY_SSH_HOST \
+  "setsid bash -c 'cd /home/slin/repos/orchestrator && \
+     SOURCE_IMAGE=orchestrator-orchestrator-staging \
+     TARGET_SERVICE=orchestrator TARGET_PORT=8500 \
+     TARGET_IMAGE=orchestrator-orchestrator COMPOSE_PROFILE= \
+     PREV_IMAGE_FILE=.deploy-prev-image-prod \
+     bash scripts/orchestrator-deploy-hook.sh --deploy; \
+     echo \$? > <result-sentinel>' >> <hook.log> 2>&1 </dev/null &"
+```
+ssh-команда возвращается сразу; remote-процесс detached. Запись sentinel `result`
+делает **обёртка** (`echo $? > result`), а НЕ хук — контракт хука нетронут.
+
+### Build-once (BR-6, AC-7)
+Прод обязан подняться на ОБРАЗЕ, прошедшем staging (а не на пересборке). Решение:
+расширить хук **опциональным** `SOURCE_IMAGE` (обратно совместимо: не задан →
+текущее поведение). При заданном `SOURCE_IMAGE` хук ПЕРЕД `up -d --no-build`
+делает `docker tag $SOURCE_IMAGE $TARGET_IMAGE`. Для прод-self:
+`SOURCE_IMAGE=orchestrator-orchestrator-staging` → `TARGET_IMAGE=orchestrator-orchestrator`.
+Это единственное допустимое изменение хука; exit-code-контракт и дефолтное
+staging-поведение не меняются. `git pull` хука обновляет рабочее дерево хоста для
+будущих сборок, но РАЗВЁРНУТЫЙ артефакт = перетегированный staging-образ.
+
+### Restart-safe состояние: sentinel-файлы (без миграции БД)
+По образцу merge-lease (`<repos_dir>/.merge-lease-<repo>.json`) состояние деплоя
+хранится в файлах под `<repos_dir>/.deploy-state-<repo>/<work_item_id>/` (вне git,
+видны и хосту, и контейнеру через mount `/home/slin/repos ↔ /repos`):
+- `approve-requested` — Фаза A выполнена;
+- `initiated` — Фаза B запущена (idempotency-guard);
+- `result` — exit-code хука (пишет host-обёртка).
+Бюджет finalize-defer считается из `jobs` (restart-safe), новых таблиц/колонок НЕТ
+(TRZ §4).
+
+## Последствия
+
+### Плюсы
+- `deploy_status: SUCCESS` становится ДОКАЗАННЫМ (реальный health-ok хука), не
+  декларацией LLM (BR-1).
+- Self-restart безопасен: рестарт 8500 делает внешний host-процесс; орк себя не
+  убивает (BR-2). Вердикт фиксирует НОВЫЙ контейнер после рестарта.
+- Критический путь self-restart **детерминирован** (без LLM) — главный выигрыш по
+  безопасности self-hosting; зеркалит детерминизм merge-gate ORCH-43.
+- Approve вписан в существующую status-only модель — restart-safe, аудируемо в Plane,
+  идемпотентно (маркер `initiated`).
+- Гонка чтения гейта закрыта: гейт читает РЕАЛЬНЫЙ итог через finalizer-defer.
+- Build-once гарантирует «что тестировали — то в проде».
+- Нетронуты: `STAGE_TRANSITIONS`, реестр QG, `_parse_deploy_status`, БАГ-8,
+  terminal-sync, merge-gate, контракт хука (exit-code).
+
+### Минусы / ограничения
+- Вводится **новый детерминированный job-handler** в очереди (reserved-agent
+  `deploy-finalizer`, не-LLM) — расширение dispatch воркера/лаунчера. Изменение
+  локализовано, но это новый подкомпонент → задача помечается `arch:major-change`.
+- Перехваты в `advance_stage` усложняют стадию `deploy` (три ветки по
+  `finished_agent`/маркерам). Требуется аккуратное покрытие тестами (TC-04…TC-09).
+- Build-once зависит от того, что deploy-staging оставил валидный образ
+  `orchestrator-orchestrator-staging`; при rebase merge-gate возможен дрейф
+  образ↔main (см. 10-tech-risks R-3).
+- Approve = смена статуса Plane на `Approved`; человек должен понимать, что на
+  стадии `deploy` `Approved` означает «деплой в прод» (документируется в deployer.md
+  и INFRA.md).
+
+### Что обязан сделать developer
+1. `src/config.py`: `deploy_require_manual_approve: bool = True` + прод-параметры
+   хука/ssh + `deploy_finalize_delay_s` / `deploy_finalize_max_attempts`.
+2. `src/stage_engine.py`: перехваты Фазы A/B + ветка finalizer (Фаза C через
+   `advance_stage(..., finished_agent="deployer")`).
+3. Очередь: reserved-agent `deploy-finalizer` (детерминированный handler:
+   read-result | defer | map+write+advance). Маппинг exit→status — отдельная
+   чистая функция (unit TC-01/02/03).
+4. `scripts/orchestrator-deploy-hook.sh`: опциональный `SOURCE_IMAGE` retag
+   (обратно совместимо) + прод `PREV_IMAGE_FILE`.
+5. Уведомления (Plane+Telegram) на initiate/success/rollback (BR-5).
+6. Документация: `deployer.md`, `INFRA.md`, `DEPLOY_HOOK.md`, `CHANGELOG.md`.
+7. Отладка — только на staging-цели хука; прод 8500 в разработке не трогать.
+
+## Связанные решения
+- Глобальный ADR: `docs/architecture/adr/adr-0007-executable-self-deploy.md`.
+- ORCH-35 staging-gate (`adr-0003`), ORCH-43 merge-gate (`adr-0006`),
+  ORCH-21 auto-rollback, ORCH-34 хук, ORCH-40 run-as-host-uid (`adr-0005`).
--- a/docs/work-items/ORCH-036/07-infra-requirements.md
+++ b/docs/work-items/ORCH-036/07-infra-requirements.md
@@ -0,0 +1,48 @@
+# Инфраструктурные требования — ORCH-036
+
+Work Item: ORCH-036
+Stage: architecture
+Автор: architect
+
+> Топология не меняется (та же mva154, те же два контейнера). Меняется ПРОЦЕДУРА
+> прод-деплоя орка: из ручной → исполняемая через хост-хук с ручным approve.
+
+## 1. Контейнеры / порты — без изменений
+- prod `orchestrator` (8500), staging `orchestrator-staging` (8501) — как в INFRA.md.
+- Образы (имена для build-once): prod `orchestrator-orchestrator`,
+  staging `orchestrator-orchestrator-staging`.
+
+## 2. Хост-предусловия (Owner, в git не коммитятся)
+- **HP-1.** ssh-доступ из контейнера на хост: `ssh slin@$DEPLOY_SSH_HOST` работает
+  под uid 1000 ключом из `~/.orchestrator-ssh` (INFRA P-2). Без него detached-запуск
+  Фазы B невозможен.
+- **HP-2.** `<repos_dir>/.deploy-state-<repo>/` доступен на запись и хосту (host-обёртка
+  пишет `result`), и контейнеру (finalizer читает) — обеспечивается mount
+  `/home/slin/repos ↔ /repos` (как merge-lease).
+- **HP-3.** `PREV_IMAGE_FILE` для прод — отдельный путь
+  (`.deploy-prev-image-prod`), чтобы не путать снапшоты prod/staging.
+- **HP-4 (P-4 из INFRA).** Прод-рестарт self — только в окно тишины; общий инстанс
+  с enduro-trails. На старте — под ручным approve (флаг `true`).
+
+## 3. Переменные окружения (карта; значения — на хосте, в git только дескрипторы)
+| Переменная | Назначение | Дефолт |
+|-----------|-----------|--------|
+| `ORCH_DEPLOY_REQUIRE_MANUAL_APPROVE` | ручной approve перед прод-деплоем | `true` |
+| `ORCH_DEPLOY_SSH_USER` / `ORCH_DEPLOY_SSH_HOST` | ssh-цель хост-хука | — (INFRA-карта) |
+| `ORCH_DEPLOY_HOOK_SCRIPT` | путь к хуку на хосте | `scripts/orchestrator-deploy-hook.sh` |
+| прод `TARGET_SERVICE/PORT/IMAGE`, `COMPOSE_PROFILE` | override прод-профиля хука | `orchestrator`/`8500`/`orchestrator-orchestrator`/пусто |
+| `SOURCE_IMAGE` (новый параметр хука) | образ для build-once retag | пусто → текущее поведение |
+| `ORCH_DEPLOY_FINALIZE_DELAY_S` | задержка перед первым finalize-поллом | > 60с (health-loop хука) |
+| `ORCH_DEPLOY_FINALIZE_MAX_ATTEMPTS` | бюджет finalize-defer | bounded (anti-livelock) |
+
+Прописать дескрипторы в `.env.example` / INFRA.md. Реальные значения не коммитить.
+
+## 4. Сетевые / процессные требования
+- Detached host-процесс (ssh + setsid) обязан пережить рестарт прод-контейнера 8500.
+- Finalizer-job исполняется в НОВОМ контейнере после рестарта (очередь restart-safe).
+- MTTR авто-rollback < 60с (health-loop хука 10×6с уже укладывается, BR-8/AC-9).
+
+## 5. Что НЕ требуется
+- Новых контейнеров/портов/сервисов — нет.
+- Изменений `docker-compose.yml` — не требуется (build-once через retag, не профиль).
+- Multi-node / облако / message-queue — нет (принципы проекта).
--- a/docs/work-items/ORCH-036/08-data-requirements.md
+++ b/docs/work-items/ORCH-036/08-data-requirements.md
@@ -0,0 +1,34 @@
+# Требования к данным / схеме БД — ORCH-036
+
+Work Item: ORCH-036
+Stage: architecture
+Автор: architect
+
+## Решение: миграция БД НЕ требуется
+
+Схема SQLite (`events`, `tasks`, `agent_runs`, `jobs`) не меняется. Обоснование:
+
+1. **Вердикт деплоя** — в `14-deploy-log.md` (frontmatter `deploy_status:`), как
+   сейчас. `_parse_deploy_status` не трогаем (AC-10).
+2. **Approve / initiated / result-состояние** — restart-safe через **sentinel-файлы**
+   под `<repos_dir>/.deploy-state-<repo>/<work_item_id>/` (паттерн merge-lease
+   `<repos_dir>/.merge-lease-<repo>.json`), а не через новую таблицу/колонку:
+   - `approve-requested` — Фаза A;
+   - `initiated` — Фаза B (idempotency-guard);
+   - `result` — exit-code хука (пишет host-обёртка).
+3. **Бюджет finalize-defer** считается из существующей таблицы `jobs`
+   (`task_content LIKE '%deploy-finalize%'`), как `_merge_defer_count` для merge-gate
+   — restart-safe, без новых полей.
+4. **Finalizer-job** использует существующую структуру `jobs` (agent, repo,
+   task_content, task_id, available_at). Reserved-agent `deploy-finalizer` — это
+   значение в колонке `agent`, схема не меняется.
+
+## Почему файлы, а не БД
+- Sentinel должен быть виден И хосту (пишет `result`), И контейнеру (читает finalizer);
+  файл на общем mount это обеспечивает, SQLite-запись из host-обёртки — нет.
+- Зеркалит уже принятый паттерн merge-lease (ORCH-43) — единообразие, restart-safe,
+  crash-реклейм по возрасту файла.
+
+Если разработчик при реализации сочтёт необходимым поле статуса approve в БД —
+это требует обновления данного документа и ADR-001 с обоснованием; по умолчанию —
+без миграции (согласовано с TRZ §4).
--- a/docs/work-items/ORCH-036/10-tech-risks.md
+++ b/docs/work-items/ORCH-036/10-tech-risks.md
@@ -0,0 +1,23 @@
+# Технические риски — ORCH-036
+
+Work Item: ORCH-036
+Stage: architecture
+Автор: architect
+
+| ID | Риск | Влияние | Вероятность | Митигация |
+|----|------|---------|-------------|-----------|
+| R-1 | Detached host-процесс не пережил рестарт 8500 (ssh-канал убит вместе с контейнером) | Деплой не завершён, `result` не записан, finalizer вечно defer'ит | Средняя | `setsid`/`nohup` + redirect отвязывает remote-процесс от ssh; интеграционная проверка на staging-цели (TC-08); finalize-defer bounded → по исчерпании `set_issue_blocked` + Telegram |
+| R-2 | Преждевременное чтение `check_deploy_status` (вердикта ещё нет) | Ложный FAILED → ложный откат на development | Средняя | Фаза B возвращается БЕЗ advance; гейт запускает только finalizer (Фаза C) после появления `result`; defer пока `result` отсутствует |
+| R-3 | Дрейф образ↔main: merge-gate сделал rebase, но staging-образ собран до rebase → build-once тегирует «не тот» код | В прод уезжает не точно то, что в `main` | Низкая | merge-gate (ORCH-43) делает re-test после rebase; build-once = «что валидировано на staging», что и есть контракт; задокументировано как осознанное ограничение; усиление (rebuild+revalidate staging после rebase) — отдельная задача |
+| R-4 | Двойной Approved (человек кликнул дважды / дубль webhook) запускает деплой дважды | Двойной рестарт прода, гонка | Средняя | Маркер `initiated` (idempotency-guard); event-dedup webhook'ов Plane уже есть |
+| R-5 | exit 2 хука (rollback тоже упал) → 8500 лежит → finalizer/новый контейнер не поднялся | Конвейер всех проектов встал | Низкая | health-loop + авто-rollback хука минимизируют; `restart: unless-stopped` поднимет контейнер на ПРЕДЫДУЩЕМ образе, если retag не случился; exit 2 → `deploy_status: FAILED` + откат + Telegram-алерт; ручной `--rollback` хука как backstop |
+| R-6 | Reserved-agent `deploy-finalizer` ошибочно уйдёт в LLM-путь лаунчера (`_spawn` → ValueError) | Finalizer не отработает | Низкая | Перехват ДО `_spawn` в `launch_job`; unit-тест маршрутизации |
+| R-7 | sentinel-файлы не видны контейнеру/хосту (mount/uid) | Фазы B/C не синхронизируются | Низкая | Тот же mount и uid-модель, что у merge-lease (ORCH-40/43); HP-2 в 07-infra |
+| R-8 | Approve через смену статуса Plane конфликтует с auto-advance других стадий | Случайный `Approved` на `deploy` ничего не ломает, но семантика неочевидна | Низкая | Перехват по `current_stage=="deploy"` + `finished_agent is None` + маркеры; задокументировать в deployer.md/INFRA, что `Approved` на `deploy` = «деплой в прод» |
+| R-9 | Самодеплой ORCH ломает прод во время разработки самой ORCH-36 | Групповой простой (enduro-trails) | Низкая | Вся отладка — на staging-цели хука (8501); прод 8500 не трогать (AC: DoD); флаг approve=true |
+
+## Сводный приоритет
+- **Блокеры дизайна:** R-1, R-2 — закрыты архитектурой (setsid-detached + finalizer-defer).
+- **Безопасность self-hosting:** R-5, R-9 — закрыты обязательным approve + staging-отладкой
+  + авто-rollback + `restart: unless-stopped`.
+- **Корректность:** R-3, R-4 — осознанные ограничения / idempotency-guard.
--- a/docs/work-items/ORCH-036/12-review.md
+++ b/docs/work-items/ORCH-036/12-review.md
@@ -0,0 +1,64 @@
+---
+type: review
+work_item_id: ORCH-036
+verdict: APPROVED
+version: 2
+---
+
+# Review ORCH-036 — Исполняемый самодеплой стадии `deploy` (Вариант B)
+
+## Summary
+
+Re-review после фикса двух P1 из версии 1. Оба блокера устранены:
+
+1. **Stale deploy-state маркеры** — добавлен `self_deploy.clear_state(repo, work_item_id)`
+   (never-raise, idempotent, рекурсивное удаление `<repos_dir>/.deploy-state-<repo>/<wi>/`)
+   в ветке БАГ-8-отката `check_deploy_status` FAILED (`_handle_qg_failure_rollbacks`,
+   `src/stage_engine.py`) и дополнительно в начале Фазы A (`_handle_self_deploy_phase_a`)
+   как belt-and-suspenders. Добавлен регрессионный тест
+   `tests/test_deploy_rollback.py::test_tc11_re_deploy_after_rollback_not_wedged`,
+   доказывающий, что после FAILED → откат → фикс → повторный заход на `deploy` Фаза B
+   РЕАЛЬНО инициирует деплой (нет no-op по устаревшему `initiated`), плюс
+   `tests/test_deploy_hook_mapping.py::test_clear_state_removes_all_markers_and_is_idempotent`.
+2. **`.env.example`** — добавлен полный блок дескрипторов `ORCH_SELF_DEPLOY_*` /
+   `ORCH_DEPLOY_*` (14 настроек, плейсхолдеры, секреты не коммитятся) по образцу
+   merge-gate ORCH-043, с подробными комментариями.
+
+Реализация трёхфазного исполняемого самодеплоя соответствует ADR-001 и закрывает
+критерии приёмки AC-1…AC-13. Контракты `STAGE_TRANSITIONS` / `QG_CHECKS` /
+`_parse_deploy_status` / БАГ-8 / terminal-sync / merge-gate (ORCH-43) НЕ тронуты;
+условность по репо (`self_deploy_applies`) корректна; перехваты упорядочены верно
+(Phase B после terminal-check, Phase A после merge-gate); `deploy-finalizer` —
+детерминированный no-LLM reserved-agent, перехвачен в launcher до `_spawn`. Все
+импорты (`set_issue_in_review`, `plane_add_comment`, `set_issue_blocked`,
+`send_telegram`) присутствуют. `pytest tests/` — **568 passed**.
+
+## Findings
+
+### P0 — Blocker
+- (нет)
+
+### P1 — Must fix
+- (нет — оба P1 из версии 1 устранены и покрыты тестами)
+
+### P2 — Should fix
+- (нет блокирующих; прежний P2 про сквозную процедуру оператора частично закрыт:
+  env-карта новых настроек добавлена в INFRA.md, пошаговый approve→deploy описан в
+  deployer.md и DEPLOY_HOOK.md)
+
+## Документация
+
+Обновлена содержательно и в том же PR:
+- `.openclaw/agents/deployer.md` — стадия `deploy` переписана: self-hosting путь
+  (Фазы A/B/C, явный запрет рестарта 8500 изнутри агента) vs прежний синхронный
+  ssh-путь для не-self репо;
+- `docs/operations/INFRA.md` — env-карта всех новых `ORCH_SELF_DEPLOY_*` / `ORCH_DEPLOY_*`;
+- `docs/operations/DEPLOY_HOOK.md` — `SOURCE_IMAGE` build-once + прод-пример;
+- `docs/architecture/README.md` — раздел «Исполняемый самодеплой стадии `deploy`»;
+- `CHANGELOG.md` — запись Added (фича) + запись Fixed (review-fix: clear_state + .env.example);
+- ADR `docs/work-items/ORCH-036/06-adr/ADR-001-executable-self-deploy.md` + глобальный
+  `docs/architecture/adr/adr-0007-executable-self-deploy.md`;
+- **`.env.example`** — канонический шаблон (CLAUDE.md №8, ТЗ §2.6) дополнен (был пробел в v1).
+
+Документация = golden source: изменения `src/` сопровождены синхронным обновлением
+доки в том же PR. Ось документации — PASS.
--- a/docs/work-items/ORCH-036/13-test-report.md
+++ b/docs/work-items/ORCH-036/13-test-report.md
@@ -0,0 +1,90 @@
+---
+type: test-report
+work_item_id: ORCH-036
+result: PASS
+---
+
+# Test Report — ORCH-036
+
+Исполняемый самодеплой стадии `deploy` (Вариант B) — дёргает хост-хук
+`scripts/orchestrator-deploy-hook.sh`, три фазы (A/B/C), условность по self-hosting репо.
+
+## Окружение
+- Python: 3.12.13
+- pytest: 8.3.3 (pluggy 1.6.0, anyio 4.13.0, asyncio 0.23.8 — mode AUTO)
+- Worktree: `feature/ORCH-036-orch-36-deploy-b`
+- Дата: 2026-06-06
+- Prod (8500) во время тестов НЕ тронут: вся проверка изолированная (моки subprocess/ssh/хука).
+  Smoke выполнялся read-only GET-запросами.
+
+## Smoke test API (prod 8500, read-only)
+| Endpoint | Результат |
+|----------|-----------|
+| GET /health | `{"status":"ok","service":"orchestrator"}` — OK |
+| GET /status | OK (отдаёт активные задачи) |
+| GET /queue | OK (counts/max_concurrency/resilience; breaker=closed, preflight_ok=true) |
+
+`curl` в окружении отсутствует — smoke выполнен через `urllib.request` (эквивалент GET).
+
+## Результаты по тест-плану (04-test-plan.yaml)
+
+| TC ID | Описание | Тест | Результат |
+|-------|----------|------|-----------|
+| TC-01 | exit 0 → deploy_status: SUCCESS | test_tc01_exit0_maps_to_success | PASS |
+| TC-02 | exit 1 (rolled back) → FAILED | test_tc02_exit1_rolled_back_maps_to_failed | PASS |
+| TC-03 | exit 2 (rollback тоже упал) → FAILED | test_tc03_exit2_rollback_also_failed_maps_to_failed | PASS |
+| TC-04 | DEPLOY_REQUIRE_MANUAL_APPROVE дефолт == true | test_tc04_manual_approve_default_true | PASS |
+| TC-05 | true и нет approve → прод-хук НЕ вызван | test_tc05_no_approve_does_not_call_prod_hook | PASS |
+| TC-06 | true и approve → прод-хук вызван ровно 1 раз | test_tc06_approved_calls_prod_hook_exactly_once | PASS |
+| TC-07 | is_self_hosting_repo: только orchestrator True | test_tc07_is_self_hosting_repo_only_orchestrator | PASS |
+| TC-08 | self-репо: рестарт detached host-процессом | test_tc08_self_repo_launches_detached_host_process | PASS |
+| TC-09 | не-self репо: прежний ssh-путь | test_tc09_non_self_repo_uses_legacy_path | PASS |
+| TC-10 | FAILED → откат deploy→development, blocked, release lease | test_tc10_failed_deploy_rolls_back_to_development | PASS |
+| TC-11 | staging_status FAILED → до deploy не доходит | test_tc11_staging_failed_never_reaches_deploy | PASS |
+| TC-12 | успех → Plane-коммент + Telegram | test_tc12_success_notifies_plane_and_telegram | PASS |
+| TC-13 | откат → Plane-коммент + Telegram | test_tc13_rollback_notifies_plane_and_telegram | PASS |
+| TC-14 | build-once: retag staging-образа, без build | test_tc14_deploy_command_retags_staging_image_no_build | PASS |
+| TC-15 | _parse_deploy_status контракт цел (проза не проходит) | test_qg_checks::test_tc15_* (5 кейсов) | PASS |
+| TC-16 | STAGE_TRANSITIONS deploy/deploy-staging не изменены | test_stages::test_tc16_* | PASS |
+| TC-17 | terminal-sync deploy→done сохранён | test_tc17_success_deploy_syncs_terminal_done | PASS |
+| TC-18 | merge-gate (ORCH-43) на ребре не затронут | test_merge_gate (14 кейсов) | PASS |
+| TC-19 | симуляция битого деплоя: авто-rollback → healthy, exit 1 | test_tc19_unhealthy_deploy_auto_rolls_back_exit1 | PASS |
+
+Доп. регрессионные тесты (review-fix): `test_clear_state_removes_all_markers_and_is_idempotent`,
+`test_tc11_re_deploy_after_rollback_not_wedged` — оба PASS (stale deploy-state очищается, повторный
+заход на deploy после отката не зависает).
+
+## Покрытие критериев приёмки
+
+| AC | Покрыт тестами | Статус |
+|----|----------------|--------|
+| AC-1 реальный деплой (не бумажный) | TC-01..03, TC-14, TC-19 | PASS |
+| AC-2 self-репо рестарт detached, агент себя не убивает | TC-08 | PASS |
+| AC-3 deploy_status из exit-code | TC-01..03 | PASS |
+| AC-4 FAILED → откат на development | TC-10 | PASS |
+| AC-5 ручной approve реально тормозит прод | TC-05, TC-06 | PASS |
+| AC-6 уведомления о промоуте и откате | TC-12, TC-13 | PASS |
+| AC-7 build-once (образ из staging) | TC-14 | PASS |
+| AC-8 staging-гейт обязателен | TC-11 | PASS |
+| AC-9 авто-rollback восстанавливает прод (MTTR<60с) | TC-19 | PASS |
+| AC-10 инварианты не сломаны | TC-15..18 + полный регресс | PASS |
+| AC-11 условность по репо (не-self не ломается) | TC-07, TC-09 | PASS |
+| AC-12 флаг авто НЕ выключен (остаётся true) | TC-04 | PASS |
+| AC-13 документация обновлена | проверено reviewer (12-review.md, APPROVED) | PASS |
+
+## Вывод pytest
+
+Полный регресс:
+```
+======================= 568 passed, 1 warning in 15.25s ========================
+```
+(единственный warning — PydanticDeprecatedSince20 в `src/config.py`, не связан с задачей)
+
+Целевые модули тест-плана:
+```
+======================== 46 passed, 1 warning in 2.17s =========================
+```
+
+## Итог
+**PASS** — все 19 TC зелёные, все критерии приёмки AC-1…AC-13 покрыты, полный регресс
+568/568 passed, smoke API OK, прод (8500) не тронут. Задача готова к стадии deploy-staging.
--- a/scripts/orchestrator-deploy-hook.sh
+++ b/scripts/orchestrator-deploy-hook.sh
@@ -9,6 +9,10 @@
 #   TARGET_IMAGE     - image name for retag         (default: orchestrator-orchestrator-staging)
 #   COMPOSE_PROFILE  - docker compose profile       (default: staging)
 #   PREV_IMAGE_FILE  - path to prev-image snapshot  (default: $REPO/.deploy-prev-image-staging)
+#   SOURCE_IMAGE     - build-once source image      (default: unset; ORCH-36)
+#                      When set, the prevalidated (staging) image is retagged onto
+#                      TARGET_IMAGE instead of rebuilding — guarantees prod runs the
+#                      exact artefact that passed staging (no `docker build`).
 #   LOG              - log file path                (default: /var/log/orchestrator/deploy-hook.log)
 #
 # Usage:
@@ -25,6 +29,9 @@ TARGET_PORT="${TARGET_PORT:-8501}"
 TARGET_IMAGE="${TARGET_IMAGE:-orchestrator-orchestrator-staging}"
 COMPOSE_PROFILE="${COMPOSE_PROFILE:-staging}"
 PREV_IMAGE_FILE="${PREV_IMAGE_FILE:-$REPO/.deploy-prev-image-staging}"
+# Build-once (ORCH-36): optional prevalidated source image to retag onto
+# TARGET_IMAGE. Unset -> backward-compatible (no retag), exit-code contract intact.
+SOURCE_IMAGE="${SOURCE_IMAGE:-}"

 # ---- Log setup -------------------------------------------------------------
 LOG_DIR=/var/log/orchestrator
@@ -139,10 +146,24 @@ else
    log "No previous image captured (first deploy or service not running?)"
 fi

-# 2. Pull latest code
+# 2. Pull latest code (keeps the host working tree current for future builds;
+#    the DEPLOYED artefact is the retagged SOURCE_IMAGE below when build-once).
 log "git pull origin main"
 git pull origin main >> "$LOG" 2>&1

+# 2b. Build-once (ORCH-36): retag the prevalidated staging image onto TARGET_IMAGE
+#     instead of rebuilding, so prod runs the exact artefact that passed staging.
+#     Skipped when SOURCE_IMAGE is unset; a missing image or failed retag aborts.
+if [[ -n "$SOURCE_IMAGE" ]]; then
+    if ! docker image inspect "$SOURCE_IMAGE" >/dev/null 2>&1; then
+        log "BUILD-ONCE: SOURCE_IMAGE '$SOURCE_IMAGE' not found locally - aborting (exit 1)"
+        exit 1
+    fi
+    log "BUILD-ONCE: retagging $SOURCE_IMAGE -> $TARGET_IMAGE (no rebuild)"
+    docker tag "$SOURCE_IMAGE" "$TARGET_IMAGE" >> "$LOG" 2>&1 \
+        || { log "BUILD-ONCE: docker tag failed - aborting (exit 1)"; exit 1; }
+fi
+
 # 3. Restart service
 log "Starting $TARGET_SERVICE (profile=$COMPOSE_PROFILE)"
 if [[ -n "$COMPOSE_PROFILE" ]]; then
--- a/src/agents/launcher.py
+++ b/src/agents/launcher.py
@@ -214,7 +214,14 @@ class AgentLauncher:
        Same spawn path as launch(), but threads job['id'] through so the monitor
        can update the job's status (done / requeue / failed) and link jobs.run_id
        to the agent_runs row. Returns the agent_run_id.
+
+        ORCH-036: the reserved-agent ``deploy-finalizer`` is a DETERMINISTIC
+        (no-LLM) job — intercept it BEFORE _spawn (which would raise
+        "Unknown agent", R-6) and run the deploy finalizer synchronously, driving
+        the jobs row status itself. Returns None (no agent_run row).
        """
+        if job.get("agent") == "deploy-finalizer":
+            return self._run_deploy_finalizer_job(job)
        return self._spawn(
            job["agent"],
            job["repo"],
@@ -223,6 +230,27 @@ class AgentLauncher:
            job_id=job["id"],
        )

+    def _run_deploy_finalizer_job(self, job: dict) -> None:
+        """ORCH-036 Phase C: run the deterministic deploy finalizer for ``job``.
+
+        Not an LLM spawn — no subprocess/monitor exists, so the jobs row is marked
+        done/failed here. Errors are contained and logged with their traceback, so
+        a finalizer fault cannot wedge the worker. Always returns None.
+        """
+        from ..db import mark_job
+        from .. import stage_engine
+        try:
+            stage_engine.run_deploy_finalizer(job)
+            mark_job(job["id"], "done")
+            logger.info(f"deploy-finalizer job {job['id']} done")
+        except Exception as e:
+            logger.exception(f"deploy-finalizer job {job['id']} failed: {e}")
+            try:
+                mark_job(job["id"], "failed", error=f"deploy-finalizer error: {e}")
+            except Exception:
+                logger.exception(f"deploy-finalizer job {job['id']}: mark failed errored")
+        return None
+
    def _spawn(self, agent: str, repo: str, task_content: str = None,
               task_id: int = None, job_id: int = None) -> int:
        """Shared spawn implementation for launch() and launch_job().
--- a/src/config.py
+++ b/src/config.py
@@ -152,6 +152,49 @@ class Settings(BaseSettings):
    merge_defer_delay_s: int = 60
    merge_defer_max_attempts: int = 5

+    # ORCH-036: executable self-deploy (deploy stage drives the host hook).
+    # The `deploy` stage for the self-hosting repo is turned into a REAL prod
+    # restart via a detached host process, gated by a manual approve. Three-phase
+    # design (ADR-001): A=approve-request, B=initiate (human Approved), C=finalizer
+    # maps the hook exit-code -> deploy_status. Non-self repos are unaffected.
+    #
+    #   self_deploy_enabled            -> global kill-switch; False -> no Phase A/B/C
+    #                                     interception (the legacy synchronous deployer
+    #                                     path runs for everyone, env ORCH_SELF_DEPLOY_ENABLED).
+    #   self_deploy_repos              -> CSV of repos where executable self-deploy is
+    #                                     REAL; empty -> only the self-hosting repo
+    #                                     (orchestrator). Mirrors merge_gate_repos.
+    #   deploy_require_manual_approve  -> require a human Approved before the prod
+    #                                     restart (BR-5). Default true; NOT toggled in
+    #                                     ORCH-36 (AC-12). false -> Phase A initiates
+    #                                     immediately (structural branch, off by default).
+    #   deploy_finalize_delay_s        -> delay before the first finalize poll; must be
+    #                                     > the hook health-loop (~60s) so the verdict
+    #                                     usually exists on the first poll.
+    #   deploy_finalize_max_attempts   -> bounded finalize-defer budget (anti-livelock).
+    # ssh / hook target (detached prod restart; real values live on the host):
+    #   deploy_ssh_user / deploy_ssh_host -> ssh target for the host hook (INFRA P-2).
+    #   deploy_hook_script             -> path to the hook ON THE HOST (relative to repo).
+    #   deploy_host_repo_path          -> orchestrator clone path on the host.
+    # prod overrides passed to the hook for build-once (retag staging image -> prod):
+    #   deploy_prod_source_image       -> image validated on staging (retagged, no rebuild).
+    #   deploy_prod_target_service / _port / _image / _compose_profile -> prod profile.
+    #   deploy_prod_prev_image_file    -> prod prev-image snapshot (separate from staging).
+    self_deploy_enabled: bool = True
+    self_deploy_repos: str = ""
+    deploy_require_manual_approve: bool = True
+    deploy_finalize_delay_s: int = 90
+    deploy_finalize_max_attempts: int = 10
+    deploy_ssh_user: str = "slin"
+    deploy_ssh_host: str = ""
+    deploy_hook_script: str = "scripts/orchestrator-deploy-hook.sh"
+    deploy_host_repo_path: str = "/home/slin/repos/orchestrator"
+    deploy_prod_source_image: str = "orchestrator-orchestrator-staging"
+    deploy_prod_target_service: str = "orchestrator"
+    deploy_prod_target_port: int = 8500
+    deploy_prod_target_image: str = "orchestrator-orchestrator"
+    deploy_prod_compose_profile: str = ""
+    deploy_prod_prev_image_file: str = ".deploy-prev-image-prod"
    # ORCH-053: stuck-task reconciler (sweeper for lost webhooks). A background
    # daemon thread reconciles the "source of truth (gate / Plane) != task stage"
    # drift left behind by a dropped webhook (502 on rebuild, no Plane/Gitea
--- a/src/self_deploy.py
+++ b/src/self_deploy.py
@@ -0,0 +1,338 @@
+"""Executable self-deploy primitives (ORCH-036).
+
+The ``deploy`` stage for the self-hosting ``orchestrator`` repo is a REAL prod
+restart, not a paper LLM verdict. Because the prod container (8500) runs the
+worker/agent itself, the restart must be performed by an EXTERNAL host process
+that survives the container dying (BR-2). The orchestration is split into three
+deterministic phases (ADR-001), wired in ``stage_engine``:
+
+  * Phase A — request approve on the ``deploy-staging -> deploy`` edge.
+  * Phase B — a human Plane ``Approved`` initiates the detached host deploy.
+  * Phase C — a deterministic finalizer maps the hook exit-code -> deploy_status.
+
+This module is a **leaf**: it imports only config / git_worktree (and lazily
+``qg.checks.is_self_hosting_repo``), never ``stage_engine`` / ``launcher`` — the
+orchestration that needs those lives in ``stage_engine``. Every public helper
+honours a **never-raise** contract so a deploy-state hiccup can never crash the
+stage engine.
+
+Restart-safe state lives in sentinel files under
+``<repos_dir>/.deploy-state-<repo>/<work_item_id>/`` (mirrors the merge-lease
+pattern, ТЗ §4 — no DB migration), on the shared mount visible to BOTH the
+container (reads markers) and the host (writes ``result``):
+  * ``approve-requested`` — Phase A done;
+  * ``initiated``         — Phase B started (idempotency-guard);
+  * ``result``            — the hook exit-code, written by the host WRAPPER
+                            (``echo $? > result``), NOT by the hook itself.
+"""
+
+import logging
+import os
+import shlex
+import shutil
+import subprocess
+
+from .config import settings
+
+logger = logging.getLogger("orchestrator.self_deploy")
+
+# Sentinel marker filenames (see module docstring).
+APPROVE_REQUESTED = "approve-requested"
+INITIATED = "initiated"
+RESULT = "result"
+
+# ssh launch is detached (returns immediately); keep a bounded timeout so a hung
+# ssh handshake never wedges the caller.
+_SSH_TIMEOUT = 30
+_GIT_TIMEOUT = 60
+
+
+# ---------------------------------------------------------------------------
+# Conditionality
+# ---------------------------------------------------------------------------
+def self_deploy_applies(repo: str) -> bool:
+    """Decide whether Phase A/B/C (executable self-deploy) is REAL for ``repo``.
+
+    Resolution order (same conditional-rollout shape as ORCH-35 / ORCH-43):
+      1. ``self_deploy_enabled`` off -> False for every repo (global kill-switch);
+      2. ``self_deploy_repos`` set -> True only for a listed repo (CSV, case-insensitive);
+      3. empty CSV -> the answer of ``is_self_hosting_repo(repo)``.
+    Errors are logged and answered with False — never raises.
+    """
+    try:
+        if not settings.self_deploy_enabled:
+            return False
+        csv = (settings.self_deploy_repos or "").strip()
+        if not csv:
+            # Imported lazily so this module stays a leaf (no qg import at load time).
+            from .qg.checks import is_self_hosting_repo
+            return is_self_hosting_repo(repo)
+        # Explicit allow-list: compare trimmed, lower-cased names.
+        wanted = (repo or "").strip().lower()
+        return any(wanted == item.strip().lower() for item in csv.split(",") if item.strip())
+    except Exception as e:  # noqa: BLE001 - never-raise contract
+        logger.warning("self_deploy_applies error for %s: %s", repo, e)
+        return False
+
+
+# ---------------------------------------------------------------------------
+# exit-code -> deploy_status mapping (pure, unit-tested: TC-01/02/03)
+# ---------------------------------------------------------------------------
+def map_exit_code_to_status(exit_code) -> str:
+    """Map a deploy-hook exit-code to a machine verdict (deterministic, pure).
+
+    Contract (AC-1 / AC-3, hook codes 0/1/2): ``0`` -> ``"SUCCESS"``; ``1``, ``2``,
+    anything else, None or a non-integer -> ``"FAILED"`` (fail-closed). Fractional
+    / non-finite floats are rejected first: ``int(0.5)`` would truncate to ``0``.
+    """
+    if isinstance(exit_code, float) and not exit_code.is_integer():
+        return "FAILED"
+    try:
+        return "SUCCESS" if int(exit_code) == 0 else "FAILED"
+    except (TypeError, ValueError, OverflowError):
+        return "FAILED"
+
+
+def build_deploy_log(work_item_id: str, exit_code, status: str) -> str:
+    """Return the ``14-deploy-log.md`` body. Only the ``deploy_status:`` frontmatter
+    is machine-read (by ``check_deploy_status``, AC-10); the rest is informational.
+    """
+    meta = (
+        f"deploy_status: {status}",
+        f"work_item: {work_item_id}",
+        f"hook_exit_code: {exit_code}",
+        "deployed_by: deploy-finalizer",
+    )
+    sections = [
+        "\n".join(["---", *meta, "---"]),
+        "# Deploy log — ORCH-036 executable self-deploy",
+        f"Прод-деплой завершён хост-хуком с exit-code `{exit_code}` -> "
+        f"`deploy_status: {status}`.",
+        "Вердикт зафиксирован детерминированным finalizer'ом (Фаза C), не LLM.",
+    ]
+    return "\n\n".join(sections) + "\n"
+
+
+# ---------------------------------------------------------------------------
+# Sentinel state (restart-safe, no DB migration — ТЗ §4)
+# ---------------------------------------------------------------------------
+def _state_dir(base: str, repo: str, work_item_id: str | None) -> str:
+    """Join ``<base>/.deploy-state-<repo>/<work_item_id>`` (``"_"`` when the id is falsy)."""
+    return os.path.join(base, f".deploy-state-{repo}", (work_item_id or "_"))
+
+
+def container_state_dir(repo: str, work_item_id: str | None) -> str:
+    """State dir as seen FROM THE CONTAINER (settings.repos_dir mount)."""
+    return _state_dir(settings.repos_dir, repo, work_item_id)
+
+
+def host_state_dir(repo: str, work_item_id: str | None) -> str:
+    """State dir as seen FROM THE HOST (settings.host_repos_dir): the path embedded
+    in the ssh command. Same physical directory as ``container_state_dir`` via the
+    shared mount, so the host wrapper's ``result`` is readable from the container.
+    """
+    return _state_dir(settings.host_repos_dir, repo, work_item_id)
+
+
+def marker_path(repo: str, work_item_id: str | None, name: str) -> str:
+    """Container-side path of sentinel ``name`` inside the work item's state dir."""
+    return os.path.join(container_state_dir(repo, work_item_id), name)
+
+
+def has_marker(repo: str, work_item_id: str | None, name: str) -> bool:
+    """True iff sentinel ``name`` is a regular file; any error is logged -> False."""
+    try:
+        return os.path.isfile(marker_path(repo, work_item_id, name))
+    except Exception as e:  # noqa: BLE001 - never-raise
+        logger.warning("has_marker error for %s/%s/%s: %s", repo, work_item_id, name, e)
+        return False
+
+
+def write_marker(repo: str, work_item_id: str | None, name: str, content: str = "") -> bool:
+    """Best-effort: (re)write sentinel ``name`` with ``str(content)``; True if it worked."""
+    try:
+        state_dir = container_state_dir(repo, work_item_id)
+        os.makedirs(state_dir, exist_ok=True)
+        with open(os.path.join(state_dir, name), "w", encoding="utf-8") as sentinel:
+            sentinel.write(str(content))
+    except OSError as err:
+        logger.warning("write_marker error for %s/%s/%s: %s", repo, work_item_id, name, err)
+        return False
+    return True
+
+
+def clear_state(repo: str, work_item_id: str | None) -> bool:
+    """Remove ALL deploy-state sentinels for this work item (best-effort).
+
+    Sentinels are keyed by ``work_item_id`` (stable for the whole task lifetime),
+    so a FAILED prod-deploy leaves ``approve-requested`` / ``initiated`` / ``result``
+    behind. If they survived the BUG-8 rollback (deploy -> development), the next
+    pass would hit Phase B's idempotency-guard on the STALE ``initiated`` (no-op,
+    hook never re-launched, task wedged on ``deploy``; AC-4/AC-10) and the new
+    finalizer would mis-read the STALE ``result``.
+
+    Returns True when the dir was removed or was already missing, False on any
+    error. Never raises: every exception (not only OSError) is logged -> False.
+    """
+    try:
+        d = container_state_dir(repo, work_item_id)
+        shutil.rmtree(d)
+        logger.info("clear_state: removed deploy-state dir %s", d)
+        return True
+    except FileNotFoundError:
+        return True
+    except Exception as e:  # noqa: BLE001 - never-raise contract
+        logger.warning("clear_state error for %s/%s: %s", repo, work_item_id, e)
+        return False
+
+
+def read_result(repo: str, work_item_id: str | None) -> tuple[bool, int | None]:
+    """Load the hook exit-code from the ``result`` sentinel (host wrapper output).
+
+    Outcome as ``(present, exit_code)``:
+      * ``(False, None)`` -> missing, unreadable or still empty: DEFER and re-poll;
+      * ``(True, <int>)`` -> the verdict is ready;
+      * ``(True, 1)``     -> content is not an integer: reported as failure code 1
+                             (fail-closed), so garbage never advances the task.
+    Never raises.
+    """
+    not_ready = (False, None)
+    sentinel = marker_path(repo, work_item_id, RESULT)
+    try:
+        with open(sentinel, "r", encoding="utf-8") as fh:
+            text = fh.read().strip()
+    except OSError as err:
+        if not isinstance(err, FileNotFoundError):
+            logger.warning("read_result error for %s/%s: %s", repo, work_item_id, err)
+        return not_ready
+    if not text:
+        return not_ready
+    try:
+        return True, int(text)
+    except ValueError:
+        logger.warning("read_result: corrupt result %r for %s/%s", text, repo, work_item_id)
+        return True, 1
+
+
+# ---------------------------------------------------------------------------
+# Detached host deploy: ssh + setsid (Phase B)
+# ---------------------------------------------------------------------------
+def build_deploy_command(repo: str, work_item_id: str | None, branch: str) -> list[str]:
+    """Compose the ssh argv that starts the DETACHED prod deploy on the host.
+
+    The hook runs under ``setsid``, backgrounded (``&``) with stdio detached, so
+    it SURVIVES the prod container restart (BR-2); the WRAPPER — not the hook —
+    then records the exit-code in the ``result`` sentinel:
+
+        setsid bash -c 'cd <repo> && <prod env...> bash <hook> --deploy; \
+            echo $? > <result>' >> <hook.log> 2>&1 </dev/null &
+
+    Build-once (BR-6): ``SOURCE_IMAGE=<staging-image>`` is passed so the hook can
+    retag the staging-validated image instead of rebuilding. ``branch`` is part
+    of the signature but is not used when composing the command.
+    """
+    q = shlex.quote
+    state_on_host = host_state_dir(repo, work_item_id)
+    # Prod overrides handed to the hook, in command-line order.
+    prod_env = {
+        "SOURCE_IMAGE": q(settings.deploy_prod_source_image),
+        "TARGET_SERVICE": q(settings.deploy_prod_target_service),
+        "TARGET_PORT": int(settings.deploy_prod_target_port),
+        "TARGET_IMAGE": q(settings.deploy_prod_target_image),
+        "COMPOSE_PROFILE": q(settings.deploy_prod_compose_profile),
+        "PREV_IMAGE_FILE": q(settings.deploy_prod_prev_image_file),
+    }
+    assignments = " ".join(f"{key}={val}" for key, val in prod_env.items())
+    # `;` (not `&&`) before echo: the exit-code is recorded even when the hook fails.
+    wrapped = (
+        f"cd {q(settings.deploy_host_repo_path)} && {assignments} "
+        f"bash {q(settings.deploy_hook_script)} --deploy; "
+        f"echo $? > {q(os.path.join(state_on_host, RESULT))}"
+    )
+    log_file = q(os.path.join(state_on_host, "hook.log"))
+    remote = f"setsid bash -c {q(wrapped)} >> {log_file} 2>&1 </dev/null &"
+    login = (settings.deploy_ssh_user or "").strip()
+    machine = (settings.deploy_ssh_host or "").strip()
+    target = f"{login}@{machine}" if login else machine
+    # NOTE(review): StrictHostKeyChecking=no skips host-key verification — confirm
+    # that is acceptable for the prod target.
+    return ["ssh", "-o", "StrictHostKeyChecking=no", target, remote]
+
+
+def initiate_deploy(repo: str, work_item_id: str | None, branch: str) -> tuple[bool, str]:
+    """Launch the detached prod deploy on the host (Phase B). Never raises.
+
+    Returns ``(True, msg)`` once ssh dispatched the detached process, otherwise
+    ``(False, reason)`` — empty ssh host, command-build error, ssh timeout or
+    failure — so the caller can alert and let the human re-approve.
+    """
+    if not (settings.deploy_ssh_host or "").strip():
+        return False, "deploy_ssh_host is empty (ssh target not configured)"
+    try:  # the shared state dir must exist so the host wrapper can write `result`
+        os.makedirs(container_state_dir(repo, work_item_id), exist_ok=True)
+    except Exception as e:  # noqa: BLE001 - never-raise contract
+        logger.warning("initiate_deploy: state dir error for %s/%s: %s", repo, work_item_id, e)
+    try:
+        cmd = build_deploy_command(repo, work_item_id, branch)
+        r = subprocess.run(cmd, capture_output=True, text=True, timeout=_SSH_TIMEOUT)
+    except subprocess.TimeoutExpired:
+        return False, "ssh launch timeout"
+    except Exception as e:  # noqa: BLE001 - incl. a bad setting in the command build
+        return False, f"ssh launch error: {e}"
+    if r.returncode != 0:
+        detail = ((r.stderr or "") + (r.stdout or "")).strip()[:200]
+        return False, f"ssh launch failed (rc={r.returncode}): {detail}"
+    logger.info("initiate_deploy: detached prod deploy dispatched for %s/%s", repo, work_item_id)
+    return True, "deploy initiated (detached host process)"
+
+
+# ---------------------------------------------------------------------------
+# Deploy log write + best-effort merge (Phase C)
+# ---------------------------------------------------------------------------
+def write_deploy_log(repo: str, work_item_id: str, branch: str, exit_code, status: str) -> bool:
+    """Write 14-deploy-log.md into the task worktree (so check_deploy_status reads
+    it) and best-effort commit+push it. Returns True iff the file was written; a
+    failed git step is only logged. Never raises.
+    """
+    from .git_worktree import get_worktree_path
+
+    rel = f"docs/work-items/{work_item_id}/14-deploy-log.md"
+    try:
+        wt = get_worktree_path(repo, branch)
+    except Exception as e:  # noqa: BLE001 - never-raise
+        logger.error("write_deploy_log: worktree error for %s/%s: %s", repo, branch, e)
+        return False
+
+    path = os.path.join(wt, rel)
+    content = build_deploy_log(work_item_id, exit_code, status)
+    try:
+        os.makedirs(os.path.dirname(path), exist_ok=True)
+        with open(path, "w", encoding="utf-8") as f:
+            f.write(content)
+    except OSError as e:
+        logger.error("write_deploy_log: write error at %s: %s", path, e)
+        return False
+
+    # Best-effort commit + push (the gate also falls back to origin/main).
+    git_env = {
+        **os.environ,
+        "HOME": "/home/slin",
+        "GIT_AUTHOR_NAME": "deploy-finalizer",
+        "GIT_AUTHOR_EMAIL": "deploy-finalizer@mva154.local",
+        "GIT_COMMITTER_NAME": "deploy-finalizer",
+        "GIT_COMMITTER_EMAIL": "deploy-finalizer@mva154.local",
+    }
+    message = f"deploy(ORCH-036): finalize {status} for {work_item_id}"
+    steps = (["add", rel], ["commit", "-m", message], ["push", "origin", branch])
+    try:
+        for args in steps:
+            step = subprocess.run(["git", "-C", wt, *args], capture_output=True,
+                                  text=True, timeout=_GIT_TIMEOUT, env=git_env)
+            if step.returncode != 0:
+                # Stop at the first failing step (e.g. nothing to commit) but say why.
+                logger.warning("write_deploy_log: git %s rc=%s: %s", args[0], step.returncode,
+                               (step.stderr or step.stdout or "").strip()[:200])
+                break
+    except (subprocess.SubprocessError, OSError) as e:
+        logger.warning("write_deploy_log: git commit/push best-effort failed: %s", e)
+    return True
--- a/src/stage_engine.py
+++ b/src/stage_engine.py
@@ -27,6 +27,7 @@ Agent-selection bug fix (ORCH-4):

 import logging
 import os
+import time
 from dataclasses import dataclass, field

 from .db import get_db, update_task_stage, enqueue_job
@@ -35,6 +36,7 @@ from .git_worktree import get_worktree_path
 from .review_parse import extract_review_findings, extract_test_failures
 from .qg.checks import QG_CHECKS
 from . import merge_gate
+from . import self_deploy
 from .notifications import (
    notify_stage_change,
    notify_qg_failure,
@@ -190,6 +192,23 @@ def advance_stage(
            result.note = "terminal"
            return result

+        # --- ORCH-036 Phase B: human Approved on `deploy` -> initiate deploy --
+        # A human flipping the Plane status to Approved on the `deploy` stage
+        # (finished_agent is None) is the prod-deploy trigger for the self-hosting
+        # repo. Initiate the DETACHED host deploy + enqueue the finalizer and
+        # return WITHOUT running check_deploy_status (the verdict does not exist
+        # yet — running the gate now would read a stale/absent log and falsely
+        # roll back, R-2). The finalizer (Phase C, finished_agent="deployer")
+        # records the verdict later; that path is NOT intercepted here.
+        if (
+            current_stage == "deploy"
+            and finished_agent is None
+            and settings.deploy_require_manual_approve
+            and self_deploy.self_deploy_applies(repo)
+        ):
+            _handle_self_deploy_phase_b(task_id, repo, work_item_id, branch, result)
+            return result
+
        # --- Quality gate ----------------------------------------------------
        if qg_name and qg_name in QG_CHECKS:
            # Human-approval gate: split by path.
@@ -252,6 +271,22 @@ def advance_stage(
            ):
                return result

+        # --- ORCH-036 Phase A: request approve before the prod deploy ---------
+        # On the deploy-staging -> deploy edge, AFTER a green check_staging_status
+        # and the merge-gate, the self-hosting repo does NOT auto-launch a prod
+        # deployer. Instead advance the STAGE to `deploy`, put the issue into an
+        # approval-pending state and wait for a human Approved (Phase B). The
+        # merge lease stays HELD across the wait (released on done / rollback).
+        if (
+            current_stage == "deploy-staging"
+            and settings.deploy_require_manual_approve
+            and self_deploy.self_deploy_applies(repo)
+        ):
+            _handle_self_deploy_phase_a(
+                task_id, current_stage, repo, work_item_id, branch, result
+            )
+            return result
+
        # --- Advance ---------------------------------------------------------
        update_task_stage(task_id, next_stage)
        # Telegram live tracker: the analysis->architecture advance is the human
@@ -656,6 +691,16 @@ def _handle_qg_failure_rollbacks(
        notify_stage_change(task_id, current_stage, "development")
        plane_notify_stage(work_item_id, current_stage, "development")
        result.rolled_back_to = "development"
+        # ORCH-036: clear the deploy-state sentinels (approve-requested / initiated /
+        # result) so the NEXT prod-deploy pass (after the developer fixes and the task
+        # returns to `deploy`) is not wedged by Phase B's idempotency-guard reading a
+        # STALE `initiated`, nor the finalizer mis-reading a STALE `result`. Markers are
+        # keyed by work_item_id (stable across the rollback), so without this they
+        # survive into the retry and break re-deploy-after-rollback (AC-4/AC-10).
+        try:
+            self_deploy.clear_state(repo, work_item_id)
+        except Exception as e:  # noqa: BLE001 - defensive (clear_state never-raises anyway)
+            logger.warning(f"Task {task_id}: deploy-state clear on deploy-fail failed: {e}")
        # ORCH-043: deploy failed -> no merge will complete; release the lease so the
        # next task isn't blocked until the lease ages out (holder-aware no-op).
        try:
@@ -831,3 +876,205 @@ def _handle_merge_gate_rollback(
        f"Task {task_id}: merge-gate FAILED, rolled back deploy-staging -> "
        f"development ({reason})"
    )
+
+
+# ---------------------------------------------------------------------------
+# ORCH-036: executable self-deploy (Phase A/B/C)
+# ---------------------------------------------------------------------------
+def _handle_self_deploy_phase_a(
+    task_id, current_stage, repo, work_item_id, branch, result: AdvanceResult
+):
+    """Phase A — advance to `deploy` and request a manual approve (no prod deploy).
+
+    Order of effects: persist stage `deploy` + notify, mark the result advanced
+    (note `self-deploy-approval-pending`), put the issue in review, reset the
+    deploy-state dir, arm the `approve-requested` marker (content: wall-clock
+    timestamp), then ask for the approve in Plane and Telegram. Nothing is
+    launched or enqueued here. `branch` is accepted but not used by this phase.
+    """
+    update_task_stage(task_id, "deploy")
+    notify_stage_change(task_id, current_stage, "deploy")
+    result.advanced = True
+    result.to_stage = "deploy"
+    result.note = "self-deploy-approval-pending"
+
+    if work_item_id:
+        set_issue_in_review(work_item_id)
+    # ORCH-036: belt-and-suspenders — wipe any STALE deploy-state markers before
+    # arming a fresh approve, so `initiated`/`result` left by an earlier attempt
+    # (e.g. after a crash/manual intervention) can never leak into this pass.
+    # NOTE(review): both helpers report failure via their bool return, which is
+    # ignored here — a failed wipe/write is visible only in the self_deploy log.
+    self_deploy.clear_state(repo, work_item_id)
+    self_deploy.write_marker(
+        repo, work_item_id, self_deploy.APPROVE_REQUESTED, content=str(time.time())
+    )
+    if work_item_id:
+        plane_add_comment(
+            work_item_id,
+            "\U0001f7e1 Staging зелёный. Требуется ручной approve для ПРОД-деплоя: "
+            "смените статус задачи на «Approved», чтобы запустить деплой в прод (8500).",
+            author="deployer",
+        )
+    send_telegram(
+        f"\U0001f7e1 {work_item_id}: staging OK. Ждёт approve на ПРОД-деплой "
+        f"(смените статус на Approved)."
+    )
+    logger.info(
+        f"Task {task_id}: self-deploy Phase A — advanced to deploy, "
+        f"approval-pending (awaiting human Approved)"
+    )
+
+
+def _handle_self_deploy_phase_b(task_id, repo, work_item_id, branch, result: AdvanceResult):
+    """Phase B — a human Approved initiates the DETACHED prod deploy (idempotent).
+
+    Idempotency-guard: if the `initiated` marker already exists (double Approved /
+    duplicate webhook, R-4) this is a no-op. Otherwise launch the detached host
+    deploy, and ONLY on success record `initiated` + enqueue the finalizer (so a
+    failed launch can be retried by re-approving). Returns without advancing — the
+    finalizer (Phase C) records the verdict once the hook finishes.
+    """
+    if self_deploy.has_marker(repo, work_item_id, self_deploy.INITIATED):
+        result.note = "self-deploy-already-initiated"
+        logger.info(
+            f"Task {task_id}: prod deploy already initiated; ignoring repeat Approved"
+        )
+        return
+    # No marker yet: launch now; a failed launch alerts and leaves no marker behind.
+    ok, msg = self_deploy.initiate_deploy(repo, work_item_id, branch)
+    if not ok:
+        result.note = f"self-deploy-initiate-failed: {msg}"
+        if work_item_id:
+            plane_add_comment(
+                work_item_id,
+                f"⚠️ Не удалось запустить прод-деплой: {msg}. "
+                "Повторите approve после устранения причины.",
+                author="deployer",
+            )
+        send_telegram(f"⚠️ {work_item_id}: прод-деплой не запустился: {msg}")
+        logger.error(f"Task {task_id}: self-deploy initiate failed: {msg}")
+        return
+    # NOTE(review): write_marker's bool result is ignored — if it fails, the guard above is off.
+    self_deploy.write_marker(
+        repo, work_item_id, self_deploy.INITIATED, content=str(time.time())
+    )
+    task_desc = (
+        f"Work item: {work_item_id}\nRepo: {repo}\nBranch: {branch}\n"
+        f"Stage: deploy\nNote: deploy-finalize poll (prod self-deploy initiated)."
+    )
+    new_job = enqueue_job(
+        "deploy-finalizer", repo, task_desc, task_id=task_id,
+        available_at_delay_s=settings.deploy_finalize_delay_s,
+    )
+    result.enqueued_agent = "deploy-finalizer"
+    result.enqueued_job_id = new_job
+    result.note = "self-deploy-initiated"
+    if work_item_id:
+        plane_add_comment(
+            work_item_id,
+            "\U0001f680 Прод-деплой стартовал (detached host-процесс). "
+            "Вердикт будет зафиксирован после health-check.",
+            author="deployer",
+        )
+    send_telegram(f"\U0001f680 {work_item_id}: прод-деплой стартовал. Жду результат.")
+    logger.info(
+        f"Task {task_id}: self-deploy Phase B — detached deploy initiated, "
+        f"finalizer enqueued (job_id={new_job})"
+    )
+
+
+def _deploy_finalize_defer_count(task_id: int) -> int:
+    """How many times this task's finalizer has already deferred (restart-safe).
+
+    Counted from persisted jobs rows by the defer marker in task_content.
+    NOTE(review): no per-pass filter — a re-deploy may inherit old defers; confirm.
+    """
+    conn = get_db()
+    try:  # close the connection even when the query raises
+        return conn.execute(
+            "SELECT COUNT(*) FROM jobs WHERE task_id=? AND task_content LIKE '%deploy-finalize defer%'",
+            (task_id,),
+        ).fetchone()[0]
+    finally:
+        conn.close()
+
+
+def run_deploy_finalizer(job: dict):
+    """Phase C — deterministic finalizer (reserved-agent `deploy-finalizer`, no LLM).
+
+    Looks up the task's work item / branch, then reads the `result` sentinel
+    (hook exit-code written by the host wrapper):
+      * not written yet & budget left -> DEFER (re-queue with a delay);
+      * budget exhausted              -> set_issue_blocked + Telegram (anti-livelock);
+      * present                       -> map exit-code -> deploy_status, write
+        14-deploy-log.md, then advance_stage(finished_agent="deployer") so the
+        EXISTING contracts fire (SUCCESS -> deploy->done; FAILED -> BUG-8 rollback).
+    NOTE(review): there is no try/except here — DB / queue / advance_stage errors
+    propagate to the caller, which is expected to contain them.
+    """
+    task_id = job.get("task_id")
+    repo = job.get("repo")
+    conn = get_db()
+    row = conn.execute(
+        "SELECT work_item_id, branch FROM tasks WHERE id=?", (task_id,)
+    ).fetchone()
+    conn.close()
+    if not row:
+        logger.error(f"deploy-finalizer: no task row for task_id={task_id}")
+        return
+    work_item_id, branch = row[0], row[1]
+
+    present, code = self_deploy.read_result(repo, work_item_id)
+    if not present:
+        defers = _deploy_finalize_defer_count(task_id)
+        if defers < settings.deploy_finalize_max_attempts:
+            task_desc = (
+                f"Work item: {work_item_id}\nRepo: {repo}\nBranch: {branch}\n"
+                f"Stage: deploy\nNote: deploy-finalize defer "
+                f"(attempt {defers + 1}/{settings.deploy_finalize_max_attempts}) — "
+                f"deploy result not ready, retrying after {settings.deploy_finalize_delay_s}s."
+            )
+            new_job = enqueue_job(
+                "deploy-finalizer", repo, task_desc, task_id=task_id,
+                available_at_delay_s=settings.deploy_finalize_delay_s,
+            )
+            logger.info(
+                f"Task {task_id}: deploy result not ready, finalizer deferred "
+                f"(job_id={new_job}, attempt {defers + 1}/{settings.deploy_finalize_max_attempts})"
+            )
+        else:
+            if work_item_id:
+                set_issue_blocked(work_item_id)
+            send_telegram(
+                f"\U0001f6a8 {work_item_id}: deploy result не появился после "
+                f"{settings.deploy_finalize_max_attempts} попыток. Нужно ручное вмешательство."
+            )
+            logger.error(
+                f"Task {task_id}: deploy-finalize defer attempts exhausted "
+                f"({settings.deploy_finalize_max_attempts})"
+            )
+        return
+
+    # Result present -> deterministic verdict (write_deploy_log's bool result is not checked).
+    status = self_deploy.map_exit_code_to_status(code)
+    self_deploy.write_deploy_log(repo, work_item_id, branch, code, status)
+    logger.info(
+        f"Task {task_id}: deploy finalized, hook exit={code} -> deploy_status={status}"
+    )
+    if status == "SUCCESS" and work_item_id:
+        plane_add_comment(
+            work_item_id,
+            f"✅ Прод-деплой успешен (health-check OK, exit {code}).",
+            author="deployer",
+        )
+        send_telegram(f"✅ {work_item_id}: прод-деплой успешен (exit {code}).")
+
+    # Hand the verdict to the regular `deploy` gate path (finished_agent="deployer").
+    advance_stage(
+        task_id=task_id,
+        current_stage="deploy",
+        repo=repo,
+        work_item_id=work_item_id,
+        branch=branch,
+        finished_agent="deployer",
+    )
--- a/tests/test_deploy_approve.py
+++ b/tests/test_deploy_approve.py
@@ -0,0 +1,160 @@
+"""ORCH-036 TC-04/05/06: the manual-approve gate for the executable self-deploy.
+
+Contract (AC-5, AC-12):
+  * TC-04 — ``deploy_require_manual_approve`` defaults to True in settings.
+  * TC-05 — flag true + NO human approve -> the prod hook is NEVER called; the
+    deploy-staging -> deploy edge only advances the STAGE and requests an approve
+    (Phase A). ``initiate_deploy`` / ssh subprocess must not be touched.
+  * TC-06 — flag true + a human Approved -> the prod hook is launched EXACTLY once
+    (Phase B), idempotent on a repeated Approved (the ``initiated`` marker guards).
+"""
+
+import os
+import tempfile
+
+import pytest
+
+_test_db = os.path.join(tempfile.gettempdir(), "test_orch_deploy_approve.db")
+os.environ["ORCH_DB_PATH"] = _test_db
+os.environ["ORCH_REPOS_DIR"] = tempfile.gettempdir()
+os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")
+os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")
+
+from unittest.mock import MagicMock  # noqa: E402
+
+import src.db as _db  # noqa: E402
+from src.db import init_db, get_db  # noqa: E402
+from src import stage_engine  # noqa: E402
+from src import self_deploy  # noqa: E402
+from src.stage_engine import advance_stage  # noqa: E402
+
+
+@pytest.fixture(autouse=True)  # autouse: wraps every test in this module
+def fresh_db(monkeypatch, tmp_path):
+    monkeypatch.setattr(_db.settings, "db_path", _test_db)
+    if os.path.exists(_test_db):
+        os.unlink(_test_db)  # drop the file left by a previous test before init_db()
+    init_db()
+    # Point both sentinel state roots (repos_dir, host_repos_dir) at a per-test tmp dir.
+    monkeypatch.setattr(self_deploy.settings, "repos_dir", str(tmp_path))
+    monkeypatch.setattr(self_deploy.settings, "host_repos_dir", str(tmp_path))
+    yield
+
+
+@pytest.fixture(autouse=True)
+def silence_side_effects(monkeypatch):
+    for name in (  # notifier / issue-state helpers looked up on stage_engine
+        "notify_stage_change", "notify_qg_failure", "notify_approve_requested",
+        "send_telegram", "plane_notify_stage", "plane_notify_qg", "plane_add_comment",
+        "set_issue_in_review", "set_issue_needs_input", "set_issue_in_progress",
+        "set_issue_blocked", "set_issue_done",
+    ):
+        monkeypatch.setattr(stage_engine, name, MagicMock())  # one recording mock per name
+
+
+def _make_task(stage, repo="orchestrator", branch="feature/ORCH-036-x", wi="ORCH-036"):
+    conn = get_db()  # inserts one task row at `stage`; the new row id is returned
+    cur = conn.execute(
+        "INSERT INTO tasks (plane_id, work_item_id, repo, branch, stage) "
+        "VALUES (?, ?, ?, ?, ?)",
+        (f"plane-{wi}", wi, repo, branch, stage),  # plane_id is derived from the work item id
+    )
+    task_id = cur.lastrowid  # id of the row just inserted
+    conn.commit()
+    conn.close()
+    return task_id
+
+
+def _stage(task_id):
+    conn = get_db()  # re-read the persisted stage so assertions see the stored value
+    row = conn.execute("SELECT stage FROM tasks WHERE id=?", (task_id,)).fetchone()
+    conn.close()
+    return row[0]  # single selected column: stage
+
+
+def _jobs():
+    conn = get_db()  # snapshot of the whole jobs table, oldest first (ORDER BY id)
+    rows = conn.execute("SELECT agent, repo, task_id FROM jobs ORDER BY id").fetchall()
+    conn.close()
+    return [dict(r) for r in rows]  # plain dicts keyed agent / repo / task_id
+
+
+def _pass(*a, **k):  # stand-in QG check: ignores its arguments
+    return (True, "ok")  # (passed, reason) pair
+
+
+# ---------------------------------------------------------------------------
+# TC-04: default flag value
+# ---------------------------------------------------------------------------
+def test_tc04_manual_approve_default_true():
+    """The fresh, un-overridden settings default must be True (safe-by-default)."""
+    from src.config import Settings  # local import: the class, not the shared settings instance
+    assert Settings().deploy_require_manual_approve is True  # NOTE(review): Settings() may still read env/.env overrides — confirm
+
+
+# ---------------------------------------------------------------------------
+# TC-05: flag true, no approve -> prod hook NOT called (Phase A only)
+# ---------------------------------------------------------------------------
+def test_tc05_no_approve_does_not_call_prod_hook(monkeypatch):
+    monkeypatch.setattr(stage_engine.settings, "deploy_require_manual_approve", True)
+    monkeypatch.setattr(
+        stage_engine, "QG_CHECKS",
+        {**stage_engine.QG_CHECKS,  # copy of the registry with both gates forced to pass
+         "check_staging_status": _pass,
+         "check_branch_mergeable": _pass},
+    )
+    # Spy: the deploy launcher must never run on the staging->deploy edge.
+    initiate = MagicMock()
+    monkeypatch.setattr(stage_engine.self_deploy, "initiate_deploy", initiate)
+    ssh_run = MagicMock()  # second spy: no subprocess.run call may happen either
+    monkeypatch.setattr(self_deploy.subprocess, "run", ssh_run)
+
+    task_id = _make_task("deploy-staging")
+    res = advance_stage(
+        task_id, "deploy-staging", "orchestrator", "ORCH-036",
+        "feature/ORCH-036-x", finished_agent="deployer",
+    )
+
+    # Phase A: advanced the STAGE to deploy, but requested approve — no prod hook.
+    assert res.advanced is True
+    assert res.to_stage == "deploy"
+    assert _stage(task_id) == "deploy"  # the stage change was persisted, not just reported
+    assert res.note == "self-deploy-approval-pending"
+    initiate.assert_not_called()
+    ssh_run.assert_not_called()
+    # No deployer job: the human Approved (Phase B) is what triggers the deploy.
+    assert _jobs() == []
+    # The restart-safe approve-requested marker was written.
+    assert self_deploy.has_marker("orchestrator", "ORCH-036", self_deploy.APPROVE_REQUESTED)
+
+
+# ---------------------------------------------------------------------------
+# TC-06: flag true + Approved -> prod hook called exactly once (idempotent)
+# ---------------------------------------------------------------------------
+def test_tc06_approved_calls_prod_hook_exactly_once(monkeypatch):
+    monkeypatch.setattr(stage_engine.settings, "deploy_require_manual_approve", True)
+    monkeypatch.setattr(stage_engine.settings, "deploy_ssh_host", "mva154")  # non-empty: an empty host would not launch
+    # Real initiate_deploy, but the ssh subprocess is mocked (rc=0 -> dispatched).
+    ssh_run = MagicMock(return_value=MagicMock(returncode=0, stdout="", stderr=""))
+    monkeypatch.setattr(self_deploy.subprocess, "run", ssh_run)
+
+    task_id = _make_task("deploy")  # already on deploy, awaiting Approved
+
+    # 1st human Approved -> Phase B initiates the detached deploy.
+    res1 = advance_stage(
+        task_id, "deploy", "orchestrator", "ORCH-036",
+        "feature/ORCH-036-x", finished_agent=None,  # no agent finished: this call models the human approval
+    )
+    assert res1.note == "self-deploy-initiated"
+    assert ssh_run.call_count == 1
+    # The finalizer was enqueued.
+    assert any(j["agent"] == "deploy-finalizer" for j in _jobs())
+    assert self_deploy.has_marker("orchestrator", "ORCH-036", self_deploy.INITIATED)
+
+    # 2nd (duplicate) Approved -> idempotent no-op, hook NOT called again.
+    res2 = advance_stage(
+        task_id, "deploy", "orchestrator", "ORCH-036",
+        "feature/ORCH-036-x", finished_agent=None,
+    )
+    assert res2.note == "self-deploy-already-initiated"
+    assert ssh_run.call_count == 1  # still exactly one prod deploy
--- a/tests/test_deploy_build_once.py
+++ b/tests/test_deploy_build_once.py
@@ -0,0 +1,47 @@
+"""ORCH-036 TC-14: prod deploy is build-ONCE — retag the staging image, no rebuild (AC-7).
+
+The detached prod-deploy command must pass ``SOURCE_IMAGE=<staging-image>`` to the
+hook so it retags the staging-validated image onto the prod tag instead of running
+``docker build``. We assert the composed ssh command carries the staging source
+image and never asks the hook to build.
+"""
+
+import os
+
+os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")
+os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")
+
+from src import self_deploy  # noqa: E402
+
+
+def test_tc14_deploy_command_retags_staging_image_no_build(monkeypatch):
+    monkeypatch.setattr(self_deploy.settings, "deploy_ssh_user", "slin")
+    monkeypatch.setattr(self_deploy.settings, "deploy_ssh_host", "mva154")
+    monkeypatch.setattr(
+        self_deploy.settings, "deploy_prod_source_image", "orchestrator-orchestrator-staging"
+    )
+
+    cmd = self_deploy.build_deploy_command("orchestrator", "ORCH-036", "feature/ORCH-036-x")
+    remote = cmd[-1]  # the last argv element carries the remote command line
+
+    # The prevalidated staging image is handed to the hook as SOURCE_IMAGE (build-once).
+    assert "SOURCE_IMAGE=orchestrator-orchestrator-staging" in remote
+    # No rebuild is requested in the remote command.
+    assert "docker build" not in remote
+    assert "--build" not in remote  # also rules out a `--build` flag in the remote command
+
+
+def test_tc14_hook_retag_branch_present():
+    """The hook itself must honour SOURCE_IMAGE by retagging (no rebuild)."""
+    import pathlib
+    hook = pathlib.Path(__file__).resolve().parents[1] / "scripts" / "orchestrator-deploy-hook.sh"  # parents[1]: repo root above tests/
+    text = hook.read_text(encoding="utf-8")
+    assert 'SOURCE_IMAGE="${SOURCE_IMAGE:-}"' in text  # SOURCE_IMAGE is optional, defaulting to empty
+    # Build-once retag branch present; the hook never runs `docker build`.
+    assert 'docker tag "$SOURCE_IMAGE" "$TARGET_IMAGE"' in text
+    # No EXECUTABLE `docker build` line (comments mentioning it are fine).
+    exec_lines = [
+        ln.strip() for ln in text.splitlines()
+        if ln.strip() and not ln.strip().startswith("#")  # skip blank lines and whole-line comments
+    ]
+    assert not any("docker build" in ln for ln in exec_lines)
--- a/tests/test_deploy_hook_mapping.py
+++ b/tests/test_deploy_hook_mapping.py
@@ -0,0 +1,66 @@
+"""ORCH-036 TC-01/02/03: deterministic exit-code -> deploy_status mapping.
+
+The finalizer (Phase C) maps the host-hook exit-code to the machine verdict via a
+PURE function (no LLM, no I/O), so it is unit-testable in isolation. Contract
+(hook exit-code 0/1/2, AC-1/AC-3): 0 -> SUCCESS; 1 (rolled back), 2 (rollback also
+failed), and anything else -> FAILED (fail-closed).
+"""
+
+import os
+
+os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")
+os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")
+
+from src import self_deploy  # noqa: E402
+from src.self_deploy import map_exit_code_to_status, build_deploy_log  # noqa: E402
+
+
+def test_tc01_exit0_maps_to_success():
+    assert map_exit_code_to_status(0) == "SUCCESS"  # 0 is the only exit code mapped to SUCCESS
+
+
+def test_tc02_exit1_rolled_back_maps_to_failed():
+    assert map_exit_code_to_status(1) == "FAILED"  # exit 1: deploy failed, rollback succeeded
+
+
+def test_tc03_exit2_rollback_also_failed_maps_to_failed():
+    assert map_exit_code_to_status(2) == "FAILED"  # exit 2: deploy failed and rollback failed too
+
+
+def test_other_exit_codes_map_to_failed():
+    for code in (3, 127, 255, -1):  # codes outside the documented 0/1/2 hook contract
+        assert map_exit_code_to_status(code) == "FAILED"
+
+
+def test_non_int_or_none_maps_to_failed_fail_closed():
+    assert map_exit_code_to_status(None) == "FAILED"  # no code at all -> fail-closed
+    assert map_exit_code_to_status("garbage") == "FAILED"  # non-integer input -> fail-closed
+
+
+def test_deploy_log_frontmatter_carries_status():
+    """The rendered log must expose deploy_status in YAML frontmatter so the
+    existing _parse_deploy_status contract (AC-10) reads the right verdict."""
+    body_ok = build_deploy_log("ORCH-036", 0, "SUCCESS")
+    assert body_ok.startswith("---\n")  # the frontmatter fence must be the very first line
+    assert "deploy_status: SUCCESS" in body_ok
+    body_fail = build_deploy_log("ORCH-036", 2, "FAILED")
+    assert "deploy_status: FAILED" in body_fail
+    assert "hook_exit_code: 2" in body_fail  # the raw hook exit code is recorded next to the verdict
+
+
+def test_clear_state_removes_all_markers_and_is_idempotent(monkeypatch, tmp_path):
+    """clear_state wipes the whole work-item state dir (all sentinels) and treats a
+    missing dir as success, so a re-deploy after rollback starts from a clean slate."""
+    monkeypatch.setattr(self_deploy.settings, "repos_dir", str(tmp_path))
+    repo, wi = "orchestrator", "ORCH-036"
+    self_deploy.write_marker(repo, wi, self_deploy.APPROVE_REQUESTED, "t")  # seed all three sentinels
+    self_deploy.write_marker(repo, wi, self_deploy.INITIATED, "t")
+    self_deploy.write_marker(repo, wi, self_deploy.RESULT, "1")
+    assert self_deploy.has_marker(repo, wi, self_deploy.INITIATED) is True  # sanity: the seed is visible
+
+    assert self_deploy.clear_state(repo, wi) is True
+    assert self_deploy.has_marker(repo, wi, self_deploy.APPROVE_REQUESTED) is False
+    assert self_deploy.has_marker(repo, wi, self_deploy.INITIATED) is False
+    assert self_deploy.has_marker(repo, wi, self_deploy.RESULT) is False
+    # Idempotent: clearing an already-absent dir is still success (never raises).
+    assert self_deploy.clear_state(repo, wi) is True
--- a/tests/test_deploy_hook_rollback_sim.py
+++ b/tests/test_deploy_hook_rollback_sim.py
@@ -0,0 +1,118 @@
+"""ORCH-036 TC-19: deploy-hook auto-rollback simulation (AC-9).
+
+Drives the REAL ``scripts/orchestrator-deploy-hook.sh`` in a hermetic sandbox:
+``docker`` / ``curl`` / ``git`` / ``sleep`` are replaced by PATH-shimmed stubs so
+no real infra is touched (and prod is never restarted — INFRA safety). The curl
+stub is stateful: the freshly-deployed service is UNHEALTHY for the whole deploy
+health-check window, which must trigger the hook's AUTO-ROLLBACK; after the
+rollback restart the previous image is HEALTHY again.
+
+Expected hook contract (exit-code 0/1/2):
+  * health fails -> auto rollback -> previous image healthy -> exit 1 (rolled back);
+  * the whole run completes well under the 60s MTTR budget (sleeps are shimmed).
+"""
+
+import os
+import shutil
+import stat
+import subprocess
+import time
+
+import pytest
+
+HOOK = os.path.join(
+    os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
+    "scripts", "orchestrator-deploy-hook.sh",
+)
+
+pytestmark = pytest.mark.skipif(
+    shutil.which("bash") is None, reason="bash required for hook simulation"
+)
+
+
+def _write_exec(path, content):  # write `content` to `path`, then mark the file executable
+    with open(path, "w", encoding="utf-8") as f:
+        f.write(content)
+    os.chmod(path, os.stat(path).st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH)  # add +x for user/group/other
+
+
+def _setup_sandbox(tmp_path):
+    """Build the stub sandbox under tmp_path; return (rewritten hook copy, subprocess env)."""
+    binx = tmp_path / "bin"  # holds the PATH-shimmed stub executables
+    binx.mkdir()
+    state = tmp_path / "state"  # hook log, prev-image file and the curl call counter
+    state.mkdir()
+    repo = tmp_path / "repo"  # stands in for the host clone the hook's REPO points at
+    repo.mkdir()
+    cnt = state / "curl_count"  # counter file the curl stub increments on every call
+
+    # docker: fake a running service + a recoverable previous image.
+    _write_exec(str(binx / "docker"), """#!/bin/bash
+case "$1" in
+  compose)
+    for a in "$@"; do [ "$a" = "ps" ] && { echo "fakecid"; exit 0; }; done
+    exit 0;;
+  inspect) echo "sha256:previmage"; exit 0;;
+  image) exit 0;;            # docker image inspect <img> -> found
+  tag) exit 0;;
+  *) exit 0;;
+esac
+""")
+
+    # curl: first 20 invocations (10 deploy health attempts x2 calls) UNHEALTHY,
+    # then HEALTHY (the rolled-back previous image).
+    _write_exec(str(binx / "curl"), f"""#!/bin/bash
+CNT="{cnt}"
+n=$(cat "$CNT" 2>/dev/null || echo 0); n=$((n+1)); echo "$n" > "$CNT"
+iscode=""
+for a in "$@"; do [ "$a" = "-w" ] && iscode=1; done
+if [ "$n" -gt 20 ]; then
+  [ -n "$iscode" ] && echo "200" || echo '{{"status":"ok"}}'
+else
+  [ -n "$iscode" ] && echo "000" || echo ""
+fi
+exit 0
+""")
+
+    _write_exec(str(binx / "git"), "#!/bin/bash\nexit 0\n")
+    # Shim sleep to a no-op so the simulation runs fast (real timing is governed
+    # by the hook's sleep args; here we only assert the rollback CONTROL FLOW).
+    _write_exec(str(binx / "sleep"), "#!/bin/bash\nexit 0\n")
+
+    # Copy the hook, repointing REPO to the sandbox (avoids the hardcoded prod path).
+    with open(HOOK, encoding="utf-8") as src:  # context manager: no leaked file handle
+        hook_text = src.read().replace(
+            "REPO=/home/slin/repos/orchestrator", f"REPO={repo}"
+        )
+    hook_copy = tmp_path / "hook.sh"
+    _write_exec(str(hook_copy), hook_text)
+
+    env = {
+        **os.environ,  # inherit the caller's environment, then override below
+        "PATH": f"{binx}:{os.environ['PATH']}",  # stubs first so they shadow the real binaries
+        "LOG": str(state / "hook.log"),
+        "PREV_IMAGE_FILE": str(state / "prev-image"),
+        "COMPOSE_PROFILE": "staging",
+        "TARGET_SERVICE": "orchestrator-staging",
+        "TARGET_PORT": "8501",
+    }
+    return hook_copy, env
+
+
+def test_tc19_unhealthy_deploy_auto_rolls_back_exit1(tmp_path):
+    """Run the sandboxed hook with --deploy; expect auto-rollback and exit code 1."""
+    hook_copy, env = _setup_sandbox(tmp_path)
+    t0 = time.monotonic()  # monotonic clock: unaffected by wall-clock (NTP) adjustments
+    proc = subprocess.run(
+        ["bash", str(hook_copy), "--deploy"],
+        env=env, capture_output=True, text=True, timeout=60,
+    )
+    elapsed = time.monotonic() - t0
+
+    # AC-9: unhealthy deploy -> auto rollback succeeded on the previous image -> exit 1.
+    assert proc.returncode == 1, f"stdout={proc.stdout}\nstderr={proc.stderr}"
+    out = proc.stdout + proc.stderr
+    assert "AUTO ROLLBACK" in out
+    assert "rolled back to previous image successfully" in out
+    # MTTR well under the 60s budget (sleeps shimmed; control flow only).
+    assert elapsed < 60
--- a/tests/test_deploy_notifications.py
+++ b/tests/test_deploy_notifications.py
@@ -0,0 +1,102 @@
+"""ORCH-036 TC-12/13: no silent deploy — both Plane AND Telegram are notified (AC-6).
+
+The finalizer (Phase C) must announce the prod-deploy outcome on BOTH channels:
+  * TC-12 — a SUCCESS deploy -> a Plane comment AND a Telegram message.
+  * TC-13 — a FAILED deploy (rollback) -> a Plane comment AND a Telegram message.
+"""
+
+import os
+import tempfile
+
+import pytest
+
+_test_db = os.path.join(tempfile.gettempdir(), "test_orch_deploy_notif.db")
+os.environ["ORCH_DB_PATH"] = _test_db
+os.environ["ORCH_REPOS_DIR"] = tempfile.gettempdir()
+os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")
+os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")
+
+from unittest.mock import MagicMock  # noqa: E402
+
+import src.db as _db  # noqa: E402
+from src.db import init_db, get_db  # noqa: E402
+from src import stage_engine  # noqa: E402
+from src import self_deploy  # noqa: E402
+
+
+@pytest.fixture(autouse=True)  # autouse: wraps every test in this module
+def fresh_db(monkeypatch, tmp_path):
+    monkeypatch.setattr(_db.settings, "db_path", _test_db)
+    if os.path.exists(_test_db):
+        os.unlink(_test_db)  # drop the file left by a previous test before init_db()
+    init_db()
+    monkeypatch.setattr(self_deploy.settings, "repos_dir", str(tmp_path))  # sentinel markers go to a per-test dir
+    monkeypatch.setattr(self_deploy.settings, "host_repos_dir", str(tmp_path))
+    monkeypatch.setattr(stage_engine.self_deploy, "write_deploy_log", MagicMock(return_value=True))
+    monkeypatch.setattr(stage_engine.merge_gate, "release_merge_lease", MagicMock())
+    yield
+
+
+@pytest.fixture(autouse=True)
+def silence_side_effects(monkeypatch):
+    for name in (  # notifier / issue-state helpers; the tests below assert on these mocks
+        "notify_stage_change", "notify_qg_failure", "notify_approve_requested",
+        "send_telegram", "plane_notify_stage", "plane_notify_qg", "plane_add_comment",
+        "set_issue_in_review", "set_issue_needs_input", "set_issue_in_progress",
+        "set_issue_blocked", "set_issue_done",
+    ):
+        monkeypatch.setattr(stage_engine, name, MagicMock())  # one recording mock per name
+
+
+def _make_task(stage, repo="orchestrator", branch="feature/ORCH-036-x", wi="ORCH-036"):
+    conn = get_db()  # inserts one task row at `stage`; the new row id is returned
+    cur = conn.execute(
+        "INSERT INTO tasks (plane_id, work_item_id, repo, branch, stage) "
+        "VALUES (?, ?, ?, ?, ?)",
+        (f"plane-{wi}", wi, repo, branch, stage),  # plane_id is derived from the work item id
+    )
+    task_id = cur.lastrowid  # id of the row just inserted
+    conn.commit()
+    conn.close()
+    return task_id
+
+
+def _pass(*a, **k):  # stand-in QG check: ignores its arguments
+    return (True, "ok")  # (passed, reason) pair
+
+
+def _fail(reason):  # factory: builds a stand-in QG check that always fails with `reason`
+    def _f(*a, **k):
+        return (False, reason)  # (passed, reason) pair
+    return _f
+
+
+def _run_finalizer(task_id):  # call the finalizer with a hand-built job dict
+    stage_engine.run_deploy_finalizer(
+        {"task_id": task_id, "repo": "orchestrator", "id": 1, "agent": "deploy-finalizer"}  # "id": 1 is an arbitrary job id
+    )
+
+
+def test_tc12_success_notifies_plane_and_telegram(monkeypatch):
+    self_deploy.write_marker("orchestrator", "ORCH-036", self_deploy.RESULT, "0")  # hook result sentinel: exit 0
+    monkeypatch.setattr(
+        stage_engine, "QG_CHECKS",
+        {**stage_engine.QG_CHECKS, "check_deploy_status": _pass},  # write_deploy_log is mocked, so force the gate verdict
+    )
+    task_id = _make_task("deploy")
+    _run_finalizer(task_id)
+    assert stage_engine.plane_add_comment.called  # Plane channel
+    assert stage_engine.send_telegram.called  # Telegram channel
+
+
+def test_tc13_rollback_notifies_plane_and_telegram(monkeypatch):
+    self_deploy.write_marker("orchestrator", "ORCH-036", self_deploy.RESULT, "1")  # hook result sentinel: exit 1
+    monkeypatch.setattr(
+        stage_engine, "QG_CHECKS",
+        {**stage_engine.QG_CHECKS, "check_deploy_status": _fail("Deploy status: FAILED")},
+    )
+    task_id = _make_task("deploy")
+    _run_finalizer(task_id)
+    # The БАГ-8 (BUG-8) rollback path announces on both channels (no silent failure).
+    assert stage_engine.send_telegram.called
+    assert stage_engine.plane_add_comment.called or stage_engine.plane_notify_qg.called  # either Plane call counts
--- a/tests/test_deploy_rollback.py
+++ b/tests/test_deploy_rollback.py
@@ -0,0 +1,141 @@
+"""ORCH-036 TC-10/11: a FAILED prod deploy rolls back deploy -> development (AC-4).
+
+The finalizer (Phase C) reads the hook ``result`` sentinel, maps a non-zero exit
+to ``deploy_status: FAILED`` and then drives the EXISTING deploy contract via
+``advance_stage(finished_agent="deployer")``. With a FAILED verdict the БАГ-8
+rollback fires: deploy -> development, ``set_issue_blocked`` + Telegram alert, and
+(for the self-hosting repo) the merge-lease is released so the branch is not
+wedged. The hook exit-code -> verdict mapping is unit-tested in
+``test_deploy_hook_mapping.py``; here we assert the engine REACTION.
+"""
+
+import os
+import tempfile
+
+import pytest
+
+_test_db = os.path.join(tempfile.gettempdir(), "test_orch_deploy_rollback.db")
+os.environ["ORCH_DB_PATH"] = _test_db
+os.environ["ORCH_REPOS_DIR"] = tempfile.gettempdir()
+os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")
+os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")
+
+from unittest.mock import MagicMock  # noqa: E402
+
+import src.db as _db  # noqa: E402
+from src.db import init_db, get_db  # noqa: E402
+from src import stage_engine  # noqa: E402
+from src import self_deploy  # noqa: E402
+
+
+@pytest.fixture(autouse=True)  # autouse: wraps every test in this module
+def fresh_db(monkeypatch, tmp_path):
+    monkeypatch.setattr(_db.settings, "db_path", _test_db)
+    if os.path.exists(_test_db):
+        os.unlink(_test_db)  # drop the file left by a previous test before init_db()
+    init_db()
+    monkeypatch.setattr(self_deploy.settings, "repos_dir", str(tmp_path))  # sentinel markers go to a per-test dir
+    monkeypatch.setattr(self_deploy.settings, "host_repos_dir", str(tmp_path))
+    # The finalizer's deploy-log write touches a git worktree we don't have here;
+    # the verdict it drives comes from check_deploy_status (monkeypatched in each test).
+    monkeypatch.setattr(stage_engine.self_deploy, "write_deploy_log", MagicMock(return_value=True))
+    yield
+
+
+@pytest.fixture(autouse=True)
+def silence_side_effects(monkeypatch):
+    for name in (  # notifier / issue-state helpers; the tests below assert on these mocks
+        "notify_stage_change", "notify_qg_failure", "notify_approve_requested",
+        "send_telegram", "plane_notify_stage", "plane_notify_qg", "plane_add_comment",
+        "set_issue_in_review", "set_issue_needs_input", "set_issue_in_progress",
+        "set_issue_blocked", "set_issue_done",
+    ):
+        monkeypatch.setattr(stage_engine, name, MagicMock())  # one recording mock per name
+
+
+def _make_task(stage, repo="orchestrator", branch="feature/ORCH-036-x", wi="ORCH-036"):
+    conn = get_db()  # inserts one task row at `stage`; the new row id is returned
+    cur = conn.execute(
+        "INSERT INTO tasks (plane_id, work_item_id, repo, branch, stage) "
+        "VALUES (?, ?, ?, ?, ?)",
+        (f"plane-{wi}", wi, repo, branch, stage),  # plane_id is derived from the work item id
+    )
+    task_id = cur.lastrowid  # id of the row just inserted
+    conn.commit()
+    conn.close()
+    return task_id
+
+
+def _stage(task_id):
+    conn = get_db()  # re-read the persisted stage so assertions see the stored value
+    row = conn.execute("SELECT stage FROM tasks WHERE id=?", (task_id,)).fetchone()
+    conn.close()
+    return row[0]  # single selected column: stage
+
+
+def _fail(reason):  # factory: builds a stand-in QG check that always fails with `reason`
+    def _f(*a, **k):
+        return (False, reason)  # (passed, reason) pair
+    return _f
+
+
+def test_tc10_failed_deploy_rolls_back_to_development(monkeypatch):
+    # Hook reported exit 1 (rolled back) -> the host wrapper wrote result=1.
+    self_deploy.write_marker("orchestrator", "ORCH-036", self_deploy.RESULT, "1")
+    # The deploy-log verdict the gate reads is FAILED.
+    monkeypatch.setattr(
+        stage_engine, "QG_CHECKS",
+        {**stage_engine.QG_CHECKS, "check_deploy_status": _fail("Deploy status: FAILED")},
+    )
+    task_id = _make_task("deploy")
+
+    stage_engine.run_deploy_finalizer(
+        {"task_id": task_id, "repo": "orchestrator", "id": 1, "agent": "deploy-finalizer"}  # hand-built job dict
+    )
+
+    # БАГ-8 (BUG-8) rollback fired: NOT done, back on development, blocked + alerted.
+    assert _stage(task_id) == "development"
+    assert stage_engine.set_issue_blocked.called
+    assert stage_engine.send_telegram.called
+    assert stage_engine.set_issue_done.called is False  # the terminal transition must not have run
+
+
+def test_tc11_re_deploy_after_rollback_not_wedged(monkeypatch):
+    """FAILED deploy -> rollback wipes stale markers so a later Phase B re-initiates.
+
+    Regression for the re-deploy-after-rollback contract (AC-4/AC-10): markers are
+    keyed by the (stable) work_item_id, so without cleanup the STALE `initiated` from
+    the first failed attempt would make Phase B's idempotency-guard a no-op on the
+    retry and wedge the task on `deploy` forever.
+    """
+    repo, wi, branch = "orchestrator", "ORCH-036", "feature/ORCH-036-x"
+    # First (failed) pass left BOTH the idempotency-guard and the verdict behind.
+    self_deploy.write_marker(repo, wi, self_deploy.INITIATED, "123")
+    self_deploy.write_marker(repo, wi, self_deploy.RESULT, "1")
+    monkeypatch.setattr(
+        stage_engine, "QG_CHECKS",
+        {**stage_engine.QG_CHECKS, "check_deploy_status": _fail("Deploy status: FAILED")},
+    )
+    task_id = _make_task("deploy")
+
+    stage_engine.run_deploy_finalizer(
+        {"task_id": task_id, "repo": repo, "id": 1, "agent": "deploy-finalizer"}  # hand-built job dict
+    )
+
+    # Rollback fired AND the stale deploy-state sentinels were wiped.
+    assert _stage(task_id) == "development"
+    assert self_deploy.has_marker(repo, wi, self_deploy.INITIATED) is False
+    assert self_deploy.has_marker(repo, wi, self_deploy.RESULT) is False
+    assert self_deploy.read_result(repo, wi) == (False, None)  # (False, None): no result available any more
+
+    # Second pass: the task reaches `deploy` again and the human re-approves. Phase B
+    # must ACTUALLY initiate (no stale `initiated` -> not a no-op), proving the retry
+    # is no longer wedged.
+    init = MagicMock(return_value=(True, "ok"))
+    monkeypatch.setattr(stage_engine.self_deploy, "initiate_deploy", init)
+    result = stage_engine.AdvanceResult(from_stage="deploy")
+    stage_engine._handle_self_deploy_phase_b(task_id, repo, wi, branch, result)  # private Phase B handler, called directly
+
+    assert init.called
+    assert result.note == "self-deploy-initiated"
+    assert self_deploy.has_marker(repo, wi, self_deploy.INITIATED) is True
--- a/tests/test_deploy_routing.py
+++ b/tests/test_deploy_routing.py
@@ -0,0 +1,174 @@
+"""ORCH-036 TC-07/08/09: self vs non-self deploy routing (AC-2, AC-11).
+
+  * TC-07 — ``is_self_hosting_repo``/``self_deploy_applies`` recognise the
+    orchestrator repo and reject any other (no regression).
+  * TC-08 — for the self repo the restart is launched as a DETACHED host process
+    (ssh + setsid + background), never synchronously inside the agent.
+  * TC-09 — for a non-self repo (enduro-trails) the deploy keeps the legacy path:
+    the self-deploy Phase A/B logic does NOT apply.
+"""
+
+import os
+import tempfile
+
+import pytest
+
+_test_db = os.path.join(tempfile.gettempdir(), "test_orch_deploy_routing.db")
+os.environ["ORCH_DB_PATH"] = _test_db
+os.environ["ORCH_REPOS_DIR"] = tempfile.gettempdir()
+os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")
+os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")
+
+from unittest.mock import MagicMock  # noqa: E402
+
+import src.db as _db  # noqa: E402
+from src.db import init_db, get_db  # noqa: E402
+from src import stage_engine  # noqa: E402
+from src import self_deploy  # noqa: E402
+from src.qg.checks import is_self_hosting_repo  # noqa: E402
+from src.stage_engine import advance_stage  # noqa: E402
+
+
+@pytest.fixture(autouse=True)  # autouse: wraps every test in this module
+def fresh_db(monkeypatch, tmp_path):
+    monkeypatch.setattr(_db.settings, "db_path", _test_db)
+    if os.path.exists(_test_db):
+        os.unlink(_test_db)  # drop the file left by a previous test before init_db()
+    init_db()
+    monkeypatch.setattr(self_deploy.settings, "repos_dir", str(tmp_path))  # sentinel markers go to a per-test dir
+    monkeypatch.setattr(self_deploy.settings, "host_repos_dir", str(tmp_path))
+    yield
+
+
+@pytest.fixture(autouse=True)
+def silence_side_effects(monkeypatch):
+    for name in (  # notifier / issue-state helpers looked up on stage_engine
+        "notify_stage_change", "notify_qg_failure", "notify_approve_requested",
+        "send_telegram", "plane_notify_stage", "plane_notify_qg", "plane_add_comment",
+        "set_issue_in_review", "set_issue_needs_input", "set_issue_in_progress",
+        "set_issue_blocked", "set_issue_done",
+    ):
+        monkeypatch.setattr(stage_engine, name, MagicMock())  # one recording mock per name
+
+
+def _make_task(stage, repo, branch, wi):  # no defaults here: self and non-self repos are both exercised
+    conn = get_db()  # inserts one task row at `stage`; the new row id is returned
+    cur = conn.execute(
+        "INSERT INTO tasks (plane_id, work_item_id, repo, branch, stage) "
+        "VALUES (?, ?, ?, ?, ?)",
+        (f"plane-{wi}", wi, repo, branch, stage),  # plane_id is derived from the work item id
+    )
+    task_id = cur.lastrowid  # id of the row just inserted
+    conn.commit()
+    conn.close()
+    return task_id
+
+
+def _stage(task_id):
+    conn = get_db()  # re-read the persisted stage so assertions see the stored value
+    row = conn.execute("SELECT stage FROM tasks WHERE id=?", (task_id,)).fetchone()
+    conn.close()
+    return row[0]  # single selected column: stage
+
+
+def _jobs():
+    conn = get_db()  # snapshot of the whole jobs table, oldest first (ORDER BY id)
+    rows = conn.execute("SELECT agent, repo, task_id FROM jobs ORDER BY id").fetchall()
+    conn.close()
+    return [dict(r) for r in rows]  # plain dicts keyed agent / repo / task_id
+
+
+def _pass(*a, **k):  # stand-in QG check: ignores its arguments
+    return (True, "ok")  # (passed, reason) pair
+
+
+# ---------------------------------------------------------------------------
+# TC-07: routing predicate
+# ---------------------------------------------------------------------------
+def test_tc07_is_self_hosting_repo_only_orchestrator():
+    assert is_self_hosting_repo("orchestrator") is True
+    assert is_self_hosting_repo("ORCHESTRATOR") is True  # case-insensitive
+    assert is_self_hosting_repo("enduro-trails") is False  # any other repo is rejected
+    assert is_self_hosting_repo("") is False  # empty name is rejected, not an error
+    assert is_self_hosting_repo(None) is False  # None is rejected, not an error
+
+
+def test_tc07_self_deploy_applies_mirrors_routing(monkeypatch):
+    monkeypatch.setattr(self_deploy.settings, "self_deploy_enabled", True)
+    monkeypatch.setattr(self_deploy.settings, "self_deploy_repos", "")  # empty CSV -> only the self-hosting repo
+    assert self_deploy.self_deploy_applies("orchestrator") is True
+    assert self_deploy.self_deploy_applies("enduro-trails") is False
+    # Global kill-switch wins.
+    monkeypatch.setattr(self_deploy.settings, "self_deploy_enabled", False)
+    assert self_deploy.self_deploy_applies("orchestrator") is False
+
+
+# ---------------------------------------------------------------------------
+# TC-08: self repo -> DETACHED host process (ssh + setsid + background)
+# ---------------------------------------------------------------------------
+def test_tc08_self_repo_launches_detached_host_process(monkeypatch):
+    """The deploy command must be an ssh invocation that detaches the hook via
+    setsid and backgrounds it (`&`), so it survives the prod container restart —
+    i.e. NOT a synchronous in-agent call."""
+    monkeypatch.setattr(self_deploy.settings, "deploy_ssh_user", "slin")
+    monkeypatch.setattr(self_deploy.settings, "deploy_ssh_host", "mva154")
+
+    cmd = self_deploy.build_deploy_command("orchestrator", "ORCH-036", "feature/ORCH-036-x")
+
+    assert cmd[0] == "ssh"
+    assert "slin@mva154" in cmd  # user@host is its own argv element
+    remote = cmd[-1]  # the last argv element carries the remote command line
+    assert "setsid" in remote          # detached session
+    assert remote.rstrip().endswith("&")  # backgrounded
+    assert "</dev/null" in remote      # stdin detached
+    assert "--deploy" in remote        # runs the deploy hook
+
+
+def test_tc08_initiate_deploy_uses_subprocess_not_blocking(monkeypatch):
+    """initiate_deploy dispatches via subprocess (the ssh call returns at once);
+    a rc=0 means 'detached process launched', not 'deploy finished'."""
+    captured = {}  # filled by fake_run with the argv it was given
+
+    def fake_run(cmd, **kwargs):
+        captured["cmd"] = cmd
+        return MagicMock(returncode=0, stdout="", stderr="")  # mimics a completed process with rc=0
+
+    monkeypatch.setattr(self_deploy.settings, "deploy_ssh_host", "mva154")
+    monkeypatch.setattr(self_deploy.subprocess, "run", fake_run)
+    ok, msg = self_deploy.initiate_deploy("orchestrator", "ORCH-036", "feature/ORCH-036-x")
+    assert ok is True
+    assert captured["cmd"][0] == "ssh"
+    assert "detached" in msg  # the status message must say the launch was detached
+
+
+# ---------------------------------------------------------------------------
+# TC-09: non-self repo -> legacy path, self-deploy logic does not apply
+# ---------------------------------------------------------------------------
+def test_tc09_non_self_repo_uses_legacy_path(monkeypatch):
+    """enduro-trails on the deploy-staging -> deploy edge: no Phase A interception,
+    the deployer is enqueued for the deploy stage exactly as before ORCH-036."""
+    monkeypatch.setattr(stage_engine.settings, "deploy_require_manual_approve", True)
+    monkeypatch.setattr(
+        stage_engine, "QG_CHECKS",
+        {**stage_engine.QG_CHECKS, "check_staging_status": _pass},
+    )  # check_branch_mergeable left REAL -> N/A for non-self repo
+    # Spy: self-deploy must not be initiated for a non-self repo.
+    initiate = MagicMock()
+    monkeypatch.setattr(stage_engine.self_deploy, "initiate_deploy", initiate)
+
+    task_id = _make_task("deploy-staging", "enduro-trails", "feature/ET-009-x", "ET-009")
+    res = advance_stage(
+        task_id, "deploy-staging", "enduro-trails", "ET-009",
+        "feature/ET-009-x", finished_agent="deployer",
+    )
+
+    assert res.advanced is True
+    assert _stage(task_id) == "deploy"  # the stage change was persisted, not just reported
+    assert res.note != "self-deploy-approval-pending"  # Phase A must not have intercepted
+    initiate.assert_not_called()
+    # Legacy path enqueues the deployer for the deploy stage.
+    jobs = _jobs()
+    assert len(jobs) == 1
+    assert jobs[0]["agent"] == "deployer"
+    # No self-deploy marker for the non-self repo.
+    assert not self_deploy.has_marker("enduro-trails", "ET-009", self_deploy.APPROVE_REQUESTED)
--- a/tests/test_deploy_terminal_sync.py
+++ b/tests/test_deploy_terminal_sync.py
@@ -0,0 +1,104 @@
+"""ORCH-036 TC-17: a SUCCESS prod deploy preserves the terminal-sync contract (AC-10).
+
+When the finalizer (Phase C) reads exit 0 -> ``deploy_status: SUCCESS`` and drives
+``advance_stage(finished_agent="deployer")``, the EXISTING deploy->done transition
+must still fire unchanged: stage becomes ``done``, ``set_issue_done`` is called, no
+agent is launched, and the merge-lease is released (terminal-sync, ORCH-43/БАГ-8
+contract). ORCH-036 only changes HOW the verdict is produced, never the contract.
+"""
+
+import os
+import tempfile
+
+import pytest
+
+_test_db = os.path.join(tempfile.gettempdir(), "test_orch_deploy_terminal.db")
+os.environ["ORCH_DB_PATH"] = _test_db
+os.environ["ORCH_REPOS_DIR"] = tempfile.gettempdir()
+os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")
+os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")
+
+from unittest.mock import MagicMock  # noqa: E402
+
+import src.db as _db  # noqa: E402
+from src.db import init_db, get_db  # noqa: E402
+from src import stage_engine  # noqa: E402
+from src import self_deploy  # noqa: E402
+
+
+@pytest.fixture(autouse=True)
+def fresh_db(monkeypatch, tmp_path):  # isolate DB file + repo dirs; stub the log writer
+    if os.path.exists(_test_db):
+        os.unlink(_test_db)  # discard whatever an earlier test left behind
+    monkeypatch.setattr(_db.settings, "db_path", _test_db)
+    init_db()
+    for attr in ("repos_dir", "host_repos_dir"):
+        monkeypatch.setattr(self_deploy.settings, attr, str(tmp_path))
+    monkeypatch.setattr(stage_engine.self_deploy, "write_deploy_log", MagicMock(return_value=True))
+    yield
+
+
+@pytest.fixture(autouse=True)
+def silence_side_effects(monkeypatch):
+    """Replace the notification / Plane helpers on ``stage_engine`` with mocks."""
+    for attr in (
+        "notify_stage_change", "notify_qg_failure", "notify_approve_requested", "send_telegram",
+        "plane_notify_stage", "plane_notify_qg", "plane_add_comment", "set_issue_in_review",
+        "set_issue_needs_input", "set_issue_in_progress", "set_issue_blocked", "set_issue_done",
+    ):
+        monkeypatch.setattr(stage_engine, attr, MagicMock())
+
+
+def _make_task(stage, repo="orchestrator", branch="feature/ORCH-036-x", wi="ORCH-036"):
+    """Insert one ``tasks`` row at ``stage`` and return the id of the new row."""
+    insert_sql = (
+        "INSERT INTO tasks (plane_id, work_item_id, repo, branch, stage) "
+        "VALUES (?, ?, ?, ?, ?)"
+    )
+    db = get_db()
+    new_id = db.execute(insert_sql, (f"plane-{wi}", wi, repo, branch, stage)).lastrowid
+    db.commit()
+    db.close()
+    return new_id
+
+
+def _stage(task_id):  # stage currently stored on the task row
+    db = get_db()
+    found = db.execute("SELECT stage FROM tasks WHERE id=?", (task_id,)).fetchone()
+    db.close()
+    return found[0]
+
+
+def _jobs():  # agent of every row in ``jobs``, ordered by id
+    db = get_db()
+    agents = [record[0] for record in db.execute("SELECT agent FROM jobs ORDER BY id")]
+    db.close()
+    return agents
+
+
+def _pass(*a, **k):  # always-green stand-in for a QG check; arguments are ignored
+    return True, "ok"
+
+
+def test_tc17_success_deploy_syncs_terminal_done(monkeypatch):
+    """TC-17: finalizing ``result=0`` still runs the unchanged deploy -> done sync."""
+    # Simulate the host wrapper having recorded hook exit code 0.
+    self_deploy.write_marker("orchestrator", "ORCH-036", self_deploy.RESULT, "0")
+    # Stub the deploy gate so it passes; every other check stays as registered.
+    green_checks = dict(stage_engine.QG_CHECKS, check_deploy_status=_pass)
+    monkeypatch.setattr(stage_engine, "QG_CHECKS", green_checks)
+    # Spy on the merge-lease release to prove the terminal-sync still frees it.
+    lease_release = MagicMock()
+    monkeypatch.setattr(stage_engine.merge_gate, "release_merge_lease", lease_release)
+
+    task_id = _make_task("deploy")
+    job = {"task_id": task_id, "repo": "orchestrator", "id": 1, "agent": "deploy-finalizer"}
+    stage_engine.run_deploy_finalizer(job)
+
+    # The task reached the terminal stage and the issue was marked done.
+    assert _stage(task_id) == "done"
+    assert stage_engine.set_issue_done.called
+    # The lease is released exactly once, for this repo and branch.
+    lease_release.assert_called_once_with("orchestrator", "feature/ORCH-036-x")
+    # Leaving deploy is terminal: no agent job is queued.
+    assert _jobs() == []
--- a/tests/test_qg_checks.py
+++ b/tests/test_qg_checks.py
@@ -0,0 +1,53 @@
+"""ORCH-036 TC-15: the deploy-verdict parse contract is unchanged (AC-10).
+
+``_parse_deploy_status`` reads ONLY the machine-readable ``deploy_status:`` YAML
+frontmatter (never prose). ORCH-036 produces the verdict differently (a
+deterministic finalizer instead of an LLM), but the parse contract that the gate
+relies on must remain bit-identical:
+    SUCCESS -> (True, ...), FAILED -> (False, ...), missing frontmatter/field -> (False, ...).
+"""
+
+import os
+
+os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")
+os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")
+
+from src.qg.checks import _parse_deploy_status  # noqa: E402
+from src.self_deploy import build_deploy_log  # noqa: E402
+
+
+def test_tc15_success_frontmatter_passes():  # SUCCESS verdict in frontmatter -> pass
+    passed, why = _parse_deploy_status("---\ndeploy_status: SUCCESS\n---\n\nbody")
+    assert passed is True
+    assert "SUCCESS" in why
+
+
+def test_tc15_failed_frontmatter_fails():  # FAILED verdict in frontmatter -> fail
+    passed, why = _parse_deploy_status("---\ndeploy_status: FAILED\n---\n\nbody")
+    assert passed is False
+    assert "FAILED" in why
+
+
+def test_tc15_no_frontmatter_fails():  # key appears only in prose -> rejected
+    text = "just prose, deploy_status: SUCCESS in text but no frontmatter"
+    assert _parse_deploy_status(text)[0] is False
+
+
+def test_tc15_missing_field_fails():  # frontmatter present, deploy_status key absent
+    passed, _ = _parse_deploy_status("---\nother_field: SUCCESS\n---\n")
+    assert passed is False
+
+
+def test_tc15_prose_success_word_does_not_pass():
+    """A bare ``SUCCESS`` in Markdown prose (no frontmatter at all) is rejected."""
+    passed, _ = _parse_deploy_status("# Deploy\n\nDeploy was a SUCCESS, hooray!\n")
+    assert passed is False
+
+
+def test_tc15_finalizer_log_roundtrips_through_parser():
+    """Feed ``build_deploy_log`` output straight into ``_parse_deploy_status``:
+    the SUCCESS log must pass and the FAILED log must fail (producer/consumer)."""
+    success_ok, _ = _parse_deploy_status(build_deploy_log("ORCH-036", 0, "SUCCESS"))
+    failed_ok, _ = _parse_deploy_status(build_deploy_log("ORCH-036", 2, "FAILED"))
+    assert success_ok is True
+    assert failed_ok is False
--- a/tests/test_stage_engine.py
+++ b/tests/test_stage_engine.py
@@ -822,7 +822,12 @@ class TestMergeGate:

    def test_tc20_pass_advances_to_deploy(self, monkeypatch):
        """TC-20 / AC-1: gate PASS (rebased + green) -> advance to deploy, deployer
-        enqueued, NO rollback. staging gate must pass first (same edge)."""
+        enqueued, NO rollback. staging gate must pass first (same edge).
+
+        ORCH-036: disable the manual-approve self-deploy interception so this test
+        keeps exercising the merge-gate in isolation (the executable self-deploy
+        Phase A path is covered separately in test_deploy_approve.py)."""
+        monkeypatch.setattr(stage_engine.settings, "deploy_require_manual_approve", False)
        monkeypatch.setattr(
            stage_engine, "QG_CHECKS",
            {**stage_engine.QG_CHECKS,
--- a/tests/test_stages.py
+++ b/tests/test_stages.py
@@ -0,0 +1,41 @@
+"""ORCH-036 TC-16: STAGE_TRANSITIONS for deploy are unchanged (AC-10).
+
+ORCH-036 only changes HOW the deploy verdict is produced (a deterministic
+finalizer) — it must NOT touch the state machine. The deploy edge keeps its
+exact transition (deploy -> done), no in-line agent (None), and the gate
+``check_deploy_status``. The deploy-staging edge is likewise untouched.
+"""
+
+import os
+
+os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")
+os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")
+
+from src.stages import (  # noqa: E402
+    STAGE_TRANSITIONS,
+    get_agent_for_stage,
+    get_next_stage,
+    get_qg_for_stage,
+)
+
+
+def test_tc16_deploy_transition_unchanged():
+    """``deploy`` is still deploy -> done, agent-less, gated by check_deploy_status."""
+    expected = {"next": "done", "agent": None, "qg": "check_deploy_status"}
+    assert STAGE_TRANSITIONS["deploy"] == expected
+    assert get_next_stage("deploy") == expected["next"]
+    assert get_agent_for_stage("deploy") is None
+    assert get_qg_for_stage("deploy") == expected["qg"]
+
+
+def test_tc16_deploy_staging_transition_unchanged():
+    """``deploy-staging`` still leads to deploy via the deployer and the staging gate."""
+    expected = {"next": "deploy", "agent": "deployer", "qg": "check_staging_status"}
+    assert STAGE_TRANSITIONS["deploy-staging"] == expected
+    assert get_next_stage("deploy-staging") == expected["next"]
+    assert get_agent_for_stage("deploy-staging") == expected["agent"]
+    assert get_qg_for_stage("deploy-staging") == expected["qg"]
+
+
+def test_tc16_done_is_terminal():
+    assert get_next_stage("done") is None  # no stage follows ``done``
--- a/tests/test_staging_precondition.py
+++ b/tests/test_staging_precondition.py
@@ -0,0 +1,99 @@
+"""ORCH-036 TC-11: the staging precondition is preserved (AC-8).
+
+A red staging gate (``staging_status: FAILED``) must roll the task back to
+development and NEVER let it reach the ``deploy`` stage — so the executable
+prod self-deploy can never be initiated off a failed staging run. ORCH-036 adds
+its Phase A interception AFTER ``check_staging_status``, so a staging failure
+short-circuits before any self-deploy logic runs.
+"""
+
+import os
+import tempfile
+
+import pytest
+
+_test_db = os.path.join(tempfile.gettempdir(), "test_orch_staging_precond.db")
+os.environ["ORCH_DB_PATH"] = _test_db
+os.environ["ORCH_REPOS_DIR"] = tempfile.gettempdir()
+os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")
+os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")
+
+from unittest.mock import MagicMock  # noqa: E402
+
+import src.db as _db  # noqa: E402
+from src.db import init_db, get_db  # noqa: E402
+from src import stage_engine  # noqa: E402
+from src import self_deploy  # noqa: E402
+from src.stage_engine import advance_stage  # noqa: E402
+
+
+@pytest.fixture(autouse=True)
+def fresh_db(monkeypatch, tmp_path):  # isolate the DB file and repo dirs per test
+    if os.path.exists(_test_db):
+        os.unlink(_test_db)  # discard whatever an earlier test left behind
+    monkeypatch.setattr(_db.settings, "db_path", _test_db)
+    init_db()
+    for attr in ("repos_dir", "host_repos_dir"):
+        monkeypatch.setattr(self_deploy.settings, attr, str(tmp_path))
+    yield
+
+
+@pytest.fixture(autouse=True)
+def silence_side_effects(monkeypatch):
+    """Replace the notification / Plane helpers on ``stage_engine`` with mocks."""
+    for attr in (
+        "notify_stage_change", "notify_qg_failure", "notify_approve_requested", "send_telegram",
+        "plane_notify_stage", "plane_notify_qg", "plane_add_comment", "set_issue_in_review",
+        "set_issue_needs_input", "set_issue_in_progress", "set_issue_blocked", "set_issue_done",
+    ):
+        monkeypatch.setattr(stage_engine, attr, MagicMock())
+
+
+def _make_task(stage, repo="orchestrator", branch="feature/ORCH-036-x", wi="ORCH-036"):
+    """Insert one ``tasks`` row at ``stage`` and return the id of the new row."""
+    insert_sql = (
+        "INSERT INTO tasks (plane_id, work_item_id, repo, branch, stage) "
+        "VALUES (?, ?, ?, ?, ?)"
+    )
+    db = get_db()
+    new_id = db.execute(insert_sql, (f"plane-{wi}", wi, repo, branch, stage)).lastrowid
+    db.commit()
+    db.close()
+    return new_id
+
+
+def _stage(task_id):  # stage currently stored on the task row
+    db = get_db()
+    found = db.execute("SELECT stage FROM tasks WHERE id=?", (task_id,)).fetchone()
+    db.close()
+    return found[0]
+
+
+def _fail(reason):
+    """Build a QG-check stub that always fails with ``reason``."""
+    # Any call signature is accepted; the arguments are ignored.
+    return lambda *a, **k: (False, reason)
+
+
+def test_tc11_staging_failed_never_reaches_deploy(monkeypatch):
+    """TC-11: a red staging gate rolls back to development before any self-deploy step."""
+    repo, work_item, branch = "orchestrator", "ORCH-036", "feature/ORCH-036-x"
+    monkeypatch.setattr(stage_engine.settings, "deploy_require_manual_approve", True)
+    # Make the staging gate report a failure; every other check stays as registered.
+    red_staging = _fail("Staging status: FAILED")
+    checks = dict(stage_engine.QG_CHECKS, check_staging_status=red_staging)
+    monkeypatch.setattr(stage_engine, "QG_CHECKS", checks)
+    # Guard spy: a failed staging run must not initiate a self-deploy.
+    initiate_spy = MagicMock()
+    monkeypatch.setattr(stage_engine.self_deploy, "initiate_deploy", initiate_spy)
+
+    task_id = _make_task("deploy-staging")
+    outcome = advance_stage(
+        task_id, "deploy-staging", repo, work_item, branch, finished_agent="deployer",
+    )
+
+    assert outcome.advanced is False
+    assert outcome.rolled_back_to == "development"
+    assert _stage(task_id) == "development"  # the task never reached deploy
+    initiate_spy.assert_not_called()
+    assert not self_deploy.has_marker(repo, work_item, self_deploy.APPROVE_REQUESTED)