Фундамент тиража 10-common (эпик ORCH-10): платформа разворачивается на
новой инфре без правки кода — только env/конфиг. Каждый дефолт = боевому
значению (пустой .env => поведение 1:1, kill-switch-природа, NFR-2);
STAGE_TRANSITIONS/QG_CHECKS/check_*/machine-verdict/схема БД не тронуты.
- config: agent_home_dir / agent_git_name / git_email_domain / staging_port
(ADR-001 D2/D4); код-блокеры A1-A4 закрыты: plane_sync ссылки из
gitea_public_url+gitea_owner, launcher - единый agent_git_env() (x2 места),
self_deploy/post_deploy - HOME+домен из Settings (имена системных акторов -
платформенные литералы)
- image_freshness: staging_port из конфига + fail-closed guard
staging_port == прод-порт -> отказ ДО ssh/build (инвариант ORCH-058 AC-9
стал исполняемым); REPO= передаётся хуку явно обоими инвокерами (D7)
- SELF_HOSTING_REPO - нормативная платформенная константа (D3, пин-тест)
- compose: полная ${VAR:-default}-интерполяция (реестр B, карта D6); группа
ORCH-040 uid/gid/HOME/маунты двигается согласованно (build.args APP_*);
group_add "МИНА 1" сохранён x3; оба app-сервиса с явным command:
- Dockerfile: ARG APP_UID/APP_GID/APP_USER/APP_HOME (CMD exec-form 8500
сознательно не тронут - D5); deploy-hook: REPO="${REPO:-...}" (D1 реестра)
- секреты: stdlib scripts/gen_secrets.py (token_hex(32); печать по умолчанию;
--write никогда не перезаписывает существующий .env молча, exit=2;
перезапись только --force); .env.example дополнен до полноты ключей старта
- доки: новый docs/operations/REPLICATION.md (карта env, чек-лист секретов,
smoke-процедура с PASS/FAIL, границы 10-common/Lite/Bundled), INFRA.md,
README, CLAUDE.md, CHANGELOG
- анти-регресс: tests/test_no_host_hardcodes.py (tokenize-сканер запрещённых
литералов, config-модули - структурное исключение, allowlist пуст,
негативная самопроверка) + test_host_config_keys / test_infra_parametrization
/ test_secrets_gen / test_replication_smoke; согласованные структурные
правки test_orch040_compose (судит резолв дефолтов) и
test_deploy_hook_rollback_sim (REPO через env-override = контракт D7)
Полный регресс: 1764 passed.
Refs: ORCH-101
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
142 lines
7.6 KiB
YAML
142 lines
7.6 KiB
YAML
# ORCH-101 (replication foundation): every host-specific value is interpolated
# as ${VAR:-default}; the defaults equal the current production values, so an
# empty environment resolves to a byte-for-byte equivalent of the previous file
# (zero regression, BR-5). Compose reads ${VAR} from the project `.env` / shell —
# NOT from a service's env_file (so .env.staging does NOT interpolate); the
# Settings-shared names (ORCH_AGENT_HOME_DIR, ORCH_STAGING_PORT, ...) are read
# by pydantic from env_file AND by compose from .env — one name per fact (D1).
# Container-side paths (/app/data, /repos, /opt/claude-code, docker.sock) are a
# container-layout convention, NOT host values — deliberately not parametrised.
# See docs/operations/REPLICATION.md for the full variable map.
services:
  # Production orchestrator instance (default `docker compose up -d` target).
  orchestrator:
    build:
      context: .
      # ORCH-101 (D5): uid/gid/home move as ONE coherent group with the runtime
      # user: and the mount targets below (ORCH-040 invariant).
      args:
        APP_UID: ${ORCH_RUN_UID:-1000}
        APP_GID: ${ORCH_RUN_GID:-1000}
        APP_HOME: ${ORCH_AGENT_HOME_DIR:-/home/slin}
    container_name: orchestrator
    restart: unless-stopped
    # ORCH-040: run as the host uid:gid (slin=1000:1000) rather than root, so
    # pipeline artifacts (worktree + docs) are created as slin:slin and git on
    # the host works without a manual chown. Access to docker.sock is kept via
    # group_add (ORCH_DOCKER_GID, default "999") — "МИНА 1", do NOT remove.
    # See ADR-001 ORCH-040.
    user: "${ORCH_RUN_UID:-1000}:${ORCH_RUN_GID:-1000}"
    # init: true injects docker-init (tini) as PID 1 so reparented grandchild
    # processes from the claude/node subprocess tree are reaped (no zombies, B-2).
    init: true
    network_mode: host
    # ORCH-101 (D5): the prod port is configurable on the compose layer (the
    # Dockerfile CMD keeps its exec-form 8500 default — ADR-001 D5); the default
    # resolves byte-for-byte to the previous image CMD. Reuses the existing
    # ORCH_DEPLOY_PROD_TARGET_PORT (no second truth about the prod port).
    command: ["uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "${ORCH_DEPLOY_PROD_TARGET_PORT:-8500}"]
    volumes:
      - ./data:/app/data
      - ${ORCH_HOST_REPOS_DIR:-/home/slin/repos}:/repos
      - /var/run/docker.sock:/var/run/docker.sock
      - ${ORCH_HOST_CLAUDE_CODE_DIR:-/usr/lib/node_modules/@anthropic-ai/claude-code}:/opt/claude-code:ro
      - ${ORCH_HOST_NODE_BIN:-/usr/bin/node}:/usr/bin/node:ro
      - ${ORCH_HOST_CLAUDE_DIR:-/home/slin/.claude}:${ORCH_AGENT_HOME_DIR:-/home/slin}/.claude
      - ${ORCH_HOST_CLAUDE_JSON:-/home/slin/.claude.json}:${ORCH_AGENT_HOME_DIR:-/home/slin}/.claude.json:ro
      # ORCH-040: the mount target matches HOME (launcher:
      # settings.agent_home_dir), not /root/.ssh — both sides move with the
      # single variable ORCH_AGENT_HOME_DIR.
      - ${ORCH_HOST_SSH_DIR:-/home/slin/.orchestrator-ssh}:${ORCH_AGENT_HOME_DIR:-/home/slin}/.ssh:ro
    env_file: .env
    environment:
      - ORCH_REPOS_DIR=/repos
      - ORCH_HOST_REPOS_DIR=${ORCH_HOST_REPOS_DIR:-/home/slin/repos}
      # legacy enduro deployer (read via os.environ, keep as-is):
      - DEPLOY_SSH_USER=${ORCH_DEPLOY_SSH_USER:-slin}
      - DEPLOY_SSH_HOST=127.0.0.1
      - DEPLOY_HOOK_SCRIPT=${DEPLOY_HOOK_SCRIPT:-/home/slin/bin/enduro-deploy-hook.sh}
      # ORCH-036 self-deploy (read via pydantic ORCH_ prefix; host-network -> 127.0.0.1, ssh key mounted):
      - ORCH_DEPLOY_SSH_USER=${ORCH_DEPLOY_SSH_USER:-slin}
      - ORCH_DEPLOY_SSH_HOST=127.0.0.1
      - ORCH_DEPLOY_HOOK_SCRIPT=scripts/orchestrator-deploy-hook.sh
      - ORCH_DEPLOY_HOST_REPO_PATH=${ORCH_DEPLOY_HOST_REPO_PATH:-/home/slin/repos/orchestrator}
    group_add:
      - "${ORCH_DOCKER_GID:-999}"

  # ORCH-100 (FND/F1b): sidecar-watchdog — the monitoring brain in a SEPARATE
  # container (observer separated from observed, ADR-001 D2). Deploying it builds
  # ONLY this service — the prod `orchestrator` is NOT rebuilt/restarted.
  #   * network_mode: host -> /metrics reachable at http://127.0.0.1:8500/metrics
  #     and host interfaces visible for memory/disk reads.
  #   * docker.sock mounted :ro AND the code is GET-only (double read-only guard).
  #   * host disk paths bind-mounted :ro so shutil.disk_usage sees the host FS but
  #     can never write (opt-in disk ceiling, D6).
  #   * mem_limit caps the thin stdlib daemon (D2): OOM = early "sidecar grew" signal.
  #   * WATCHDOG_ENABLED=false (or simply not starting the service) -> inert.
  orchestrator-watchdog:
    build:
      context: .
      dockerfile: watchdog/Dockerfile
    container_name: orchestrator-watchdog
    restart: unless-stopped
    init: true
    network_mode: host
    mem_limit: 128m
    mem_reservation: 32m
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock:ro
      - ${ORCH_HOST_REPOS_DIR:-/home/slin/repos}:/repos:ro
      - ./data:/app/data:ro
    # Optional env_file (required: false): a missing .env.watchdog must NOT fail
    # `docker compose up` for the prod orchestrator (self-hosting safety). Absent
    # file -> WATCHDOG_* defaults, no token -> fail-safe (logs, does not send).
    env_file:
      - path: .env.watchdog
        required: false
    group_add:
      - "${ORCH_DOCKER_GID:-999}"

  # ORCH-31: staging instance (port 8501, isolated DB).
  # Starts ONLY with: docker compose --profile staging up -d orchestrator-staging
  # Normal "docker compose up -d" does NOT start this service.
  orchestrator-staging:
    profiles:
      - staging
    build:
      context: .
      args:
        APP_UID: ${ORCH_RUN_UID:-1000}
        APP_GID: ${ORCH_RUN_GID:-1000}
        APP_HOME: ${ORCH_AGENT_HOME_DIR:-/home/slin}
    container_name: orchestrator-staging
    restart: unless-stopped
    # ORCH-040: same host uid as prod (see the comment on the `orchestrator`
    # service above / ADR-001).
    user: "${ORCH_RUN_UID:-1000}:${ORCH_RUN_GID:-1000}"
    init: true
    network_mode: host
    # ORCH-101 (D4): the same ORCH_STAGING_PORT that settings.staging_port reads —
    # the image_freshness rebuild target and the listening port can never drift.
    # NOTE: ${ORCH_STAGING_PORT} here is interpolated from the project `.env` /
    # shell, not from this service's env_file (.env.staging) — see file header.
    command: ["uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "${ORCH_STAGING_PORT:-8501}"]
    volumes:
      - ./data/staging:/app/data
      - ${ORCH_HOST_REPOS_DIR:-/home/slin/repos}:/repos
      - /var/run/docker.sock:/var/run/docker.sock
      - ${ORCH_HOST_CLAUDE_CODE_DIR:-/usr/lib/node_modules/@anthropic-ai/claude-code}:/opt/claude-code:ro
      - ${ORCH_HOST_NODE_BIN:-/usr/bin/node}:/usr/bin/node:ro
      - ${ORCH_HOST_CLAUDE_DIR:-/home/slin/.claude}:${ORCH_AGENT_HOME_DIR:-/home/slin}/.claude
      - ${ORCH_HOST_CLAUDE_JSON:-/home/slin/.claude.json}:${ORCH_AGENT_HOME_DIR:-/home/slin}/.claude.json:ro
      # ORCH-040: the mount target matches HOME (settings.agent_home_dir),
      # not /root/.ssh.
      - ${ORCH_HOST_SSH_DIR:-/home/slin/.orchestrator-ssh}:${ORCH_AGENT_HOME_DIR:-/home/slin}/.ssh:ro
    env_file: .env.staging
    environment:
      - ORCH_REPOS_DIR=/repos
      - ORCH_HOST_REPOS_DIR=${ORCH_HOST_REPOS_DIR:-/home/slin/repos}
      - DEPLOY_SSH_USER=${ORCH_DEPLOY_SSH_USER:-slin}
      - DEPLOY_SSH_HOST=127.0.0.1
      - DEPLOY_HOOK_SCRIPT=${DEPLOY_HOOK_SCRIPT:-/home/slin/bin/enduro-deploy-hook.sh}
      # Staging DB is isolated via ./data/staging volume mount.
      # Inside the container the path remains /app/data/orchestrator.db (same default),
      # but on the host it physically lives at ./data/staging/orchestrator.db —
      # completely separate from prod ./data/orchestrator.db.
      - ORCH_DB_PATH=/app/data/orchestrator.db
    group_add:
      - "${ORCH_DOCKER_GID:-999}"