Фундамент тиража 10-common (эпик ORCH-10): платформа разворачивается на
новой инфре без правки кода — только env/конфиг. Каждый дефолт = боевому
значению (пустой .env => поведение 1:1, kill-switch-природа, NFR-2);
STAGE_TRANSITIONS/QG_CHECKS/check_*/machine-verdict/схема БД не тронуты.
- config: agent_home_dir / agent_git_name / git_email_domain / staging_port
(ADR-001 D2/D4); код-блокеры A1-A4 закрыты: plane_sync ссылки из
gitea_public_url+gitea_owner, launcher - единый agent_git_env() (x2 места),
self_deploy/post_deploy - HOME+домен из Settings (имена системных акторов -
платформенные литералы)
- image_freshness: staging_port из конфига + fail-closed guard
staging_port == прод-порт -> отказ ДО ssh/build (инвариант ORCH-058 AC-9
стал исполняемым); REPO= передаётся хуку явно обоими инвокерами (D7)
- SELF_HOSTING_REPO - нормативная платформенная константа (D3, пин-тест)
- compose: полная ${VAR:-default}-интерполяция (реестр B, карта D6); группа
ORCH-040 uid/gid/HOME/маунты двигается согласованно (build.args APP_*);
group_add "МИНА 1" сохранён x3; оба app-сервиса с явным command:
- Dockerfile: ARG APP_UID/APP_GID/APP_USER/APP_HOME (CMD exec-form 8500
сознательно не тронут - D5); deploy-hook: REPO="${REPO:-...}" (D1 реестра)
- секреты: stdlib scripts/gen_secrets.py (token_hex(32); печать по умолчанию;
--write никогда не перезаписывает существующий .env молча, exit=2;
перезапись только --force); .env.example дополнен до полноты ключей старта
- доки: новый docs/operations/REPLICATION.md (карта env, чек-лист секретов,
smoke-процедура с PASS/FAIL, границы 10-common/Lite/Bundled), INFRA.md,
README, CLAUDE.md, CHANGELOG
- анти-регресс: tests/test_no_host_hardcodes.py (tokenize-сканер запрещённых
литералов, config-модули - структурное исключение, allowlist пуст,
негативная самопроверка) + test_host_config_keys / test_infra_parametrization
/ test_secrets_gen / test_replication_smoke; согласованные структурные
правки test_orch040_compose (судит резолв дефолтов) и
test_deploy_hook_rollback_sim (REPO через env-override = контракт D7)
Полный регресс: 1764 passed.
Refs: ORCH-101
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
119 lines
4.0 KiB
Python
119 lines
4.0 KiB
Python
"""ORCH-036 TC-19: deploy-hook auto-rollback simulation (AC-9).

Drives the REAL ``scripts/orchestrator-deploy-hook.sh`` in a hermetic sandbox:
``docker`` / ``curl`` / ``git`` / ``sleep`` are replaced by PATH-shimmed stubs so
no real infra is touched (and prod is never restarted — INFRA safety). The curl
stub is stateful: the freshly-deployed service is UNHEALTHY for the whole deploy
health-check window, which must trigger the hook's AUTO-ROLLBACK; after the
rollback restart the previous image is HEALTHY again.

Expected hook contract (exit-code 0/1/2):
  * health fails -> auto rollback -> previous image healthy -> exit 1 (rolled back);
  * the whole run completes well under the 60s MTTR budget (sleeps are shimmed).
"""
|
|
|
|
import os
|
|
import shutil
|
|
import stat
|
|
import subprocess
|
|
import time
|
|
|
|
import pytest
|
|
|
|
# Absolute path of the real deploy hook under test. It is resolved relative to
# this module: the parent of the directory containing this file, then
# ``scripts/orchestrator-deploy-hook.sh``.
_THIS_DIR = os.path.dirname(os.path.abspath(__file__))
HOOK = os.path.join(
    os.path.dirname(_THIS_DIR), "scripts", "orchestrator-deploy-hook.sh"
)
|
|
|
|
# Module-wide marker: every test here shells out to bash, so skip the whole
# module when no ``bash`` executable can be found on PATH.
pytestmark = pytest.mark.skipif(
    not shutil.which("bash"),
    reason="bash required for hook simulation",
)
|
|
|
|
|
|
def _write_exec(path, content):
    """Write *content* to *path* as UTF-8 text and mark the file executable.

    Args:
        path: Destination file path; created or truncated.
        content: Full text of the file (the callers pass bash stub scripts).

    The existing mode bits are kept and the execute bit is added for user,
    group and others.
    """
    # newline="\n" disables platform newline translation, so a "#!/bin/bash"
    # shebang line is never written with a trailing "\r".
    with open(path, "w", encoding="utf-8", newline="\n") as f:
        f.write(content)
    exec_bits = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
    os.chmod(path, os.stat(path).st_mode | exec_bits)
|
|
|
|
|
|
def _setup_sandbox(tmp_path):
    """Build a hermetic sandbox for running the deploy hook.

    Creates ``bin/``, ``state/`` and ``repo/`` under *tmp_path*, fills ``bin/``
    with executable ``docker`` / ``curl`` / ``git`` / ``sleep`` stubs and puts a
    verbatim, executable copy of ``HOOK`` at ``tmp_path / "hook.sh"``.

    Args:
        tmp_path: Sandbox root. It is used with ``/`` and ``.mkdir()``, so it is
            presumably pytest's ``tmp_path`` fixture (a ``pathlib.Path``).

    Returns:
        A ``(hook_copy, env)`` tuple: the path of the copied hook, and a copy
        of ``os.environ`` with the stub directory prepended to ``PATH`` plus
        the ``REPO`` / ``LOG`` / ``PREV_IMAGE_FILE`` / ``COMPOSE_PROFILE`` /
        ``TARGET_SERVICE`` / ``TARGET_PORT`` variables set for the hook run.
    """
    binx = tmp_path / "bin"
    binx.mkdir()
    state = tmp_path / "state"
    state.mkdir()
    repo = tmp_path / "repo"
    repo.mkdir()
    # File in which the curl stub persists its invocation counter.
    cnt = state / "curl_count"

    # docker: fake a running service + a recoverable previous image.
    _write_exec(str(binx / "docker"), """#!/bin/bash
case "$1" in
compose)
for a in "$@"; do [ "$a" = "ps" ] && { echo "fakecid"; exit 0; }; done
exit 0;;
inspect) echo "sha256:previmage"; exit 0;;
image) exit 0;; # docker image inspect <img> -> found
tag) exit 0;;
*) exit 0;;
esac
""")

    # curl: first 20 invocations (10 deploy health attempts x2 calls) UNHEALTHY,
    # then HEALTHY (the rolled-back previous image).
    # When "-w" is among the arguments the stub prints a status code ("000" or
    # "200"); otherwise it prints a body (empty, or the ok JSON).
    _write_exec(str(binx / "curl"), f"""#!/bin/bash
CNT="{cnt}"
n=$(cat "$CNT" 2>/dev/null || echo 0); n=$((n+1)); echo "$n" > "$CNT"
iscode=""
for a in "$@"; do [ "$a" = "-w" ] && iscode=1; done
if [ "$n" -gt 20 ]; then
[ -n "$iscode" ] && echo "200" || echo '{{"status":"ok"}}'
else
[ -n "$iscode" ] && echo "000" || echo ""
fi
exit 0
""")

    _write_exec(str(binx / "git"), "#!/bin/bash\nexit 0\n")
    # Shim sleep to a no-op so the simulation runs fast (real timing is governed
    # by the hook's sleep args; here we only assert the rollback CONTROL FLOW).
    _write_exec(str(binx / "sleep"), "#!/bin/bash\nexit 0\n")

    # Copy the hook verbatim and repoint REPO via the env-override — the SAME
    # contract the wired invokers use since ORCH-101 (D7: REPO="${REPO:-…}");
    # no text rewrite needed, so the simulation also proves the override works.
    # The context manager closes the source file deterministically.
    with open(HOOK, encoding="utf-8") as src:
        hook_text = src.read()
    hook_copy = tmp_path / "hook.sh"
    _write_exec(str(hook_copy), hook_text)

    env = {
        **os.environ,
        # Stub directory first, so the shims shadow the real executables.
        "PATH": f"{binx}:{os.environ['PATH']}",
        "REPO": str(repo),
        "LOG": str(state / "hook.log"),
        "PREV_IMAGE_FILE": str(state / "prev-image"),
        "COMPOSE_PROFILE": "staging",
        "TARGET_SERVICE": "orchestrator-staging",
        "TARGET_PORT": "8501",
    }
    return hook_copy, env
|
|
|
|
|
|
def test_tc19_unhealthy_deploy_auto_rolls_back_exit1(tmp_path):
    """TC-19 / AC-9: an unhealthy deploy is auto-rolled back and exits 1.

    Runs the sandboxed hook copy with ``--deploy`` against the stubbed tools
    and checks the exit code, the rollback messages in the combined output,
    and that the run finished inside the 60s budget.
    """
    hook_copy, env = _setup_sandbox(tmp_path)

    # Monotonic clock: a wall-clock adjustment during the run cannot distort
    # the measured duration.
    started = time.monotonic()
    proc = subprocess.run(
        ["bash", str(hook_copy), "--deploy"],
        env=env, capture_output=True, text=True, timeout=60,
    )
    elapsed = time.monotonic() - started

    # AC-9: unhealthy deploy -> auto rollback succeeded on the previous image -> exit 1.
    assert proc.returncode == 1, f"stdout={proc.stdout}\nstderr={proc.stderr}"
    out = proc.stdout + proc.stderr
    assert "AUTO ROLLBACK" in out
    assert "rolled back to previous image successfully" in out
    # MTTR well under the 60s budget (sleeps shimmed; control flow only).
    assert elapsed < 60
|