Files
orchestrator/tests/test_deploy_hook_rollback_sim.py

119 lines
4.0 KiB
Python

"""ORCH-036 TC-19: deploy-hook auto-rollback simulation (AC-9).
Drives the REAL ``scripts/orchestrator-deploy-hook.sh`` in a hermetic sandbox:
``docker`` / ``curl`` / ``git`` / ``sleep`` are replaced by PATH-shimmed stubs so
no real infra is touched (and prod is never restarted — INFRA safety). The curl
stub is stateful: the freshly-deployed service is UNHEALTHY for the whole deploy
health-check window, which must trigger the hook's AUTO-ROLLBACK; after the
rollback restart the previous image is HEALTHY again.
Expected hook contract (exit-code 0/1/2):
* health fails -> auto rollback -> previous image healthy -> exit 1 (rolled back);
* the whole run completes well under the 60s MTTR budget (sleeps are shimmed).
"""
import os
import shutil
import stat
import subprocess
import time
import pytest
HOOK = os.path.join(
os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
"scripts", "orchestrator-deploy-hook.sh",
)
pytestmark = pytest.mark.skipif(
shutil.which("bash") is None, reason="bash required for hook simulation"
)
def _write_exec(path, content):
with open(path, "w", encoding="utf-8") as f:
f.write(content)
os.chmod(path, os.stat(path).st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH)
def _setup_sandbox(tmp_path):
"""Create PATH-shimmed docker/curl/git/sleep stubs + a rewritten hook copy."""
binx = tmp_path / "bin"
binx.mkdir()
state = tmp_path / "state"
state.mkdir()
repo = tmp_path / "repo"
repo.mkdir()
cnt = state / "curl_count"
# docker: fake a running service + a recoverable previous image.
_write_exec(str(binx / "docker"), """#!/bin/bash
case "$1" in
compose)
for a in "$@"; do [ "$a" = "ps" ] && { echo "fakecid"; exit 0; }; done
exit 0;;
inspect) echo "sha256:previmage"; exit 0;;
image) exit 0;; # docker image inspect <img> -> found
tag) exit 0;;
*) exit 0;;
esac
""")
# curl: first 20 invocations (10 deploy health attempts x2 calls) UNHEALTHY,
# then HEALTHY (the rolled-back previous image).
_write_exec(str(binx / "curl"), f"""#!/bin/bash
CNT="{cnt}"
n=$(cat "$CNT" 2>/dev/null || echo 0); n=$((n+1)); echo "$n" > "$CNT"
iscode=""
for a in "$@"; do [ "$a" = "-w" ] && iscode=1; done
if [ "$n" -gt 20 ]; then
[ -n "$iscode" ] && echo "200" || echo '{{"status":"ok"}}'
else
[ -n "$iscode" ] && echo "000" || echo ""
fi
exit 0
""")
_write_exec(str(binx / "git"), "#!/bin/bash\nexit 0\n")
# Shim sleep to a no-op so the simulation runs fast (real timing is governed
# by the hook's sleep args; here we only assert the rollback CONTROL FLOW).
_write_exec(str(binx / "sleep"), "#!/bin/bash\nexit 0\n")
# Copy the hook, repointing REPO to the sandbox (avoids the hardcoded prod path).
hook_text = open(HOOK, encoding="utf-8").read()
hook_text = hook_text.replace(
"REPO=/home/slin/repos/orchestrator", f"REPO={repo}"
)
hook_copy = tmp_path / "hook.sh"
_write_exec(str(hook_copy), hook_text)
env = {
**os.environ,
"PATH": f"{binx}:{os.environ['PATH']}",
"LOG": str(state / "hook.log"),
"PREV_IMAGE_FILE": str(state / "prev-image"),
"COMPOSE_PROFILE": "staging",
"TARGET_SERVICE": "orchestrator-staging",
"TARGET_PORT": "8501",
}
return hook_copy, env
def test_tc19_unhealthy_deploy_auto_rolls_back_exit1(tmp_path):
hook_copy, env = _setup_sandbox(tmp_path)
t0 = time.time()
proc = subprocess.run(
["bash", str(hook_copy), "--deploy"],
env=env, capture_output=True, text=True, timeout=60,
)
elapsed = time.time() - t0
# AC-9: unhealthy deploy -> auto rollback succeeded on the previous image -> exit 1.
assert proc.returncode == 1, f"stdout={proc.stdout}\nstderr={proc.stderr}"
out = proc.stdout + proc.stderr
assert "AUTO ROLLBACK" in out
assert "rolled back to previous image successfully" in out
# MTTR well under the 60s budget (sleeps shimmed; control flow only).
assert elapsed < 60