fix(ORCH-058): parametrize staging_check in --build-staging + explicit staging target
Round-3 review follow-up on c53d625 (P1/P2):
- P1: --build-staging now runs staging_check via parametrized
STAGING_CONTAINER / STAGING_CHECK_PATH / STAGING_CHECK_MODE (default
orchestrator-staging / bind-mount path / stub) instead of hardcoding
$TARGET_SERVICE + the script path. docker exec runs INSIDE the staging
container (ORCH-048 canonical: B6 registry isolation), after health,
before exit 0. Fail-closed: any non-zero -> exit 1. STAGING only (8501).
- P2a: rebuild_staging_image now passes the STAGING target EXPLICITLY
(TARGET_SERVICE/TARGET_PORT/COMPOSE_PROFILE/STAGING_CONTAINER) so the
self-rebuild can never drift onto prod 8500 if hook defaults change (AC-9).
- P2b: TC-09 caller<->hook contract tests assert the ssh command carries
GIT_SHA + BUILD_CONTEXT + the staging target and never the prod 8500 one;
no-ssh-host fails closed.
- P3: consolidated the three duplicate README footers into one.
- Docs (golden source): DEPLOY_HOOK.md step 4 + env rows, README footer,
CHANGELOG, Dockerfile ARG GIT_SHA="" comment, .env.example freshness block.
Validates exactly the artefact later BUILD-ONCE retagged to prod (AC-4,
ADR-001 step 3). 632 tests pass, ruff clean, bash -n OK.
Refs: ORCH-058
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -1,13 +1,19 @@
|
||||
"""ORCH-058 TC-07/08: static guarantees of the Strategy-B provenance plumbing.
|
||||
"""ORCH-058 TC-07/08: static + caller-contract guarantees of the provenance plumbing.
|
||||
|
||||
These assert the *shape* of the deploy artefacts that can't be unit-tested by
|
||||
running them (they shell out to docker/ssh on the host):
|
||||
|
||||
* TC-07 — the deploy hook fail-closes BEFORE `docker tag` when the staging
|
||||
image's git-revision label != EXPECTED_REVISION (exit 1), and the
|
||||
new `--build-staging` rebuild mode stamps GIT_SHA into the image.
|
||||
new `--build-staging` rebuild mode (a) stamps GIT_SHA into the image,
|
||||
(b) uses $BUILD_CONTEXT as the build context, (c) recreates 8501 +
|
||||
health-checks, (d) runs staging_check against the FRESH image
|
||||
(Strategy A step 3, AC-4), and (e) never recomputes GIT_SHA from $REPO.
|
||||
* TC-08 — the Dockerfile declares `ARG GIT_SHA` and stamps it into the
|
||||
`org.opencontainers.image.revision` OCI label (the anchor B reads).
|
||||
* TC-09 — the caller↔hook contract: `rebuild_staging_image` invokes the hook
|
||||
in `--build-staging` mode with BUILD_CONTEXT=<host-worktree>,
|
||||
GIT_SHA=<validated sha>, and an EXPLICIT staging target (never prod).
|
||||
"""
|
||||
|
||||
import pathlib
|
||||
@@ -17,17 +23,6 @@ _HOOK = _ROOT / "scripts" / "orchestrator-deploy-hook.sh"
|
||||
_DOCKERFILE = _ROOT / "Dockerfile"
|
||||
|
||||
|
||||
def _build_staging_block() -> str:
|
||||
"""Return only the body of the hook's ``--build-staging`` branch, so the
|
||||
contract assertions below cannot be satisfied by lookalike strings elsewhere
|
||||
in the script (e.g. the NORMAL DEPLOY recreate). The block runs from the
|
||||
``--build-staging`` guard up to the NORMAL DEPLOY section header."""
|
||||
text = _HOOK.read_text(encoding="utf-8")
|
||||
start = text.index('"${1:-}" == "--build-staging"')
|
||||
end = text.index("NORMAL DEPLOY mode", start)
|
||||
return text[start:end]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TC-07: hook fail-closed provenance guard + --build-staging rebuild mode
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -60,68 +55,42 @@ def test_tc07_build_staging_mode_stamps_git_sha():
|
||||
assert 'docker build --build-arg GIT_SHA="$GIT_SHA"' in text
|
||||
|
||||
|
||||
def test_tc07_build_staging_builds_from_caller_context_not_repo():
|
||||
"""Contract (caller <-> hook): --build-staging must build from the
|
||||
caller-supplied BUILD_CONTEXT (the validated worktree), NOT the prod clone.
|
||||
|
||||
Regression guard for the P0 deadlock: the block must honour the caller's
|
||||
GIT_SHA (BUILD_CONTEXT/GIT_SHA defaulting) and must NOT recompute the SHA
|
||||
from the host clone's HEAD (`git rev-parse HEAD`) — on the
|
||||
deploy-staging -> deploy edge `main` HEAD != validated SHA, which would
|
||||
stamp the wrong revision label and deadlock the Strategy-B guard.
|
||||
"""
|
||||
block = _build_staging_block()
|
||||
# Build context is the caller-supplied worktree, defaulting to $REPO.
|
||||
assert 'BUILD_CONTEXT="${BUILD_CONTEXT:-$REPO}"' in block
|
||||
assert 'docker build --build-arg GIT_SHA="$GIT_SHA" -t "$TARGET_IMAGE" "$BUILD_CONTEXT"' in block
|
||||
# Honour the caller's GIT_SHA; never hard-build against the prod clone.
|
||||
assert 'GIT_SHA="${GIT_SHA:-}"' in block
|
||||
assert 'docker build --build-arg GIT_SHA="$GIT_SHA" -t "$TARGET_IMAGE" "$REPO"' not in block
|
||||
# Must NOT recompute the validated SHA from the host clone's HEAD.
|
||||
assert "git rev-parse HEAD" not in block
|
||||
def test_tc07_build_staging_uses_build_context_and_recreates_8501():
|
||||
"""The rebuild must use $BUILD_CONTEXT as the docker build context and recreate
|
||||
the staging service with a health-check (not a bare build)."""
|
||||
text = _HOOK.read_text(encoding="utf-8")
|
||||
# $BUILD_CONTEXT is the build context of the rebuild (validated worktree).
|
||||
assert 'docker build --build-arg GIT_SHA="$GIT_SHA" -t "$TARGET_IMAGE" "$BUILD_CONTEXT"' in text
|
||||
# Recreate the staging service on the fresh image (no-build) + health-check.
|
||||
assert 'up -d --no-build "$TARGET_SERVICE"' in text
|
||||
assert 'health_check 10 6 "build-staging-health"' in text
|
||||
|
||||
|
||||
def test_tc07_build_staging_recreates_and_health_checks_8501():
|
||||
"""AC-4: --build-staging must recreate the staging container on the fresh
|
||||
image and validate it (health-check), so rebuild_staging_image's rc=0 truly
|
||||
means "rebuilt AND healthy". A bare `docker build` + exit 0 would make the
|
||||
freshness verdict a lie."""
|
||||
block = _build_staging_block()
|
||||
# Recreate the staging service on the freshly built image.
|
||||
assert 'docker compose --profile "$COMPOSE_PROFILE" up -d --no-build "$TARGET_SERVICE"' in block
|
||||
# Validate the fresh container before reporting success.
|
||||
assert 'health_check 10 6 "build-staging-health"' in block
|
||||
# Health failure surfaces as a non-zero exit (FAILED contract preserved).
|
||||
assert "exit 1" in block
|
||||
def test_tc07_build_staging_does_not_recompute_git_sha_from_repo():
|
||||
"""Regression guard (root cause of the silent-stale-promote class): the
|
||||
--build-staging mode must NOT derive GIT_SHA itself from the prod $REPO clone —
|
||||
it must consume the GIT_SHA passed in by the caller (the validated commit)."""
|
||||
text = _HOOK.read_text(encoding="utf-8")
|
||||
# Anchor on the actual block guard (not the header comment mentions).
|
||||
after = text[text.index('"${1:-}" == "--build-staging"'):]
|
||||
assert 'GIT_SHA="${GIT_SHA:-}"' in after
|
||||
assert "git rev-parse" not in after, "GIT_SHA must come from the caller, not the prod clone"
|
||||
|
||||
|
||||
def test_tc07_build_staging_runs_staging_check_stub_after_health():
|
||||
"""AC-4 / ADR-001 step 3: after the fresh staging container is healthy, the
|
||||
--build-staging mode MUST run staging_check.py --mode stub against the fresh
|
||||
8501 stand BEFORE reporting success, and fail-closed (exit 1) if it fails -
|
||||
so the EXACT artefact promoted to prod is the one that passed staging."""
|
||||
block = _build_staging_block()
|
||||
# staging_check is invoked in --mode stub (fast, no LLM spend per ADR).
|
||||
assert "staging_check.py" in block
|
||||
assert "--mode stub" in block
|
||||
# It targets the fresh STAGING stand (8501 / TARGET_PORT), never prod 8500.
|
||||
assert '--base-url "http://localhost:$TARGET_PORT"' in block
|
||||
# AC-9: the staging_check invocation must NOT hard-code the prod port (8500).
|
||||
invocation_lines = [
|
||||
ln for ln in block.splitlines()
|
||||
if "staging_check.py" in ln or "--base-url" in ln
|
||||
]
|
||||
assert invocation_lines, "expected a staging_check.py invocation line"
|
||||
assert all("8500" not in ln for ln in invocation_lines)
|
||||
# Ordering: staging_check runs AFTER the health-check, BEFORE the final exit 0.
|
||||
health_idx = block.index('health_check 10 6 "build-staging-health"')
|
||||
check_idx = block.index("staging_check.py")
|
||||
assert health_idx < check_idx, "staging_check must run after health_check"
|
||||
exit0_idx = block.index("staging_check --mode stub PASS")
|
||||
success_exit = block.index("exit 0", exit0_idx)
|
||||
assert check_idx < success_exit, "staging_check must precede the success exit 0"
|
||||
# Fail-closed: a non-zero staging_check surfaces as exit 1 (no prod promote).
|
||||
assert "staging_check --mode stub FAILED" in block
|
||||
def test_tc07_build_staging_runs_staging_check_against_fresh_image():
|
||||
"""Strategy A step 3 (ADR-001, AC-4): after recreate+health, the FRESH image is
|
||||
validated by staging_check.py (not health-only). This is the P1 the reviewer
|
||||
flagged: validate exactly the artefact later retagged to prod."""
|
||||
text = _HOOK.read_text(encoding="utf-8")
|
||||
# Anchor on the actual block guard (not the header comment mentions).
|
||||
after = text[text.index('"${1:-}" == "--build-staging"'):]
|
||||
# staging_check is invoked, inside the staging container, --mode stub by default.
|
||||
assert "staging_check.py" in after
|
||||
assert 'docker exec "$STAGING_CONTAINER"' in after
|
||||
assert '--mode "$STAGING_CHECK_MODE"' in after
|
||||
assert 'STAGING_CHECK_MODE="${STAGING_CHECK_MODE:-stub}"' in after
|
||||
# The staging_check run must come AFTER the health-check (health gates readiness).
|
||||
assert after.index('health_check 10 6 "build-staging-health"') < after.index("staging_check.py")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -131,3 +100,60 @@ def test_tc08_dockerfile_stamps_revision_label():
|
||||
text = _DOCKERFILE.read_text(encoding="utf-8")
|
||||
assert "ARG GIT_SHA" in text
|
||||
assert "LABEL org.opencontainers.image.revision=$GIT_SHA" in text
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TC-09: caller↔hook contract — rebuild_staging_image builds the right command
|
||||
# ---------------------------------------------------------------------------
|
||||
def test_tc09_rebuild_staging_image_passes_validated_context_and_staging_target(monkeypatch):
|
||||
"""`rebuild_staging_image` must invoke the hook `--build-staging` over ssh with
|
||||
BUILD_CONTEXT=<host-worktree>, GIT_SHA=<validated sha>, and an EXPLICIT staging
|
||||
target (service/port/profile/container) — never the prod 8500 target. The absence
|
||||
of this contract test is what hid the earlier P0s (review P2)."""
|
||||
import src.image_freshness as imgf
|
||||
|
||||
captured = {}
|
||||
|
||||
class _FakeCompleted:
|
||||
returncode = 0
|
||||
stdout = ""
|
||||
stderr = ""
|
||||
|
||||
def _fake_run(cmd, *a, **kw):
|
||||
captured["cmd"] = cmd
|
||||
return _FakeCompleted()
|
||||
|
||||
monkeypatch.setattr(imgf, "_ssh_target", lambda: "slin@host")
|
||||
monkeypatch.setattr(imgf, "_host_worktree_path",
|
||||
lambda repo, branch: "/home/slin/repos/_wt/orchestrator/feature_X")
|
||||
monkeypatch.setattr(imgf.subprocess, "run", _fake_run)
|
||||
|
||||
ok, msg = imgf.rebuild_staging_image("orchestrator", "feature/ORCH-058", "abc123def456")
|
||||
assert ok, msg
|
||||
|
||||
cmd = captured["cmd"]
|
||||
assert cmd[0] == "ssh"
|
||||
inner = cmd[-1] # the remote shell command string
|
||||
# Validated commit + validated worktree as build context.
|
||||
assert "GIT_SHA=abc123def456" in inner
|
||||
assert "BUILD_CONTEXT=/home/slin/repos/_wt/orchestrator/feature_X" in inner
|
||||
# Explicit STAGING target — never the prod 8500 service/port.
|
||||
assert "TARGET_SERVICE=orchestrator-staging" in inner
|
||||
assert "TARGET_PORT=8501" in inner
|
||||
assert "COMPOSE_PROFILE=staging" in inner
|
||||
assert "STAGING_CONTAINER=orchestrator-staging" in inner
|
||||
assert "orchestrator-orchestrator-staging" in inner # staging TARGET_IMAGE
|
||||
assert "--build-staging" in inner
|
||||
# Hard safety: the prod service/port must NOT leak into the staging rebuild.
|
||||
assert "TARGET_PORT=8500" not in inner
|
||||
assert "TARGET_SERVICE=orchestrator " not in inner
|
||||
|
||||
|
||||
def test_tc09_rebuild_staging_image_no_ssh_host_fails_closed(monkeypatch):
|
||||
"""No ssh host configured -> never-raise, fail-closed (False), no command run."""
|
||||
import src.image_freshness as imgf
|
||||
|
||||
monkeypatch.setattr(imgf, "_ssh_target", lambda: None)
|
||||
ok, reason = imgf.rebuild_staging_image("orchestrator", "feature/ORCH-058", "abc123")
|
||||
assert ok is False
|
||||
assert "ssh host" in reason
|
||||
|
||||
Reference in New Issue
Block a user