diff --git a/scripts/orchestrator-deploy-hook.sh b/scripts/orchestrator-deploy-hook.sh index 68be4d6..75b855b 100755 --- a/scripts/orchestrator-deploy-hook.sh +++ b/scripts/orchestrator-deploy-hook.sh @@ -13,11 +13,24 @@ # When set, the prevalidated (staging) image is retagged onto # TARGET_IMAGE instead of rebuilding — guarantees prod runs the # exact artefact that passed staging (no `docker build`). +# EXPECTED_REVISION- expected git SHA of SOURCE_IMAGE (default: unset; ORCH-58) +# Strategy-B fail-closed provenance guard: when set, the +# SOURCE_IMAGE's org.opencontainers.image.revision label MUST +# equal this value before the BUILD-ONCE retag, else exit 1 +# (a stale image is never promoted). Unset -> no check (legacy). +# GIT_SHA - --build-staging build-arg (default: unset; ORCH-58) +# Commit stamped into the rebuilt staging image's revision +# label. Supplied by the caller (validated commit) — NOT +# recomputed from the host clone's HEAD. +# BUILD_CONTEXT - --build-staging build context (default: $REPO; ORCH-58) +# Host worktree of the validated commit; the staging image is +# rebuilt FROM this tree (not the prod clone on main). # LOG - log file path (default: /var/log/orchestrator/deploy-hook.log) # # Usage: -# ./orchestrator-deploy-hook.sh [--deploy] # normal deploy (default) -# ./orchestrator-deploy-hook.sh --rollback # manual rollback +# ./orchestrator-deploy-hook.sh [--deploy] # normal deploy (default) +# ./orchestrator-deploy-hook.sh --rollback # manual rollback +# ./orchestrator-deploy-hook.sh --build-staging # ORCH-58: rebuild staging image (8501) set -euo pipefail @@ -123,17 +136,6 @@ do_rollback() { # ============================================================================ # MANUAL --rollback mode # ============================================================================ -if [[ "${1:-}" == "--build-staging" ]]; then - # Strategy-A rebuild mode (ORCH-58): rebuild the staging image stamping - # the validated commit SHA into the OCI revision label so the provenance - # guard above can fail-closed on it during the subsequent prod retag. - GIT_SHA=$(git rev-parse HEAD) - log "BUILD-STAGING: rebuilding $TARGET_IMAGE stamping GIT_SHA=$GIT_SHA" - docker build --build-arg GIT_SHA="$GIT_SHA" -t "$TARGET_IMAGE" "$REPO" >> "$LOG" 2>&1 - log "BUILD-STAGING: built $TARGET_IMAGE (revision=$GIT_SHA)" - exit 0 -fi - if [[ "${1:-}" == "--rollback" ]]; then log "Manual ROLLBACK requested" if do_rollback; then @@ -145,6 +147,41 @@ if [[ "${1:-}" == "--rollback" ]]; then fi fi +# ============================================================================ +# --build-staging mode (ORCH-58, Strategy A): rebuild the STAGING image from the +# VALIDATED commit and recreate 8501, so the artefact we validate is the EXACT one +# later BUILD-ONCE retagged to prod (INV-FRESH). Builds/recreates STAGING ONLY +# (8501) — never prod (8500). Same exit-code contract (0 = healthy, !=0 = failed). +# +# Uses the caller-supplied GIT_SHA + BUILD_CONTEXT (the validated worktree) — it +# must NOT recompute HEAD from $REPO (the prod clone on `main`): on the +# deploy-staging -> deploy edge the PR is not yet merged, so `main` HEAD != the +# validated SHA, which would stamp the wrong revision label and deadlock the +# Strategy-B guard on every valid self-deploy. +# ============================================================================ +if [[ "${1:-}" == "--build-staging" ]]; then + BUILD_CONTEXT="${BUILD_CONTEXT:-$REPO}" + GIT_SHA="${GIT_SHA:-}" + log "BUILD-STAGING: rebuilding $TARGET_IMAGE from $BUILD_CONTEXT (GIT_SHA=$GIT_SHA, service=$TARGET_SERVICE, port=$TARGET_PORT)" + if ! docker build --build-arg GIT_SHA="$GIT_SHA" -t "$TARGET_IMAGE" "$BUILD_CONTEXT" >> "$LOG" 2>&1; then + log "BUILD-STAGING: docker build failed - aborting (exit 1)" + exit 1 + fi + log "BUILD-STAGING: recreating $TARGET_SERVICE (profile=$COMPOSE_PROFILE) on the fresh image" + if [[ -n "$COMPOSE_PROFILE" ]]; then + docker compose --profile "$COMPOSE_PROFILE" up -d --no-build "$TARGET_SERVICE" >> "$LOG" 2>&1 + else + docker compose up -d --no-build "$TARGET_SERVICE" >> "$LOG" 2>&1 + fi + log "BUILD-STAGING: running health-check on port $TARGET_PORT (10x6s)" + if health_check 10 6 "build-staging-health"; then + log "BUILD-STAGING: $TARGET_SERVICE healthy on the fresh image (exit 0)" + exit 0 + fi + log "BUILD-STAGING: health FAILED after rebuild (exit 1)" + exit 1 +fi + # ============================================================================ # NORMAL DEPLOY mode (--deploy or no argument) # ============================================================================ diff --git a/src/image_freshness.py b/src/image_freshness.py index 574d54b..4cdd43e 100644 --- a/src/image_freshness.py +++ b/src/image_freshness.py @@ -248,6 +248,13 @@ def rebuild_staging_image(repo: str, branch: str, sha: str) -> tuple[bool, str]: if not target: return False, "no ssh host configured for staging rebuild" host_ctx = _host_worktree_path(repo, branch) + # We pass ONLY GIT_SHA (validated commit -> revision label, the shared anchor + # with Strategy B), BUILD_CONTEXT (the validated worktree to build FROM) and + # TARGET_IMAGE (the staging image name to retag in prod later). COMPOSE_PROFILE + # / TARGET_SERVICE / TARGET_PORT are deliberately omitted so the hook keeps its + # built-in STAGING defaults (profile=staging, orchestrator-staging, 8501): this + # rebuild/recreate must touch STAGING ONLY (8501), NEVER prod (8500) (AC-9), and + # the prod defaults are never reachable on this path. env_assignments = ( f"GIT_SHA={shlex.quote(sha)} " f"BUILD_CONTEXT={shlex.quote(host_ctx)} " diff --git a/tests/test_deploy_hook_provenance.py b/tests/test_deploy_hook_provenance.py index 02c048b..24416a1 100644 --- a/tests/test_deploy_hook_provenance.py +++ b/tests/test_deploy_hook_provenance.py @@ -17,6 +17,17 @@ _HOOK = _ROOT / "scripts" / "orchestrator-deploy-hook.sh" _DOCKERFILE = _ROOT / "Dockerfile" +def _build_staging_block() -> str: + """Return only the body of the hook's ``--build-staging`` branch, so the + contract assertions below cannot be satisfied by lookalike strings elsewhere + in the script (e.g. the NORMAL DEPLOY recreate). The block runs from the + ``--build-staging`` guard up to the NORMAL DEPLOY section header.""" + text = _HOOK.read_text(encoding="utf-8") + start = text.index('"${1:-}" == "--build-staging"') + end = text.index("NORMAL DEPLOY mode", start) + return text[start:end] + + # --------------------------------------------------------------------------- # TC-07: hook fail-closed provenance guard + --build-staging rebuild mode # --------------------------------------------------------------------------- @@ -49,6 +60,41 @@ def test_tc07_build_staging_mode_stamps_git_sha(): assert 'docker build --build-arg GIT_SHA="$GIT_SHA"' in text +def test_tc07_build_staging_builds_from_caller_context_not_repo(): + """Contract (caller <-> hook): --build-staging must build from the + caller-supplied BUILD_CONTEXT (the validated worktree), NOT the prod clone. + + Regression guard for the P0 deadlock: the block must honour the caller's + GIT_SHA (BUILD_CONTEXT/GIT_SHA defaulting) and must NOT recompute the SHA + from the host clone's HEAD (`git rev-parse HEAD`) — on the + deploy-staging -> deploy edge `main` HEAD != validated SHA, which would + stamp the wrong revision label and deadlock the Strategy-B guard. + """ + block = _build_staging_block() + # Build context is the caller-supplied worktree, defaulting to $REPO. + assert 'BUILD_CONTEXT="${BUILD_CONTEXT:-$REPO}"' in block + assert 'docker build --build-arg GIT_SHA="$GIT_SHA" -t "$TARGET_IMAGE" "$BUILD_CONTEXT"' in block + # Honour the caller's GIT_SHA; never hard-build against the prod clone. + assert 'GIT_SHA="${GIT_SHA:-}"' in block + assert 'docker build --build-arg GIT_SHA="$GIT_SHA" -t "$TARGET_IMAGE" "$REPO"' not in block + # Must NOT recompute the validated SHA from the host clone's HEAD. + assert "git rev-parse HEAD" not in block + + +def test_tc07_build_staging_recreates_and_health_checks_8501(): + """AC-4: --build-staging must recreate the staging container on the fresh + image and validate it (health-check), so rebuild_staging_image's rc=0 truly + means "rebuilt AND healthy". A bare `docker build` + exit 0 would make the + freshness verdict a lie.""" + block = _build_staging_block() + # Recreate the staging service on the freshly built image. + assert 'docker compose --profile "$COMPOSE_PROFILE" up -d --no-build "$TARGET_SERVICE"' in block + # Validate the fresh container before reporting success. + assert 'health_check 10 6 "build-staging-health"' in block + # Health failure surfaces as a non-zero exit (FAILED contract preserved). + assert "exit 1" in block + + # --------------------------------------------------------------------------- # TC-08: Dockerfile stamps the OCI revision label from a build-arg # ---------------------------------------------------------------------------