fix(deploy-hook): --build-staging must build from validated worktree, recreate+health 8501
All checks were successful
CI / test (push) Successful in 17s

Closes reviewer P0/P1 (ORCH-058 attempt 3): the committed --build-staging hook
recomputed GIT_SHA=$(git rev-parse HEAD) in $REPO (prod clone on `main`) and built
`docker build ... "$REPO"`, ignoring the caller-supplied BUILD_CONTEXT/GIT_SHA. On
the deploy-staging -> deploy edge the PR is not yet merged, so `main` HEAD != the
validated SHA -> the staging image got the wrong revision label and Strategy-B's
guard failed closed on EVERY valid self-deploy (AC-6 deadlock). It also only did
`docker build` + exit 0 — never recreating 8501 nor health-checking — so
rebuild_staging_image's rc=0 ("rebuilt and healthy") was a lie (AC-4 unmet).

- Hook --build-staging now honours caller BUILD_CONTEXT (validated worktree) and
  GIT_SHA, recreates orchestrator-staging on the fresh image and runs the 10x6s
  health-check; build/health failure -> exit 1 (FAILED contract preserved).
- image_freshness.rebuild_staging_image: document why COMPOSE_PROFILE/TARGET_SERVICE/
  TARGET_PORT are intentionally omitted (hook STAGING defaults -> 8501 only, P2).
- tests: assert the caller<->hook contract (builds from $BUILD_CONTEXT, no
  `git rev-parse HEAD` recompute, recreates + health-checks 8501) so the P0
  regression can't pass green again (P1).

Refs: ORCH-058

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-06-07 08:37:51 +00:00
parent 3b3d587300
commit 2ee06ae676
3 changed files with 103 additions and 13 deletions

View File

@@ -13,11 +13,24 @@
# When set, the prevalidated (staging) image is retagged onto
# TARGET_IMAGE instead of rebuilding — guarantees prod runs the
# exact artefact that passed staging (no `docker build`).
# EXPECTED_REVISION - expected git SHA of SOURCE_IMAGE (default: unset; ORCH-58)
# Strategy-B fail-closed provenance guard: when set, the
# SOURCE_IMAGE's org.opencontainers.image.revision label MUST
# equal this value before the BUILD-ONCE retag, else exit 1
# (a stale image is never promoted). Unset -> no check (legacy).
# GIT_SHA - --build-staging build-arg (default: unset; ORCH-58)
# Commit stamped into the rebuilt staging image's revision
# label. Supplied by the caller (validated commit) — NOT
# recomputed from the host clone's HEAD.
# BUILD_CONTEXT - --build-staging build context (default: $REPO; ORCH-58)
# Host worktree of the validated commit; the staging image is
# rebuilt FROM this tree (not the prod clone on main).
# LOG - log file path (default: /var/log/orchestrator/deploy-hook.log)
#
# Usage:
# ./orchestrator-deploy-hook.sh [--deploy] # normal deploy (default)
# ./orchestrator-deploy-hook.sh --rollback # manual rollback
# ./orchestrator-deploy-hook.sh [--deploy] # normal deploy (default)
# ./orchestrator-deploy-hook.sh --rollback # manual rollback
# ./orchestrator-deploy-hook.sh --build-staging # ORCH-58: rebuild staging image (8501)
set -euo pipefail
@@ -123,17 +136,6 @@ do_rollback() {
# ============================================================================
# MANUAL --rollback mode
# ============================================================================
if [[ "${1:-}" == "--build-staging" ]]; then
# Strategy-A rebuild mode (ORCH-58): rebuild the staging image stamping
# the validated commit SHA into the OCI revision label so the provenance
# guard above can fail-closed on it during the subsequent prod retag.
#
# What this branch actually does, step by step:
#   1. Overwrites GIT_SHA with `git rev-parse HEAD`, so any GIT_SHA supplied
#      by the caller is discarded.
#   2. Runs `docker build` with $REPO as the build context (BUILD_CONTEXT is
#      not consulted) and appends the build output to $LOG.
#   3. Exits 0 right after the build: the service is not recreated and no
#      health-check runs, so exit 0 only means "image built".
# NOTE(review): `git rev-parse HEAD` resolves against the script's working
# directory; presumably that is $REPO - confirm against the caller.
GIT_SHA=$(git rev-parse HEAD)
log "BUILD-STAGING: rebuilding $TARGET_IMAGE stamping GIT_SHA=$GIT_SHA"
# No explicit failure branch: a failing build relies on `set -e` to abort.
docker build --build-arg GIT_SHA="$GIT_SHA" -t "$TARGET_IMAGE" "$REPO" >> "$LOG" 2>&1
log "BUILD-STAGING: built $TARGET_IMAGE (revision=$GIT_SHA)"
exit 0
fi
if [[ "${1:-}" == "--rollback" ]]; then
log "Manual ROLLBACK requested"
if do_rollback; then
@@ -145,6 +147,41 @@ if [[ "${1:-}" == "--rollback" ]]; then
fi
fi
# ============================================================================
# --build-staging mode (ORCH-58, Strategy A): rebuild the STAGING image from the
# VALIDATED commit and recreate 8501, so the artefact we validate is the EXACT one
# later BUILD-ONCE retagged to prod (INV-FRESH). Builds/recreates STAGING ONLY
# (8501) — never prod (8500). Same exit-code contract (0 = healthy, !=0 = failed).
#
# Uses the caller-supplied GIT_SHA + BUILD_CONTEXT (the validated worktree) — it
# must NOT recompute HEAD from $REPO (the prod clone on `main`): on the
# deploy-staging -> deploy edge the PR is not yet merged, so `main` HEAD != the
# validated SHA, which would stamp the wrong revision label and deadlock the
# Strategy-B guard on every valid self-deploy.
# ============================================================================

#######################################
# Rebuild the staging image and recreate the staging service on top of it.
# Every step is checked explicitly, so the function behaves the same whether
# or not `set -e` is in effect at the call site (it is ignored inside `if`).
# Globals:
#   BUILD_CONTEXT  (read, defaulted to $REPO) docker build context
#   GIT_SHA        (read, defaulted to empty) value of the GIT_SHA build-arg
#   REPO, TARGET_IMAGE, TARGET_SERVICE, TARGET_PORT, COMPOSE_PROFILE, LOG (read)
# Arguments:
#   None
# Outputs:
#   Progress lines via log(); docker / compose output is appended to $LOG.
# Returns:
#   0 when build, recreate and health-check all succeed; 1 on any failure.
#######################################
build_staging() {
  BUILD_CONTEXT="${BUILD_CONTEXT:-$REPO}"
  GIT_SHA="${GIT_SHA:-}"

  log "BUILD-STAGING: rebuilding $TARGET_IMAGE from $BUILD_CONTEXT (GIT_SHA=$GIT_SHA, service=$TARGET_SERVICE, port=$TARGET_PORT)"

  # An empty SHA is still accepted (legacy callers), but make it visible: the
  # EXPECTED_REVISION provenance guard compares against this value later.
  if [[ -z "$GIT_SHA" ]]; then
    log "BUILD-STAGING: WARNING GIT_SHA is empty - the EXPECTED_REVISION guard cannot match this image"
  fi

  if ! docker build --build-arg GIT_SHA="$GIT_SHA" -t "$TARGET_IMAGE" "$BUILD_CONTEXT" >> "$LOG" 2>&1; then
    log "BUILD-STAGING: docker build failed - aborting (exit 1)"
    return 1
  fi

  log "BUILD-STAGING: recreating $TARGET_SERVICE (profile=$COMPOSE_PROFILE) on the fresh image"

  # Assemble the compose invocation once; --profile is only passed when set.
  local -a compose_cmd=(docker compose)
  if [[ -n "$COMPOSE_PROFILE" ]]; then
    compose_cmd+=(--profile "$COMPOSE_PROFILE")
  fi
  compose_cmd+=(up -d --no-build "$TARGET_SERVICE")

  # Previously an unchecked command: a compose failure aborted the script via
  # `set -e` with compose's own exit code and no log line. Fail explicitly.
  if ! "${compose_cmd[@]}" >> "$LOG" 2>&1; then
    log "BUILD-STAGING: docker compose up failed for $TARGET_SERVICE - aborting (exit 1)"
    return 1
  fi

  log "BUILD-STAGING: running health-check on port $TARGET_PORT (10x6s)"
  if health_check 10 6 "build-staging-health"; then
    log "BUILD-STAGING: $TARGET_SERVICE healthy on the fresh image (exit 0)"
    return 0
  fi

  log "BUILD-STAGING: health FAILED after rebuild (exit 1)"
  return 1
}

if [[ "${1:-}" == "--build-staging" ]]; then
  if build_staging; then
    exit 0
  fi
  exit 1
fi
# ============================================================================
# NORMAL DEPLOY mode (--deploy or no argument)
# ============================================================================