From a6cbacb62c4b6e59444e33137dfd4e1617bc49f6 Mon Sep 17 00:00:00 2001 From: Dev Agent Date: Fri, 5 Jun 2026 09:26:12 +0300 Subject: [PATCH] feat(staging): add orchestrator deploy hook with health-check and auto-rollback (ORCH-34) --- docs/DEPLOY_HOOK.md | 90 ++++++++++++++ scripts/orchestrator-deploy-hook.sh | 176 ++++++++++++++++++++++++++++ 2 files changed, 266 insertions(+) create mode 100644 docs/DEPLOY_HOOK.md create mode 100755 scripts/orchestrator-deploy-hook.sh diff --git a/docs/DEPLOY_HOOK.md b/docs/DEPLOY_HOOK.md new file mode 100644 index 0000000..ba3cafb --- /dev/null +++ b/docs/DEPLOY_HOOK.md @@ -0,0 +1,90 @@ +# Orchestrator Deploy Hook + +`scripts/orchestrator-deploy-hook.sh` — хост-скрипт деплоя orchestrator с health-чеком и авто-rollback. + +## Как работает + +### Режим `--deploy` (по умолчанию) + +1. **Захват текущего образа** — до рестарта записывает ID образа работающего контейнера в `$PREV_IMAGE_FILE` (best-effort, не падает если сервис не запущен). +2. **git pull** — обновляет код репозитория. +3. **Рестарт контейнера** — `docker compose --profile $COMPOSE_PROFILE up -d --no-build $TARGET_SERVICE`. +4. **Health-цикл** — 10 попыток × 6с = до 60с. Критерий: HTTP 200 + тело содержит `"status":"ok"`. + - **Успех** → `exit 0`, лог "Deploy SUCCESS". + - **Провал** → авто-rollback (шаг 5). +5. **Авто-rollback** — восстанавливает образ из `$PREV_IMAGE_FILE`, рестарт, повторный health 5×3с. + - Если восстановился → `exit 1` (деплой провалился, откат успешен). + - Если и откат не помог → `exit 2` (критично). + +### Режим `--rollback` + +Вручную откатывает сервис на предыдущий образ из `$PREV_IMAGE_FILE`. 
+ +## Переменные окружения + +| Переменная | Дефолт | Описание | +|------------------|-----------------------------------|-----------------------------------------------| +| `TARGET_SERVICE` | `orchestrator-staging` | Имя docker-compose сервиса | +| `TARGET_PORT` | `8501` | Порт health-check | +| `TARGET_IMAGE` | `orchestrator-orchestrator-staging` | Имя образа для retag при rollback | +| `COMPOSE_PROFILE`| `staging` | Docker compose profile (пусто = без профиля) | +| `PREV_IMAGE_FILE`| `$REPO/.deploy-prev-image-staging`| Файл для сохранения предыдущего образа | +| `LOG` | `/var/log/orchestrator/deploy-hook.log` | Лог-файл (fallback: `$REPO/deploy-hook.log`) | + +> ⚠️ **Дефолт — всегда STAGING**. Прод активируется только явным переопределением env. + +## Примеры запуска + +### Staging (дефолт, безопасно) + +```bash +cd /home/slin/repos/orchestrator +bash scripts/orchestrator-deploy-hook.sh --deploy +# или просто: +bash scripts/orchestrator-deploy-hook.sh +``` + +### Прод (осознанный шаг, Этап 5) + +```bash +TARGET_SERVICE=orchestrator \ +TARGET_PORT=8500 \ +TARGET_IMAGE=orchestrator-orchestrator \ +COMPOSE_PROFILE="" \ +PREV_IMAGE_FILE=/home/slin/repos/orchestrator/.deploy-prev-image-prod \ +bash scripts/orchestrator-deploy-hook.sh --deploy +``` + +### Ручной rollback staging + +```bash +bash scripts/orchestrator-deploy-hook.sh --rollback +``` + +## Коды выхода + +| Код | Значение | +|-----|------------------------------------------------------| +| `0` | Деплой успешен, сервис здоров | +| `1` | Деплой провалился; откат выполнен (или пропущен) | +| `2` | Деплой провалился И откат тоже провалился (критично) | + +## Логи + +``` +/var/log/orchestrator/deploy-hook.log +``` + +Каждая строка с UTC-таймстампом в формате `[2026-06-05T06:30:00Z]`. 
#!/bin/bash
# Deploy hook for orchestrator.
#
# Modes:
#   --deploy (default)  capture the running image, git pull, restart the
#                       service, health-check it and roll back automatically
#                       if it does not become healthy.
#   --rollback          manually restore the image recorded in PREV_IMAGE_FILE.
#
# Parametrised via env vars (defaults are STAGING - never prod):
#   TARGET_SERVICE  - docker-compose service name (default: orchestrator-staging)
#   TARGET_PORT     - health check port (default: 8501)
#   TARGET_IMAGE    - image name for retag (default: orchestrator-orchestrator-staging)
#   COMPOSE_PROFILE - docker compose profile (default: staging; set to the
#                     empty string to run compose without any --profile)
#   PREV_IMAGE_FILE - path to prev-image snapshot (default: $REPO/.deploy-prev-image-staging)
#   LOG             - log file path (default: /var/log/orchestrator/deploy-hook.log,
#                     falling back to $REPO/deploy-hook.log when not writable)
#
# Exit codes:
#   0 - deploy (or manual rollback) succeeded, service healthy
#   1 - deploy failed; rollback succeeded or was skipped / manual rollback failed
#   2 - deploy failed AND the automatic rollback failed too (critical)
#
# Differences from enduro-deploy-hook.sh (per docs/DEPLOY_HOOK.md): both hooks
# capture the previous image, git pull and restart; this one additionally has a
# health-check loop (10 x 6s), automatic rollback, env-var parametrisation
# (default = staging) and compose-profile support.
#
# Usage:
#   ./orchestrator-deploy-hook.sh [--deploy]   # normal deploy (default)
#   ./orchestrator-deploy-hook.sh --rollback   # manual rollback

set -euo pipefail

REPO=/home/slin/repos/orchestrator

# ---- Defaults (STAGING - safe) ---------------------------------------------
TARGET_SERVICE="${TARGET_SERVICE:-orchestrator-staging}"
TARGET_PORT="${TARGET_PORT:-8501}"
TARGET_IMAGE="${TARGET_IMAGE:-orchestrator-orchestrator-staging}"
# `-` (not `:-`) on purpose: an explicitly empty COMPOSE_PROFILE means
# "no profile" (the documented prod setup) and must not fall back to staging.
COMPOSE_PROFILE="${COMPOSE_PROFILE-staging}"
PREV_IMAGE_FILE="${PREV_IMAGE_FILE:-$REPO/.deploy-prev-image-staging}"

# Base compose command, built once so every call site treats the profile
# the same way (including the "no profile" case).
COMPOSE=(docker compose)
if [[ -n "$COMPOSE_PROFILE" ]]; then
  COMPOSE+=(--profile "$COMPOSE_PROFILE")
fi

# ---- Log setup -------------------------------------------------------------
# An explicit LOG wins. Otherwise prefer the system log directory and fall
# back to the repo when the directory cannot be created or the file cannot
# be appended to.
LOG_DIR=/var/log/orchestrator
if [[ -z "${LOG:-}" ]]; then
  LOG="$LOG_DIR/deploy-hook.log"
  if ! { mkdir -p "$LOG_DIR" && : >> "$LOG"; } 2>/dev/null; then
    LOG="$REPO/deploy-hook.log"
  fi
fi

#######################################
# Write a UTC-timestamped line to stdout and append it to $LOG.
# Globals:   LOG (read)
# Arguments: message words
#######################################
log() {
  # Best-effort on purpose: a failing tee (unwritable log, full disk) must not
  # abort a deploy half-way and skip the health check / rollback.
  printf '[%s] %s\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "$*" | tee -a "$LOG" || true
}

#######################################
# Poll the service health endpoint.
# Healthy means: HTTP 200 and a body containing "status":"ok"
# (whitespace around the colon is tolerated).
# Globals:   TARGET_PORT (read)
# Arguments: $1 - max attempts
#            $2 - seconds to sleep between attempts
#            $3 - log label (default: health-check)
# Returns:   0 if healthy within the attempts, 1 otherwise
#######################################
health_check() {
  local max_attempts="$1"
  local sleep_sec="$2"
  local label="${3:-health-check}"
  local url="http://localhost:$TARGET_PORT/health"
  local ok_re='"status"[[:space:]]*:[[:space:]]*"ok"'
  local attempt response http_code body

  for (( attempt = 1; attempt <= max_attempts; attempt++ )); do
    log "$label: attempt $attempt/$max_attempts - GET $url"
    # One request yields both body and status code (the code is appended on
    # its own last line), so the two can never come from different responses.
    response=$(curl -s --max-time 5 -w '\n%{http_code}' "$url" 2>/dev/null || true)
    http_code="${response##*$'\n'}"
    body=""
    if [[ "$response" == *$'\n'* ]]; then
      body="${response%$'\n'*}"
    fi
    # curl missing or producing garbage -> report the conventional "000".
    [[ "$http_code" =~ ^[0-9]{3}$ ]] || http_code="000"

    if [[ "$http_code" == "200" && "$body" =~ $ok_re ]]; then
      log "$label: OK (HTTP $http_code, body=$body)"
      return 0
    fi
    log "$label: not ready yet (HTTP $http_code, body=$body)"
    if (( attempt < max_attempts )); then
      sleep "$sleep_sec"
    fi
  done
  log "$label: FAILED after $max_attempts attempts"
  return 1
}

#######################################
# (Re)create the target service from the currently tagged image.
# Globals:   COMPOSE, TARGET_SERVICE, LOG (read)
# Returns:   exit status of `docker compose up`
#######################################
restart_service() {
  "${COMPOSE[@]}" up -d --no-build "$TARGET_SERVICE" >> "$LOG" 2>&1
}

#######################################
# Restore the image recorded in PREV_IMAGE_FILE and restart the service.
# Used for manual --rollback and for auto-rollback after a bad deploy.
# Callers invoke this in `if` / `||` contexts where `set -e` is inactive,
# so every critical step is checked explicitly.
# Globals:   PREV_IMAGE_FILE, TARGET_IMAGE, TARGET_SERVICE, LOG (read)
# Returns:   0 - rolled back and healthy
#            1 - rollback skipped (no usable previous image)
#            2 - rollback attempted but the service is still unhealthy
#######################################
do_rollback() {
  log "ROLLBACK: checking $PREV_IMAGE_FILE"
  if [[ ! -s "$PREV_IMAGE_FILE" ]]; then
    log "ROLLBACK: no previous image recorded - rollback skipped (exit 1)"
    return 1
  fi
  local prev_img
  prev_img=$(<"$PREV_IMAGE_FILE")
  if [[ -z "$prev_img" ]]; then
    log "ROLLBACK: PREV_IMAGE_FILE is empty - rollback skipped (exit 1)"
    return 1
  fi
  if ! docker image inspect "$prev_img" >/dev/null 2>&1; then
    log "ROLLBACK: recorded image '$prev_img' not found locally - rollback skipped (exit 1)"
    return 1
  fi

  log "ROLLBACK: retagging $prev_img -> $TARGET_IMAGE"
  if ! docker tag "$prev_img" "$TARGET_IMAGE" >> "$LOG" 2>&1; then
    # Without the retag a restart would just bring the bad image back.
    log "ROLLBACK: docker tag failed - cannot restore $prev_img"
    return 2
  fi

  log "ROLLBACK: restarting $TARGET_SERVICE on previous image"
  if ! restart_service; then
    # Keep going: the health check below decides the final verdict.
    log "ROLLBACK: docker compose up reported an error while restoring $prev_img"
  fi

  log "ROLLBACK: container restarted, running post-rollback health check (5x3s)"
  if health_check 5 3 "ROLLBACK-health"; then
    log "ROLLBACK: service is healthy on previous image ($prev_img)"
    return 0
  fi
  log "ROLLBACK: ROLLBACK ALSO FAILED - service still unhealthy after restoring $prev_img"
  return 2
}

#######################################
# Normal deploy: snapshot image, pull code, restart, health-check,
# auto-rollback on failure. Terminates the script with the documented
# exit code (0 / 1 / 2).
# Globals:   COMPOSE, TARGET_SERVICE, TARGET_PORT, COMPOSE_PROFILE,
#            PREV_IMAGE_FILE, LOG (read)
#######################################
deploy() {
  # 1. Capture currently running image BEFORE restart (best-effort).
  local prev_img="" svc_cid
  svc_cid=$("${COMPOSE[@]}" ps -q "$TARGET_SERVICE" 2>/dev/null || true)
  svc_cid="${svc_cid%%$'\n'*}"   # several replicas -> inspect the first one
  if [[ -n "$svc_cid" ]]; then
    prev_img=$(docker inspect --format '{{.Image}}' "$svc_cid" 2>/dev/null || true)
  fi
  if [[ -n "$prev_img" ]]; then
    printf '%s\n' "$prev_img" > "$PREV_IMAGE_FILE"
    log "Saved previous image: $prev_img -> $PREV_IMAGE_FILE"
  else
    log "No previous image captured (first deploy or service not running?)"
  fi

  # 2. Pull latest code. Nothing has been restarted yet, so a failure here
  #    leaves the running service untouched and needs no rollback.
  log "git pull origin main"
  if ! git pull origin main >> "$LOG" 2>&1; then
    log "deploy FAILED: git pull failed - service left untouched - exit 1"
    exit 1
  fi

  # 3. Restart service, 4. health-check loop: 10 attempts x 6 seconds.
  log "Starting $TARGET_SERVICE (profile=$COMPOSE_PROFILE)"
  if restart_service; then
    log "$TARGET_SERVICE restarted"
    log "Starting health-check: 10 attempts x 6s (max 60s)"
    if health_check 10 6 "deploy-health"; then
      log "Deploy SUCCESS: $TARGET_SERVICE healthy on port $TARGET_PORT"
      exit 0
    fi
    log "deploy FAILED: health not ok after 60s - initiating AUTO ROLLBACK"
  else
    # A failed `up` must trigger the rollback too instead of aborting the hook.
    log "deploy FAILED: docker compose up failed - initiating AUTO ROLLBACK"
  fi

  # 5. AUTO ROLLBACK
  local rollback_rc=0
  do_rollback || rollback_rc=$?
  case "$rollback_rc" in
    0)
      log "deploy FAILED, rolled back to previous image successfully - exit 1"
      exit 1
      ;;
    2)
      log "deploy FAILED, ROLLBACK ALSO FAILED - service may be down - exit 2"
      exit 2
      ;;
    *)
      log "deploy FAILED, rollback skipped (no previous image) - exit 1"
      exit 1
      ;;
  esac
}

#######################################
# Entry point: dispatch on the first argument.
# Arguments: [--deploy | --rollback]  (anything but --rollback deploys)
#######################################
main() {
  log "Deploy hook called: target=$TARGET_SERVICE port=$TARGET_PORT args=$*"

  # docker compose and git both operate on the repository checkout.
  if ! cd "$REPO"; then
    log "cannot cd to $REPO - aborting"
    exit 1
  fi

  if [[ "${1:-}" == "--rollback" ]]; then
    log "Manual ROLLBACK requested"
    if do_rollback; then
      log "Manual ROLLBACK succeeded"
      exit 0
    fi
    log "Manual ROLLBACK failed"
    exit 1
  fi

  deploy
}

main "$@"