fix(staging): tolerate sandbox-infra-only FAILs (C9a/C9b) in deploy-staging verdict

The self-hosting orchestrator looped on deploy-staging -> development because scripts/staging_check.py exited 1 on ANY failed check, so two infra-only checks (C9a sandbox branch / C9b analyst-job — caused by SANDBOX bot accounts not being members of the sandbox Plane project, NOT a pipeline regression) forced staging_status: FAILED -> rollback -> loop, burning developer retries and tokens. Direction (б) per ADR-001: classify staging checks as REAL (all pipeline checks, fail-closed) vs SANDBOX_INFRA (narrow allowlist {C9a, C9b}, waivable). New leaf module src/staging_verdict.py (stdlib-only, never-raise): classify_check + compute_staging_verdict fold per-check results into a tolerant-but-fail-closed verdict — any REAL failure -> FAILED/exit1 (safety net holds under any flag); only C9a/C9b failed & tolerant -> SUCCESS/exit0 with waived list; only infra & strict -> FAILED/exit1; any internal error -> FAILED/exit1 (never a false green). staging_check.py now auto-classifies each check (public 3-tuple _items shape kept as an ORCH-048 B6 regression guard), exposes categorized_items(), prints INFRA-WAIVED/VERDICT lines, and exits via the verdict; the new --strict flag forces legacy strictness per-run. Setting the kill-switch ORCH_STAGING_INFRA_TOLERANCE_ENABLED (default true) to false restores legacy strict mode globally. The launcher gains action_stage_no_changes_note so "no changes to commit" on action stages is logged as expected, not treated as under-delivery. Contracts unchanged: STAGE_TRANSITIONS, QG_CHECKS registry, staging_status: / deploy_status: frontmatter, hook exit-code (0/1/2), check_staging_status; no DB migration. Docs: README, STAGING_CHECK.md, deployer.md, .env.example, CHANGELOG. Refs: ORCH-061 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-07 12:39:00 +00:00
parent 1d1208c136
commit 9070489968
15 changed files with 831 additions and 7 deletions
--- a/scripts/staging_check.py
+++ b/scripts/staging_check.py
@@ -51,6 +51,46 @@ import datetime
 import urllib.request
 import urllib.error
 import urllib.parse
+from collections import namedtuple
+
+# ---------------------------------------------------------------------------
+# ORCH-061: pure staging-verdict logic (classification + infra-tolerant verdict).
+# Imported from src.staging_verdict — a stdlib-only leaf, safe to import inside
+# the orchestrator-staging container (PYTHONPATH=/app, pattern B6 / ORCH-048).
+# Guarded so the suite still runs (in strict mode) if src is somehow unimportable
+# from a host invocation; the fallback NEVER yields a silent green (fail-closed).
+# ---------------------------------------------------------------------------
+try:
+    from src.staging_verdict import (  # type: ignore
+        classify_check as _classify_check,
+        compute_staging_verdict as _compute_staging_verdict,
+        REAL as _REAL,
+        SANDBOX_INFRA as _SANDBOX_INFRA,
+    )
+except Exception:  # pragma: no cover - exercised only on a broken host import
+    _classify_check = None
+    _compute_staging_verdict = None
+    _REAL = "real"
+    _SANDBOX_INFRA = "sandbox_infra"
+
+_FallbackVerdict = namedtuple("StagingVerdict", "status exit_code waived summary")
+
+
+def _classify(label: str) -> str:
+    """Classify a check label via staging_verdict; fail-closed to REAL if absent."""
+    if _classify_check is not None:
+        return _classify_check(label)
+    return _REAL
+
+
+def _verdict(items, infra_tolerant: bool):
+    """Compute the suite verdict via staging_verdict; strict fail-closed fallback."""
+    if _compute_staging_verdict is not None:
+        return _compute_staging_verdict(items, infra_tolerant)
+    failed = [lbl for (lbl, ok, _cat) in items if not ok]
+    if failed:
+        return _FallbackVerdict("FAILED", 1, [], f"FAILED (strict fallback): {failed}")
+    return _FallbackVerdict("SUCCESS", 0, [], "SUCCESS (strict fallback): all green")

 # ---------------------------------------------------------------------------
 # Colour helpers
@@ -152,23 +192,47 @@ def _sign_payload(secret: str, body: bytes) -> str:

 class Results:
    def __init__(self):
+        # _items keeps the (label, passed, detail) 3-tuple shape that existing
+        # ORCH-048 B6 tests unpack — categories live in a PARALLEL list so the
+        # public tuple contract is unchanged.
        self._items: list[tuple[str, bool, str]] = []  # (label, passed, detail)
+        self._categories: list[str] = []               # ORCH-061: REAL | SANDBOX_INFRA

-    def add(self, label: str, passed: bool, detail: str = ""):
+    def add(self, label: str, passed: bool, detail: str = "", category: str | None = None):
+        # ORCH-061: every check carries a category. None -> auto-classify by label
+        # (C9a/C9b -> SANDBOX_INFRA, everything else -> REAL). Fail-closed: an
+        # unknown label is REAL, so it still counts toward the safety net.
+        if category is None:
+            category = _classify(label)
        self._items.append((label, passed, detail))
+        self._categories.append(category)
        line = _ok(label) if passed else _fail(label)
        if detail:
            line += f"  [{detail}]"
        print(line)

+    def categorized_items(self) -> list[tuple[str, bool, str]]:
+        """Rows as ``(label, passed, category)`` for ``compute_staging_verdict``."""
+        return [
+            (label, passed, cat)
+            for (label, passed, _detail), cat in zip(self._items, self._categories)
+        ]
+
    def summary(self) -> bool:
        passed = sum(1 for _, ok, _ in self._items if ok)
        total = len(self._items)
        all_ok = passed == total
        colour = _GREEN if all_ok else _RED
+        # ORCH-061: per-category breakdown so an operator can tell a REAL failure
+        # (regression — fail-closed) from a SANDBOX_INFRA one (waivable).
+        rows = self.categorized_items()
+        real_fail = [lbl for lbl, ok, cat in rows if not ok and cat == _REAL]
+        infra_fail = [lbl for lbl, ok, cat in rows if not ok and cat == _SANDBOX_INFRA]
        print()
        print(f"{_BOLD}{'='*60}{_RESET}")
        print(f"{colour}{_BOLD}  RESULT: {passed}/{total} checks PASS{_RESET}")
+        print(f"  REAL failed         : {real_fail or 'none'}")
+        print(f"  SANDBOX_INFRA failed: {infra_fail or 'none'}")
        print(f"{_BOLD}{'='*60}{_RESET}")
        return all_ok

@@ -637,6 +701,28 @@ def _cleanup(plane_base, workspace, gitea_base, plane_headers, gitea_headers,
 # Main
 # ---------------------------------------------------------------------------

+def _resolve_tolerance(cli_strict: bool) -> bool:
+    """Resolve whether the infra-FAIL waiver is active (ORCH-061).
+
+    Precedence: an explicit ``--strict`` CLI flag forces it OFF (for honest manual
+    runs). Otherwise read ``settings.staging_infra_tolerance_enabled`` from the
+    running instance's own config (same pattern as B6's src.* import inside the
+    container). On ANY import/read error -> STRICT (False): we never waive when the
+    config is unreadable (fail-safe), and we say so.
+    """
+    if cli_strict:
+        print(_info("tolerance: DISABLED via --strict (honest run)"))
+        return False
+    try:
+        from src.config import settings  # noqa: WPS433 - lazy, mirrors B6
+        enabled = bool(settings.staging_infra_tolerance_enabled)
+        print(_info(f"tolerance: staging_infra_tolerance_enabled={enabled}"))
+        return enabled
+    except Exception as e:
+        print(_info(f"tolerance: config unavailable, defaulting to STRICT: {e}"))
+        return False
+
+
 def main():
    parser = argparse.ArgumentParser(
        description="Live staging-stand check suite (ORCH-33)"
@@ -656,6 +742,15 @@ def main():
            "full-real: also wait for the analyst agent (slow, costs credits)."
        ),
    )
+    parser.add_argument(
+        "--strict",
+        action="store_true",
+        help=(
+            "ORCH-061: force strict suite — disable the sandbox-infra (C9a/C9b) "
+            "FAIL waiver even if staging_infra_tolerance_enabled=True. Use for an "
+            "honest 10/10 run once the sandbox bot accounts are provisioned."
+        ),
+    )
    args = parser.parse_args()

    base = args.base_url.rstrip("/")
@@ -673,8 +768,23 @@ def main():
    block_b(results)
    block_c(base, results, args.mode)

-    all_ok = results.summary()
-    sys.exit(0 if all_ok else 1)
+    results.summary()
+
+    # ORCH-061: the EXIT CODE (which drives the deployer's staging_status verdict)
+    # comes from the infra-tolerant verdict, NOT a raw passed==total count. A run
+    # whose only failures are known sandbox-infra checks (C9a/C9b) is waived to
+    # exit 0 when tolerance is on; ANY real check failure still exits 1 (FR-4).
+    infra_tolerant = _resolve_tolerance(args.strict)
+    verdict = _verdict(results.categorized_items(), infra_tolerant)
+    if verdict.waived:
+        # FR-7 observability: make "green with an allowance" distinguishable from
+        # an honest green in the logs / captured deployer output.
+        print(f"{_YELLOW}{_BOLD}INFRA-WAIVED:{_RESET} "
+              f"{', '.join(verdict.waived)} "
+              f"(known sandbox-infra; real checks green)")
+    print(f"{_BOLD}VERDICT:{_RESET} {verdict.status} "
+          f"(exit {verdict.exit_code}) — {verdict.summary}")
+    sys.exit(verdict.exit_code)


 if __name__ == "__main__":