# orchestrator/tests/test_qg_checks.py

"""ORCH-036 TC-15: the deploy-verdict parse contract is unchanged (AC-10).

``_parse_deploy_status`` reads ONLY the machine-readable ``deploy_status:`` YAML
frontmatter (never prose). ORCH-036 produces the verdict differently (a
deterministic finalizer instead of an LLM), but the parse contract that the gate
relies on must remain bit-identical:
    SUCCESS -> (True, ...), FAILED -> (False, ...), no/!frontmatter -> (False, ...).
"""

import os

# Seed placeholder tokens BEFORE the ``src`` imports below (which is why those
# imports carry ``noqa: E402``). Presumably the imported modules read these
# variables at import time — TODO confirm. ``setdefault`` leaves any value that
# is already present in the environment untouched.
os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")
os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")

from src.qg.checks import _parse_deploy_status  # noqa: E402
from src.self_deploy import build_deploy_log  # noqa: E402


def test_tc15_success_frontmatter_passes():
    """A ``deploy_status: SUCCESS`` frontmatter parses to a passing verdict
    whose reason mentions SUCCESS."""
    document = "---\ndeploy_status: SUCCESS\n---\n\nbody"
    passed, message = _parse_deploy_status(document)
    assert passed is True
    assert "SUCCESS" in message


def test_tc15_failed_frontmatter_fails():
    """A ``deploy_status: FAILED`` frontmatter parses to a failing verdict
    whose reason mentions FAILED."""
    document = "---\ndeploy_status: FAILED\n---\n\nbody"
    passed, message = _parse_deploy_status(document)
    assert passed is False
    assert "FAILED" in message


def test_tc15_no_frontmatter_fails():
    """Text with no frontmatter block fails, even though the prose itself
    contains ``deploy_status: SUCCESS``."""
    prose_only = "just prose, deploy_status: SUCCESS in text but no frontmatter"
    passed, _reason = _parse_deploy_status(prose_only)
    assert passed is False


def test_tc15_missing_field_fails():
    """A frontmatter block that lacks the ``deploy_status`` key fails, even
    when another field carries the value SUCCESS."""
    document = "---\nother_field: SUCCESS\n---\n"
    passed, _reason = _parse_deploy_status(document)
    assert passed is False


def test_tc15_prose_success_word_does_not_pass():
    """Defensive: a markdown body that merely says SUCCESS in prose must NOT
    satisfy the gate."""
    markdown_body = "# Deploy\n\nDeploy was a SUCCESS, hooray!\n"
    passed, _reason = _parse_deploy_status(markdown_body)
    assert passed is False


def test_tc15_finalizer_log_roundtrips_through_parser():
    """Producer/consumer contract: a log rendered by ``build_deploy_log`` is fed
    straight into ``_parse_deploy_status`` — the SUCCESS log must pass and the
    FAILED log must fail."""
    success_log = build_deploy_log("ORCH-036", 0, "SUCCESS")
    passed_on_success, _ = _parse_deploy_status(success_log)

    failure_log = build_deploy_log("ORCH-036", 2, "FAILED")
    passed_on_failure, _ = _parse_deploy_status(failure_log)

    assert passed_on_success is True
    assert passed_on_failure is False


# ---------------------------------------------------------------------------
# ORCH-071 TC-15: the deploy-status parsing contract is UNCHANGED by the new
# merge-verify under-gate. The ``merged_to_main:`` observability field the
# under-gate stamps into 14-deploy-log.md must NOT influence ``deploy_status:``
# parsing — the gate keeps reading ONLY the ``deploy_status:`` frontmatter.
# ---------------------------------------------------------------------------
def test_tc15_merged_to_main_field_does_not_affect_deploy_status():
    """The verdict follows ``deploy_status:`` alone; a ``merged_to_main:`` field
    in the same frontmatter neither flips it nor substitutes for it."""
    # SUCCESS stays SUCCESS even when merged_to_main says false.
    success_doc = "---\ndeploy_status: SUCCESS\nmerged_to_main: false\n---\n\nbody"
    passed_success, _ = _parse_deploy_status(success_doc)
    assert passed_success is True

    # FAILED stays FAILED even when merged_to_main says true.
    failed_doc = "---\ndeploy_status: FAILED\nmerged_to_main: true\n---\n\nbody"
    passed_failed, _ = _parse_deploy_status(failed_doc)
    assert passed_failed is False

    # Without any deploy_status, merged_to_main on its own is not a verdict.
    merge_only_doc = "---\nmerged_to_main: true\n---\n"
    passed_merge_only, _ = _parse_deploy_status(merge_only_doc)
    assert passed_merge_only is False


# ---------------------------------------------------------------------------
# ORCH-061 / TC-04 + TC-05: infra-tolerant staging verdict (pure logic, AC-2/AC-3).
#
# compute_staging_verdict folds the staging-check suite into a single
# SUCCESS/FAILED verdict that is TOLERANT to known sandbox-infra failures
# (C9a/C9b) but stays fail-closed for any REAL pipeline check. These tests
# exercise the verdict directly — no live staging stand / docker (02-trz §9).
# ---------------------------------------------------------------------------
from src.staging_verdict import (  # noqa: E402
    REAL,
    SANDBOX_INFRA,
    compute_staging_verdict,
)


def _rows(*specs):
    """Helper: build (label, passed, category) rows."""
    return [(label, passed, cat) for label, passed, cat in specs]


def test_tc04_only_infra_failures_waived_to_success():
    """TC-04 / AC-2: with tolerance ON, a suite whose REAL checks all pass and
    whose only failures are the C9a/C9b sandbox-infra checks yields
    SUCCESS / exit 0, and both failing checks are listed as waived."""
    branch_check = "C9a Branch appears in orchestrator-sandbox"
    queue_check = "C9b Analyst job enqueued in staging queue"
    suite = _rows(
        ("C7 Create issue in Plane SANDBOX", True, REAL),
        ("C8 Trigger pipeline via /webhook/plane", True, REAL),
        (branch_check, False, SANDBOX_INFRA),
        (queue_check, False, SANDBOX_INFRA),
    )

    verdict = compute_staging_verdict(suite, infra_tolerant=True)

    assert verdict.status == "SUCCESS"
    assert verdict.exit_code == 0
    # Order of the waived labels is irrelevant (observability, FR-7).
    assert set(verdict.waived) == {branch_check, queue_check}


def test_tc05_any_real_failure_fails_closed():
    """TC-05 / AC-3: a failing REAL check next to a failing sandbox-infra check
    gives FAILED / exit 1 even with tolerance ON, and nothing is waived."""
    suite = _rows(
        # The failing REAL check is the regression that must sink the verdict.
        ("C7 Create issue in Plane SANDBOX", False, REAL),
        ("C8 Trigger pipeline via /webhook/plane", True, REAL),
        ("C9a Branch appears in orchestrator-sandbox", False, SANDBOX_INFRA),
    )

    verdict = compute_staging_verdict(suite, infra_tolerant=True)

    assert verdict.status == "FAILED"
    assert verdict.exit_code == 1
    # A real failure means the infra failure is not reported as waived.
    assert verdict.waived == []


def test_tc05_real_failure_fails_closed_even_alone():
    """Fail-closed: one failing REAL check, with no infra failures around it,
    is already enough for FAILED / exit 1 under tolerance ON."""
    lone_failure = _rows(("C7 Create issue in Plane SANDBOX", False, REAL))

    verdict = compute_staging_verdict(lone_failure, infra_tolerant=True)

    assert verdict.status == "FAILED"
    assert verdict.exit_code == 1


def test_tc09_infra_failure_strict_mode_fails_closed():
    """TC-09 / AC-7: with tolerance OFF, a suite whose only failure is a
    sandbox-infra check is FAILED / exit 1 (strict behaviour)."""
    suite = _rows(
        ("C7 Create issue in Plane SANDBOX", True, REAL),
        ("C9a Branch appears in orchestrator-sandbox", False, SANDBOX_INFRA),
    )

    verdict = compute_staging_verdict(suite, infra_tolerant=False)

    assert verdict.status == "FAILED"
    assert verdict.exit_code == 1


def test_all_green_is_success_regardless_of_tolerance():
    """An all-passing suite is SUCCESS / exit 0 with nothing waived, whether
    tolerance is ON or OFF."""
    suite = _rows(
        ("C7 Create issue in Plane SANDBOX", True, REAL),
        ("C9a Branch appears in orchestrator-sandbox", True, SANDBOX_INFRA),
    )

    # Same rows, evaluated once per tolerance setting.
    for tolerant in (True, False):
        verdict = compute_staging_verdict(suite, infra_tolerant=tolerant)
        assert verdict.status == "SUCCESS"
        assert verdict.exit_code == 0
        assert verdict.waived == []


def test_tc12_compute_verdict_never_raises_on_garbage():
    """AC-10 never-raise: malformed input must come back as a conservative
    FAILED / exit 1 verdict instead of an exception."""
    # A row with too few elements.
    short_row_verdict = compute_staging_verdict(
        [("only-one-element",)], infra_tolerant=True
    )
    assert short_row_verdict.status == "FAILED"
    assert short_row_verdict.exit_code == 1

    # Not an iterable at all: still fails closed without raising.
    none_verdict = compute_staging_verdict(None, infra_tolerant=True)
    assert none_verdict.status == "FAILED"
    assert none_verdict.exit_code == 1