Files
orchestrator/tests/test_qg_verdicts.py
claude-bot 92961d1d32 refactor(frontmatter): unified frontmatter contract + handoff spec (ORCH-52c)
src/frontmatter.py grows from a single-key reader into the full machine
contract: reader (read_frontmatter_value, unchanged), one parse primitive
(parse_frontmatter), writer (render/write_frontmatter), schema validator
(validate_schema/REQUIRED_FIELDS, warning-only by default) and a shared
strip_frontmatter helper. The five verdict gates (check_reviewer_verdict,
_parse_tests_verdict, _parse_deploy_status, _parse_staging_status,
parse_security_status) now read through the single parse_frontmatter point
instead of duplicated ad-hoc YAML logic; review_parse._strip_frontmatter and
security_gate.extract_security_findings reuse the shared helper.

Strictly backward compatible + never-raise: STAGE_TRANSITIONS, the QG_CHECKS
composition, verdict semantics (incl. ORCH-047 three-field tester + negative
token priority), reason-strings and worktree->origin/main fallback are 1:1.
The schema validator never influences a gate verdict by default; hard-fail is
reserved behind the frontmatter_validation_strict kill-switch (default False).

New formal handoff spec docs/_standards/HANDOFF_PROTOCOL.md ("stage -> required
output" + required frontmatter schema), aligned 1:1 with PIPELINE_DOCS.md.

Tests: test_frontmatter.py (TC-01..07), test_qg_verdicts.py (TC-08..15),
test_security_gate.py (TC-12), test_stages_invariants.py (TC-16). Full
tests/ green (1212).

Refs: ORCH-076

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-09 14:14:30 +03:00

203 lines
9.3 KiB
Python

"""ORCH-076 (ORCH-52c): anti-regression for the five verdict gates after the parse
is delegated to the unified frontmatter API.
Covers TC-08..TC-15 of docs/work-items/ORCH-076/04-test-plan.yaml. The MECHANISM of
YAML-frontmatter parsing is now centralised in ``src/frontmatter.parse_frontmatter``,
but the verdict SEMANTICS (value -> transition) must be 1:1 with before the task:
* check_reviewer_verdict (12-review.md, verdict:) — TC-08
* _parse_tests_verdict (13-test-report.md, result/verdict/status, ORCH-047) — TC-09
* _parse_deploy_status (14-deploy-log.md, deploy_status:) — TC-10
* _parse_staging_status (15-staging-log.md, staging_status:) — TC-11
* (parse_security_status is exercised in tests/test_security_gate.py — TC-12)
* backward-compat of old docs (no new schema) + additive schema — TC-13 / TC-14
* worktree -> origin/main fallback preserved — TC-15
"""
import os
import tempfile
# Seed placeholder credentials BEFORE the project imports below (which is why
# those imports carry ``noqa: E402``). ``setdefault`` leaves any value already
# present in the environment untouched.
# NOTE(review): presumably the project configuration requires these tokens at
# import time — confirm against the settings module.
os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")
os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")
from src.qg import checks as qg # noqa: E402
from src.qg.checks import ( # noqa: E402
_parse_deploy_status,
_parse_staging_status,
_parse_tests_verdict,
check_deploy_status,
check_reviewer_verdict,
check_staging_status,
)
def _write(dirpath, work_item_id, name, content):
    """Write *content* to ``<dirpath>/docs/work-items/<work_item_id>/<name>``.

    The work-item directory is created when it does not exist yet, and a file
    of the same name is overwritten. Text is encoded as UTF-8.
    """
    item_dir = os.path.join(dirpath, "docs", "work-items", work_item_id)
    os.makedirs(item_dir, exist_ok=True)
    target = os.path.join(item_dir, name)
    with open(target, "w", encoding="utf-8") as handle:
        handle.write(content)
# --------------------------------------------------------------------------- #
# TC-08 — check_reviewer_verdict via the unified API.
# --------------------------------------------------------------------------- #
def test_tc08_reviewer_verdict_semantics(monkeypatch):
    """TC-08: ``check_reviewer_verdict`` maps ``verdict:`` in 12-review.md to pass/fail.

    ``qg._repo_path`` is patched to a temporary directory, the review file is
    rewritten for every scenario, and the gate is re-run on it:

    * ``verdict: APPROVED``        -> passes, reason mentions APPROVED
    * ``verdict: REQUEST_CHANGES`` -> fails, reason mentions REQUEST_CHANGES
    * frontmatter without verdict  -> fails
    * no frontmatter at all        -> fails (prose "APPROVED" does not count)
    """
    with tempfile.TemporaryDirectory() as repo_root:
        monkeypatch.setattr(qg, "_repo_path", lambda repo, branch=None: repo_root)

        def run_gate(document):
            # Overwrite the review doc, then evaluate the gate against it.
            _write(repo_root, "ORCH-1", "12-review.md", document)
            return check_reviewer_verdict("orchestrator", "ORCH-1")

        passed, why = run_gate("---\nverdict: APPROVED\n---\nbody")
        assert passed is True
        assert "APPROVED" in why

        passed, why = run_gate("---\nverdict: REQUEST_CHANGES\n---\nbody")
        assert passed is False
        assert "REQUEST_CHANGES" in why

        # Frontmatter present but the verdict key is missing -> gate fails.
        passed, _ = run_gate("---\nother: x\n---\nbody")
        assert passed is False

        # No frontmatter block whatsoever -> gate fails.
        passed, _ = run_gate("# review\nAPPROVED in prose\n")
        assert passed is False
# --------------------------------------------------------------------------- #
# TC-09 — _parse_tests_verdict: ORCH-047 three equal-rank fields + negative-token
# priority preserved.
# --------------------------------------------------------------------------- #
def test_tc09_tests_three_fields_each_pass():
    """TC-09: ``result``, ``verdict`` and ``status`` each accept PASS on their own."""
    for key in ("result", "verdict", "status"):
        outcome = _parse_tests_verdict(f"---\n{key}: PASS\n---\nbody")
        passed, why = outcome
        # The field name is the assertion message so a failure names the culprit.
        assert passed is True, key
        assert "PASS" in why
def test_tc09_negative_token_is_authoritative():
    """TC-09: a negative token wins over a positive one in another field."""
    # ET-013 case: BLOCKED in ``verdict`` must beat PASS in ``status``.
    mixed_doc = "---\nverdict: BLOCKED\nstatus: PASS\n---\n"
    passed, why = _parse_tests_verdict(mixed_doc)
    assert passed is False
    assert "BLOCKED" in why

    # A lone FAIL is rejected as well.
    passed, _ = _parse_tests_verdict("---\nresult: FAIL\n---\n")
    assert passed is False
def test_tc09_tests_no_frontmatter_and_malformed():
    """TC-09: missing / unterminated frontmatter fails; an empty extra key is harmless."""
    # Plain prose without a frontmatter block.
    passed, why = _parse_tests_verdict("no frontmatter, 23 passed\n")
    assert passed is False
    assert "No YAML frontmatter" in why

    # Opening fence without a closing one.
    passed, why = _parse_tests_verdict("---\nresult: PASS\nunterminated")
    assert passed is False
    assert "Malformed" in why

    # ``result: PASS`` still wins when another key carries no value.
    passed, why = _parse_tests_verdict("---\nresult: PASS\nempty:\n---\n")
    assert passed is True
# --------------------------------------------------------------------------- #
# TC-10 — _parse_deploy_status semantics (BUG-8 / БАГ-8) unchanged.
# --------------------------------------------------------------------------- #
def test_tc10_deploy_status_semantics():
    """TC-10: only ``deploy_status: SUCCESS`` in frontmatter passes the deploy parser."""
    expectations = (
        ("---\ndeploy_status: SUCCESS\n---\n", True),
        ("---\ndeploy_status: FAILED\n---\n", False),
        # SUCCESS under a different key must not count.
        ("---\nother: SUCCESS\n---\n", False),
        # SUCCESS in prose without frontmatter must not count either.
        ("prose only SUCCESS", False),
    )
    for document, expected in expectations:
        assert _parse_deploy_status(document)[0] is expected

    # Unparseable YAML fails and keeps the established reason prefix.
    passed, why = _parse_deploy_status("---\nx: : :\n---\n")
    assert passed is False
    assert "Invalid YAML frontmatter in deploy log" in why
# --------------------------------------------------------------------------- #
# TC-11 — _parse_staging_status + ORCH-35 conditionality (non-self -> N/A pass).
# --------------------------------------------------------------------------- #
def test_tc11_staging_status_semantics():
    """TC-11: only ``staging_status: SUCCESS`` passes the staging parser."""
    expectations = (
        ("---\nstaging_status: SUCCESS\n---\n", True),
        ("---\nstaging_status: FAILED\n---\n", False),
        # SUCCESS under an unrelated key is ignored.
        ("---\nother: SUCCESS\n---\n", False),
    )
    for document, expected in expectations:
        assert _parse_staging_status(document)[0] is expected
def test_tc11_staging_gate_na_for_non_self():
    """TC-11: the staging gate passes with an ``N/A`` reason for ``enduro-trails``.

    No ``_repo_path`` patch is installed here; the test only checks the
    returned ``(ok, reason)`` pair for the non-self repository.
    """
    outcome = check_staging_status("enduro-trails", "ET-1", "feature/x")
    passed, why = outcome
    assert passed is True
    assert "N/A" in why
# --------------------------------------------------------------------------- #
# TC-13 — old verdict docs WITHOUT the new schema read exactly as before, for
# every parser.
# --------------------------------------------------------------------------- #
def test_tc13_old_docs_without_schema_still_read():
    """TC-13: legacy docs carrying only the verdict key parse as before.

    None of the documents below contains work_item / stage / author_agent /
    status / created_at / model_used.
    """
    legacy_cases = (
        (_parse_tests_verdict, "---\nresult: PASS\n---\n", True),
        (_parse_tests_verdict, "---\nverdict: FAIL\n---\n", False),
        (_parse_deploy_status, "---\ndeploy_status: SUCCESS\n---\n", True),
        (_parse_staging_status, "---\nstaging_status: SUCCESS\n---\n", True),
    )
    for parser, document, expected in legacy_cases:
        assert parser(document)[0] is expected
def test_tc13_reviewer_old_doc(monkeypatch):
    """TC-13: a legacy 12-review.md with just ``verdict: APPROVED`` still passes."""
    with tempfile.TemporaryDirectory() as repo_root:
        monkeypatch.setattr(qg, "_repo_path", lambda repo, branch=None: repo_root)
        legacy_review = "---\nverdict: APPROVED\n---\nlegacy body"
        _write(repo_root, "ORCH-1", "12-review.md", legacy_review)
        outcome = check_reviewer_verdict("orchestrator", "ORCH-1")
        assert outcome[0] is True
# --------------------------------------------------------------------------- #
# TC-14 — a doc WITH the full additive schema + verdict key yields the SAME
# verdict as without the schema (schema is additive, never changes it).
# --------------------------------------------------------------------------- #
_SCHEMA = (
"work_item: ORCH-076\nstage: testing\nauthor_agent: tester\n"
"status: PASS\ncreated_at: 2026-06-09\nmodel_used: claude-opus-4-8\n"
)
def test_tc14_full_schema_does_not_change_verdict():
    """TC-14: adding the full schema next to the verdict key keeps the verdict.

    Compares a bare document with a schema-carrying one for the tests parser
    (positive verdict) and for the deploy parser (negative verdict).
    """
    # NOTE(review): _SCHEMA carries ``status: PASS`` and ``status`` is also one
    # of the three tester verdict fields (see TC-09), so the positive verdict
    # of the full document does not rest on ``result`` alone.
    bare_tests = _parse_tests_verdict("---\nresult: PASS\n---\n")
    full_tests = _parse_tests_verdict(f"---\n{_SCHEMA}result: PASS\n---\n")
    # Unrolled form of the chained ``bare == full is True`` comparison.
    assert bare_tests[0] == full_tests[0]
    assert full_tests[0] is True

    bare_deploy = _parse_deploy_status("---\ndeploy_status: FAILED\n---\n")
    deploy_doc = (
        "---\n"
        "work_item: ORCH-076\n"
        "stage: deploy\n"
        "author_agent: deployer\n"
        "status: done\n"
        "created_at: 2026-06-09\n"
        "model_used: claude-opus-4-8\n"
        "deploy_status: FAILED\n"
        "---\n"
    )
    full_deploy = _parse_deploy_status(deploy_doc)
    assert bare_deploy[0] == full_deploy[0]
    assert full_deploy[0] is False
def test_tc14_reviewer_with_schema(monkeypatch):
    """TC-14: a 12-review.md with the full schema plus ``verdict: APPROVED`` passes."""
    review_doc = (
        "---\n"
        "work_item: ORCH-1\n"
        "stage: review\n"
        "author_agent: reviewer\n"
        "status: APPROVED\n"
        "created_at: 2026-06-09\n"
        "model_used: claude-opus-4-8\n"
        "verdict: APPROVED\n"
        "---\n"
        "body"
    )
    with tempfile.TemporaryDirectory() as repo_root:
        monkeypatch.setattr(qg, "_repo_path", lambda repo, branch=None: repo_root)
        _write(repo_root, "ORCH-1", "12-review.md", review_doc)
        outcome = check_reviewer_verdict("orchestrator", "ORCH-1")
        assert outcome[0] is True
# --------------------------------------------------------------------------- #
# TC-15 — fallback worktree -> origin/main preserved (the gate still reads the
# log recovered from main through the unified parser).
# --------------------------------------------------------------------------- #
def test_tc15_deploy_status_origin_main_fallback(monkeypatch):
    """TC-15: with no deploy log in the worktree, the gate uses the log from main.

    ``qg._repo_path`` points at an empty temporary directory and
    ``qg._deploy_log_from_main`` is stubbed to return a SUCCESS log; the gate
    is expected to pass with a reason mentioning SUCCESS.
    """
    log_on_main = "---\ndeploy_status: SUCCESS\n---\nfrom main"
    with tempfile.TemporaryDirectory() as empty_worktree:
        # The directory holds no 14-deploy-log.md, so origin/main must be consulted.
        monkeypatch.setattr(qg, "_repo_path", lambda repo, branch=None: empty_worktree)
        monkeypatch.setattr(qg, "_deploy_log_from_main", lambda repo, wi: log_on_main)
        passed, why = check_deploy_status("orchestrator", "ORCH-1", "feature/x")
        assert passed is True
        assert "SUCCESS" in why
def test_tc15_staging_status_origin_main_fallback(monkeypatch):
    """TC-15: the staging gate reads a FAILED log supplied via the main fallback.

    ``qg._repo_path`` points at an empty temporary directory and
    ``qg._staging_log_from_main`` is stubbed to return a FAILED log; the gate
    is expected to fail with a reason mentioning FAILED.
    """
    log_on_main = "---\nstaging_status: FAILED\n---\nfrom main"
    with tempfile.TemporaryDirectory() as empty_worktree:
        monkeypatch.setattr(qg, "_repo_path", lambda repo, branch=None: empty_worktree)
        monkeypatch.setattr(qg, "_staging_log_from_main", lambda repo, wi: log_on_main)
        passed, why = check_staging_status("orchestrator", "ORCH-1", "feature/x")
        assert passed is False
        assert "FAILED" in why