refactor(frontmatter): unified frontmatter contract + handoff spec (ORCH-52c)
All checks were successful
CI / test (push) Successful in 32s
CI / test (pull_request) Successful in 35s

src/frontmatter.py grows from a single-key reader into the full machine
contract: reader (read_frontmatter_value, unchanged), one parse primitive
(parse_frontmatter), writer (render/write_frontmatter), schema validator
(validate_schema/REQUIRED_FIELDS, warning-only by default) and a shared
strip_frontmatter helper. The five verdict gates (check_reviewer_verdict,
_parse_tests_verdict, _parse_deploy_status, _parse_staging_status,
parse_security_status) now read through the single parse_frontmatter point
instead of duplicated ad-hoc YAML logic; review_parse._strip_frontmatter and
security_gate.extract_security_findings reuse the shared helper.

Strictly backward compatible + never-raise: STAGE_TRANSITIONS, the QG_CHECKS
composition, verdict semantics (incl. ORCH-047 three-field tester + negative
token priority), reason-strings and worktree->origin/main fallback are 1:1.
The schema validator never influences a gate verdict by default; hard-fail is
reserved behind the frontmatter_validation_strict kill-switch (default False).

New formal handoff spec docs/_standards/HANDOFF_PROTOCOL.md ("stage -> required
output" + required frontmatter schema), aligned 1:1 with PIPELINE_DOCS.md.

Tests: test_frontmatter.py (TC-01..07), test_qg_verdicts.py (TC-08..15),
test_security_gate.py (TC-12), test_stages_invariants.py (TC-16). Full
tests/ green (1212).

Refs: ORCH-076

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-09 14:03:49 +03:00
parent 0cf5bfc84b
commit 428461898d
14 changed files with 1043 additions and 109 deletions

244
tests/test_frontmatter.py Normal file
View File

@@ -0,0 +1,244 @@
"""ORCH-076 (ORCH-52c): unit tests for the unified frontmatter contract.
Covers TC-01..TC-07 of docs/work-items/ORCH-076/04-test-plan.yaml:
* writer (render/round-trip), validator (full / partial schema, strict on/off),
* reader contract preserved (read_frontmatter_value), never-raise on bad input.
The whole module honours a never-raise contract (NFR-2): no input shape may raise.
"""
import os
import tempfile
os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")
os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")
from src import frontmatter as fm # noqa: E402
from src.frontmatter import ( # noqa: E402
REQUIRED_FIELDS,
FrontmatterParse,
SchemaValidation,
maybe_warn_schema,
parse_frontmatter,
parse_frontmatter_dict,
read_frontmatter,
read_frontmatter_value,
render_frontmatter,
strip_frontmatter,
validate_schema,
write_frontmatter,
)
def _full_schema():
    """Return a fresh mapping with the six schema fields used across these tests."""
    pairs = (
        ("work_item", "ORCH-076"),
        ("stage", "review"),
        ("author_agent", "reviewer"),
        ("status", "APPROVED"),
        ("created_at", "2026-06-09"),
        ("model_used", "claude-opus-4-8"),
    )
    # A new dict per call so tests may mutate the result freely.
    return dict(pairs)
# --------------------------------------------------------------------------- #
# TC-01 — writer serialises a mapping into canonical leading YAML-frontmatter
# readable by the existing parsers (split("---", 2) + yaml.safe_load).
# --------------------------------------------------------------------------- #
def test_tc01_render_frontmatter_is_canonical_and_reparseable():
    """TC-01: rendered output starts with a fence and parses with the legacy recipe."""
    import yaml

    rendered = render_frontmatter({"verdict": "APPROVED", "work_item": "ORCH-076"}, "body text")
    assert rendered.startswith("---\n")

    # Legacy parser shape: split on '---' into three segments, then yaml.safe_load.
    segments = rendered.split("---", 2)
    assert len(segments) == 3
    header, tail = segments[1], segments[2]
    loaded = yaml.safe_load(header)
    assert loaded["verdict"] == "APPROVED"
    assert loaded["work_item"] == "ORCH-076"

    # The body follows the closing fence unchanged (modulo the separating newline).
    assert tail.lstrip("\n") == "body text"

    # The module's own primitive must read back the same mapping.
    reparsed = parse_frontmatter(rendered)
    assert reparsed.data == {"verdict": "APPROVED", "work_item": "ORCH-076"}
def test_tc01_render_empty_body_default():
    """TC-01: omitting the body yields only the fenced block."""
    assert render_frontmatter({"a": 1}) == "---\na: 1\n---\n"
# --------------------------------------------------------------------------- #
# TC-02 — round-trip: writer -> reader read_frontmatter_value yields same values.
# --------------------------------------------------------------------------- #
def test_tc02_write_then_read_roundtrip():
    """TC-02: values written by write_frontmatter are read back unchanged."""
    expected = _full_schema()
    with tempfile.TemporaryDirectory() as workdir:
        target = os.path.join(workdir, "12-review.md")
        written = write_frontmatter(target, expected, "# Review body")
        assert written is True

        # Per-key reads agree with what was written...
        for field in expected:
            assert read_frontmatter_value(target, field) == expected[field]

        # ...and so does the whole-mapping read.
        assert read_frontmatter(target) == expected
def test_tc02_render_parse_dict_roundtrip():
    """TC-02: parse_frontmatter_dict recovers exactly the mapping that was rendered."""
    expected = _full_schema()
    text = render_frontmatter(expected, "body")
    recovered = parse_frontmatter_dict(text)
    assert recovered == expected
# --------------------------------------------------------------------------- #
# TC-03 — validator: full schema -> valid=True, no missing fields.
# --------------------------------------------------------------------------- #
def test_tc03_validate_full_schema_valid():
    """TC-03: a mapping with every schema field validates with nothing missing."""
    outcome = validate_schema(_full_schema())
    assert isinstance(outcome, SchemaValidation)
    assert outcome.valid is True
    assert outcome.missing == []
# --------------------------------------------------------------------------- #
# TC-04 — validator: partial schema -> valid=False with the missing list,
# WITHOUT raising (warning-only by default).
# --------------------------------------------------------------------------- #
def test_tc04_validate_partial_schema_lists_missing():
    """TC-04: absent fields are listed, in REQUIRED_FIELDS order, without raising."""
    supplied = {"work_item", "stage"}
    outcome = validate_schema({"work_item": "ORCH-076", "stage": "review"})
    assert outcome.valid is False

    # Everything required except the two supplied keys must be reported.
    assert set(outcome.missing) == set(REQUIRED_FIELDS) - supplied

    # The report keeps the declaration order of REQUIRED_FIELDS.
    in_schema_order = [name for name in REQUIRED_FIELDS if name in outcome.missing]
    assert outcome.missing == in_schema_order
def test_tc04_blank_and_none_count_as_missing():
    """TC-04: an empty string or None value is treated the same as an absent key."""
    schema = _full_schema()
    schema.update({"status": "", "model_used": None})  # blank and None -> missing
    outcome = validate_schema(schema)
    assert outcome.valid is False
    assert set(outcome.missing) == {"status", "model_used"}
# --------------------------------------------------------------------------- #
# TC-05 — never-raise: writer + validator on broken input return a safe value.
# --------------------------------------------------------------------------- #
def test_tc05_validate_non_mapping_never_raises():
    """TC-05: non-mapping input yields an invalid result listing every required field."""
    every_field = set(REQUIRED_FIELDS)
    for junk in (None, "not a mapping", 123, ["a", "b"]):
        outcome = validate_schema(junk)  # type: ignore[arg-type]
        assert outcome.valid is False
        assert set(outcome.missing) == every_field
def test_tc05_parse_broken_inputs_never_raise():
    """TC-05: every malformed input shape parses to an empty mapping without raising."""
    # No frontmatter at all: the result equals a default-constructed parse.
    plain = parse_frontmatter("just prose, no fence")
    assert plain == FrontmatterParse()
    assert plain.data == {}
    assert plain.has_block is False

    # Opening fence without a closing one.
    unterminated = parse_frontmatter("---\nkey: val\nno closing fence")
    assert unterminated.has_block is True
    assert unterminated.malformed is True
    assert unterminated.data == {}

    # Fenced block whose content is not valid YAML.
    bad_yaml = parse_frontmatter("---\nkey: : :\n bad\n---\n")
    assert bad_yaml.has_block is True
    assert bad_yaml.yaml_error is not None
    assert bad_yaml.data == {}

    # Valid YAML that is a scalar rather than a mapping.
    scalar = parse_frontmatter("---\njust a string\n---\nbody")
    assert scalar.has_block is True
    assert scalar.data == {}

    # Inputs that are not strings at all.
    assert parse_frontmatter(None).data == {}  # type: ignore[arg-type]
    assert parse_frontmatter_dict(12345) == {}  # type: ignore[arg-type]
def test_tc05_write_to_unwritable_path_returns_false():
    """TC-05: a write aimed under a non-existent directory reports False, no raise."""
    unreachable = "/nonexistent-dir-xyz/cannot/12-review.md"
    assert write_frontmatter(unreachable, {"a": 1}) is False
def test_tc05_render_unserialisable_degrades_to_body():
    """TC-05: when the mapping cannot be serialised, only the body is returned."""

    class _Opaque:
        """Arbitrary object used as a value the renderer cannot serialise."""

    rendered = render_frontmatter({"x": _Opaque()}, "fallback-body")
    # The renderer must degrade to the bare body rather than raise.
    assert rendered == "fallback-body"
def test_tc05_read_missing_file_returns_empty():
    """TC-05: reading a missing file gives {} for the mapping and None for a key."""
    absent = "/no/such/file.md"
    assert read_frontmatter(absent) == {}
    assert read_frontmatter_value(absent, "verdict") is None
# --------------------------------------------------------------------------- #
# TC-06 — reader read_frontmatter_value keeps its previous contract.
# --------------------------------------------------------------------------- #
def test_tc06_reader_contract_preserved():
    """TC-06: read_frontmatter_value keeps case, and maps empty/absent/no-block to None."""
    with tempfile.TemporaryDirectory() as workdir:
        doc = os.path.join(workdir, "doc.md")
        with open(doc, "w", encoding="utf-8") as handle:
            handle.write("---\nverdict: Approved\nempty:\n---\nbody\n")

        # The value comes back stripped with its original casing.
        assert read_frontmatter_value(doc, "verdict") == "Approved"
        # A key with an empty value reads as None.
        assert read_frontmatter_value(doc, "empty") is None
        # So does a key that is not present.
        assert read_frontmatter_value(doc, "missing") is None

    # A document without any frontmatter block also reads as None.
    with tempfile.TemporaryDirectory() as workdir:
        doc = os.path.join(workdir, "doc.md")
        with open(doc, "w", encoding="utf-8") as handle:
            handle.write("no frontmatter here\n")
        assert read_frontmatter_value(doc, "verdict") is None
def test_tc06_reader_strips_whitespace():
    """TC-06: surrounding whitespace inside a quoted value is stripped on read."""
    with tempfile.TemporaryDirectory() as workdir:
        doc = os.path.join(workdir, "doc.md")
        with open(doc, "w", encoding="utf-8") as handle:
            handle.write('---\nverdict: " PASS "\n---\n')
        value = read_frontmatter_value(doc, "verdict")
        assert value == "PASS"
# --------------------------------------------------------------------------- #
# TC-07 — kill-switch: strict False (default) is inert; strict True signals
# invalidity. maybe_warn_schema never changes a verdict either way.
# --------------------------------------------------------------------------- #
def test_tc07_maybe_warn_schema_default_warning_only(monkeypatch, caplog):
    """TC-07: with strict mode off, an incomplete schema is reported, not raised.

    ``caplog`` stays in the signature for compatibility; nothing is asserted on
    log output here. The earlier ``monkeypatch.setattr(fm, "logger", fm.logger)``
    replaced the logger with itself and has been dropped as a no-op.
    """
    from src.config import settings

    monkeypatch.setattr(settings, "frontmatter_validation_strict", False)
    incomplete = render_frontmatter({"verdict": "APPROVED"}, "body")
    res = maybe_warn_schema(incomplete, "review report")

    # Validation still reports invalidity (the data IS incomplete)...
    assert res.valid is False
    assert "model_used" in res.missing
    # ...but the helper is inert: it returned a value instead of raising/blocking.
def test_tc07_strict_flag_visible_to_helper(monkeypatch):
    """TC-07: with strict mode on, validity is still only reported, never raised."""
    from src.config import settings

    monkeypatch.setattr(settings, "frontmatter_validation_strict", True)

    # A complete schema is valid regardless of the flag.
    complete_doc = render_frontmatter(_full_schema(), "b")
    assert maybe_warn_schema(complete_doc, "doc").valid is True

    # An incomplete one is flagged invalid; the call itself does not raise.
    partial_doc = render_frontmatter({"stage": "review"}, "b")
    assert maybe_warn_schema(partial_doc, "doc").valid is False
def test_tc07_maybe_warn_schema_on_garbage_is_inert():
    """TC-07: non-string or fence-less input still returns a SchemaValidation."""
    garbage_inputs = ("no frontmatter", None, 123)
    for item in garbage_inputs:
        # Never-raise: the call must come back with a result object.
        outcome = maybe_warn_schema(item, "doc")  # type: ignore[arg-type]
        assert isinstance(outcome, SchemaValidation)
# --------------------------------------------------------------------------- #
# strip_frontmatter helper parity.
# --------------------------------------------------------------------------- #
def test_strip_frontmatter_parity():
    """strip_frontmatter drops a well-formed block and leaves anything else alone."""
    # A well-formed block is removed; the newline after the closing fence remains.
    assert strip_frontmatter("---\na: 1\n---\nbody") == "\nbody"

    # Without a well-formed block the text comes back unchanged.
    for untouched in ("no fence", "---\nunterminated"):
        assert strip_frontmatter(untouched) == untouched

    # Non-string input is passed through rather than raising.
    assert strip_frontmatter(None) is None  # type: ignore[arg-type]

202
tests/test_qg_verdicts.py Normal file
View File

@@ -0,0 +1,202 @@
"""ORCH-076 (ORCH-52c): anti-regression for the five verdict gates after the parse
is delegated to the unified frontmatter API.
Covers TC-08..TC-15 of docs/work-items/ORCH-076/04-test-plan.yaml. The MECHANISM of
YAML-frontmatter parsing is now centralised in ``src/frontmatter.parse_frontmatter``,
but the verdict SEMANTICS (value -> transition) must be 1:1 with before the task:
* check_reviewer_verdict (12-review.md, verdict:) — TC-08
* _parse_tests_verdict (13-test-report.md, result/verdict/status, ORCH-047) — TC-09
* _parse_deploy_status (14-deploy-log.md, deploy_status:) — TC-10
* _parse_staging_status (15-staging-log.md, staging_status:) — TC-11
* (parse_security_status is exercised in tests/test_security_gate.py — TC-12)
* backward-compat of old docs (no new schema) + additive schema — TC-13 / TC-14
* worktree -> origin/main fallback preserved — TC-15
"""
import os
import tempfile
os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")
os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")
from src.qg import checks as qg # noqa: E402
from src.qg.checks import ( # noqa: E402
_parse_deploy_status,
_parse_staging_status,
_parse_tests_verdict,
check_deploy_status,
check_reviewer_verdict,
check_staging_status,
)
def _write(dirpath, work_item_id, name, content):
    """Write *content* to docs/work-items/<work_item_id>/<name> under *dirpath*.

    The directory is created when missing; an existing file is overwritten.
    """
    target_dir = os.path.join(dirpath, "docs", "work-items", work_item_id)
    os.makedirs(target_dir, exist_ok=True)
    target = os.path.join(target_dir, name)
    with open(target, "w", encoding="utf-8") as handle:
        handle.write(content)
# --------------------------------------------------------------------------- #
# TC-08 — check_reviewer_verdict via the unified API.
# --------------------------------------------------------------------------- #
def test_tc08_reviewer_verdict_semantics(monkeypatch):
    """TC-08: check_reviewer_verdict passes only on an APPROVED verdict key."""
    with tempfile.TemporaryDirectory() as root:
        monkeypatch.setattr(qg, "_repo_path", lambda repo, branch=None: root)

        def verdict_for(text):
            # Overwrite the review doc, then run the gate against it.
            _write(root, "ORCH-1", "12-review.md", text)
            return check_reviewer_verdict("orchestrator", "ORCH-1")

        passed, why = verdict_for("---\nverdict: APPROVED\n---\nbody")
        assert passed is True
        assert "APPROVED" in why

        passed, why = verdict_for("---\nverdict: REQUEST_CHANGES\n---\nbody")
        assert passed is False
        assert "REQUEST_CHANGES" in why

        # Frontmatter present but without a verdict key -> gate fails.
        passed, _ = verdict_for("---\nother: x\n---\nbody")
        assert passed is False

        # No frontmatter at all -> gate fails even if prose says APPROVED.
        passed, _ = verdict_for("# review\nAPPROVED in prose\n")
        assert passed is False
# --------------------------------------------------------------------------- #
# TC-09 — _parse_tests_verdict: ORCH-047 three equal-rank fields + negative-token
# priority preserved.
# --------------------------------------------------------------------------- #
def test_tc09_tests_three_fields_each_pass():
    """TC-09: PASS under any of result/verdict/status is accepted on its own."""
    for field in ("result", "verdict", "status"):
        outcome = _parse_tests_verdict(f"---\n{field}: PASS\n---\nbody")
        passed, why = outcome
        assert passed is True, field
        assert "PASS" in why
def test_tc09_negative_token_is_authoritative():
    """TC-09: a negative token in any field overrides a positive one elsewhere."""
    # BLOCKED in one field beats PASS in another (ET-013 case).
    mixed = "---\nverdict: BLOCKED\nstatus: PASS\n---\n"
    passed, why = _parse_tests_verdict(mixed)
    assert passed is False
    assert "BLOCKED" in why

    # FAIL on its own is negative too.
    passed, _ = _parse_tests_verdict("---\nresult: FAIL\n---\n")
    assert passed is False
def test_tc09_tests_no_frontmatter_and_malformed():
    """TC-09: missing or unterminated frontmatter fails with its distinct reason."""
    passed, why = _parse_tests_verdict("no frontmatter, 23 passed\n")
    assert passed is False
    assert "No YAML frontmatter" in why

    passed, why = _parse_tests_verdict("---\nresult: PASS\nunterminated")
    assert passed is False
    assert "Malformed" in why

    # result: PASS still wins when an unrelated key has an empty value.
    passed, why = _parse_tests_verdict("---\nresult: PASS\nempty:\n---\n")
    assert passed is True
# --------------------------------------------------------------------------- #
# TC-10 — _parse_deploy_status semantics (БАГ-8) unchanged.
# --------------------------------------------------------------------------- #
def test_tc10_deploy_status_semantics():
    """TC-10: only deploy_status: SUCCESS passes; bad YAML keeps its reason prefix."""
    expectations = (
        ("---\ndeploy_status: SUCCESS\n---\n", True),
        ("---\ndeploy_status: FAILED\n---\n", False),
        ("---\nother: SUCCESS\n---\n", False),
        ("prose only SUCCESS", False),
    )
    for text, want in expectations:
        assert _parse_deploy_status(text)[0] is want

    # Invalid YAML fails and reports the preserved reason prefix.
    passed, why = _parse_deploy_status("---\nx: : :\n---\n")
    assert passed is False
    assert "Invalid YAML frontmatter in deploy log" in why
# --------------------------------------------------------------------------- #
# TC-11 — _parse_staging_status + ORCH-35 conditionality (non-self -> N/A pass).
# --------------------------------------------------------------------------- #
def test_tc11_staging_status_semantics():
    """TC-11: only staging_status: SUCCESS passes the staging parser."""
    expectations = (
        ("---\nstaging_status: SUCCESS\n---\n", True),
        ("---\nstaging_status: FAILED\n---\n", False),
        ("---\nother: SUCCESS\n---\n", False),
    )
    for text, want in expectations:
        assert _parse_staging_status(text)[0] is want
def test_tc11_staging_gate_na_for_non_self():
    """TC-11: for the enduro-trails repo the staging gate passes with an N/A reason."""
    outcome = check_staging_status("enduro-trails", "ET-1", "feature/x")
    passed, why = outcome
    assert passed is True
    assert "N/A" in why
# --------------------------------------------------------------------------- #
# TC-13 — old verdict docs WITHOUT the new schema read exactly as before, for
# every parser.
# --------------------------------------------------------------------------- #
def test_tc13_old_docs_without_schema_still_read():
    """TC-13: docs carrying only the verdict key read the same through each parser."""
    # None of these carry work_item/stage/author_agent/status/created_at/model_used.
    legacy_cases = (
        (_parse_tests_verdict, "---\nresult: PASS\n---\n", True),
        (_parse_tests_verdict, "---\nverdict: FAIL\n---\n", False),
        (_parse_deploy_status, "---\ndeploy_status: SUCCESS\n---\n", True),
        (_parse_staging_status, "---\nstaging_status: SUCCESS\n---\n", True),
    )
    for parser, text, want in legacy_cases:
        assert parser(text)[0] is want
def test_tc13_reviewer_old_doc(monkeypatch):
    """TC-13: a review doc with only the verdict key still passes the reviewer gate."""
    with tempfile.TemporaryDirectory() as root:
        monkeypatch.setattr(qg, "_repo_path", lambda repo, branch=None: root)
        _write(root, "ORCH-1", "12-review.md", "---\nverdict: APPROVED\n---\nlegacy body")
        passed = check_reviewer_verdict("orchestrator", "ORCH-1")[0]
        assert passed is True
# --------------------------------------------------------------------------- #
# TC-14 — a doc WITH the full additive schema + verdict key yields the SAME
# verdict as without the schema (schema is additive, never changes it).
# --------------------------------------------------------------------------- #
# Frontmatter lines for the six additive schema fields, one "key: value" per
# line, ready to be spliced in front of a verdict key inside a fenced block.
_SCHEMA = (
    "work_item: ORCH-076\n"
    "stage: testing\n"
    "author_agent: tester\n"
    "status: PASS\n"
    "created_at: 2026-06-09\n"
    "model_used: claude-opus-4-8\n"
)
def test_tc14_full_schema_does_not_change_verdict():
    """TC-14: adding the schema fields leaves the tests/deploy verdicts unchanged."""
    tests_bare = _parse_tests_verdict("---\nresult: PASS\n---\n")
    tests_full = _parse_tests_verdict(f"---\n{_SCHEMA}result: PASS\n---\n")
    # Both agree, and the shared verdict is a pass.
    assert tests_bare[0] == tests_full[0] and tests_full[0] is True

    deploy_doc_with_schema = (
        "---\nwork_item: ORCH-076\nstage: deploy\nauthor_agent: deployer\n"
        "status: done\ncreated_at: 2026-06-09\nmodel_used: claude-opus-4-8\n"
        "deploy_status: FAILED\n---\n"
    )
    deploy_bare = _parse_deploy_status("---\ndeploy_status: FAILED\n---\n")
    deploy_full = _parse_deploy_status(deploy_doc_with_schema)
    # Both agree, and the shared verdict is a failure.
    assert deploy_bare[0] == deploy_full[0] and deploy_full[0] is False
def test_tc14_reviewer_with_schema(monkeypatch):
    """TC-14: a review doc with schema fields plus verdict: APPROVED passes the gate."""
    review_doc = (
        "---\nwork_item: ORCH-1\nstage: review\nauthor_agent: reviewer\n"
        "status: APPROVED\ncreated_at: 2026-06-09\nmodel_used: claude-opus-4-8\n"
        "verdict: APPROVED\n---\nbody"
    )
    with tempfile.TemporaryDirectory() as root:
        monkeypatch.setattr(qg, "_repo_path", lambda repo, branch=None: root)
        _write(root, "ORCH-1", "12-review.md", review_doc)
        passed = check_reviewer_verdict("orchestrator", "ORCH-1")[0]
        assert passed is True
# --------------------------------------------------------------------------- #
# TC-15 — fallback worktree -> origin/main preserved (the gate still reads the
# log recovered from main through the unified parser).
# --------------------------------------------------------------------------- #
def test_tc15_deploy_status_origin_main_fallback(monkeypatch):
    """TC-15: with no deploy log in the worktree, the gate reads the one from main."""

    def log_from_main(repo, wi):
        # Stand-in for the log recovered from origin/main.
        return "---\ndeploy_status: SUCCESS\n---\nfrom main"

    with tempfile.TemporaryDirectory() as root:
        # The temp dir holds no 14-deploy-log.md, so the fallback must be consulted.
        monkeypatch.setattr(qg, "_repo_path", lambda repo, branch=None: root)
        monkeypatch.setattr(qg, "_deploy_log_from_main", log_from_main)
        passed, why = check_deploy_status("orchestrator", "ORCH-1", "feature/x")
        assert passed is True
        assert "SUCCESS" in why
def test_tc15_staging_status_origin_main_fallback(monkeypatch):
    """TC-15: the staging gate reads a FAILED status from the log recovered from main."""

    def log_from_main(repo, wi):
        # Stand-in for the log recovered from origin/main.
        return "---\nstaging_status: FAILED\n---\nfrom main"

    with tempfile.TemporaryDirectory() as root:
        monkeypatch.setattr(qg, "_repo_path", lambda repo, branch=None: root)
        monkeypatch.setattr(qg, "_staging_log_from_main", log_from_main)
        passed, why = check_staging_status("orchestrator", "ORCH-1", "feature/x")
        assert passed is False
        assert "FAILED" in why

View File

@@ -202,6 +202,32 @@ def test_tc09_missing_or_broken_frontmatter_failclosed():
assert ok is False
def test_orch076_parse_security_status_via_unified_api():
    """ORCH-076 TC-12: parse_security_status now reads through the unified
    frontmatter primitive; the PASS/FAIL semantics are 1:1 with before, and an
    old report WITHOUT the new schema fields still reads exactly the same."""
    from src import frontmatter as fm

    # Delegation to the single parse primitive is pinned via the docstring.
    # `or ""` makes a missing docstring a plain assertion failure, not a TypeError.
    assert "parse_frontmatter" in (sg.parse_security_status.__doc__ or "")

    # PASS / FAIL semantics preserved.
    assert sg.parse_security_status("---\nsecurity_status: PASS\n---\n")[0] is True
    assert sg.parse_security_status("---\nsecurity_status: FAIL\n---\n")[0] is False

    # An additive full schema does not change the verdict (FR-5 / AC-4).
    schema = (
        "work_item: ORCH-076\nstage: deploy-staging\nauthor_agent: deployer\n"
        "status: PASS\ncreated_at: 2026-06-09\nmodel_used: claude-opus-4-8\n"
    )
    # Each line is "key: value"; splitting once on ": " gives the pair directly.
    schema_fields = dict(line.split(": ", 1) for line in schema.strip().splitlines())
    with_schema = fm.render_frontmatter({**schema_fields, "security_status": "PASS"})
    assert sg.parse_security_status(with_schema)[0] is True
def test_tc10_artifact_has_valid_frontmatter_and_body(tmp_path, monkeypatch):
"""TC-10: 17-security-report.md is written with valid frontmatter (all machine
fields) and a body listing the findings; read-back == the written verdict."""

View File

@@ -0,0 +1,55 @@
"""ORCH-076 (ORCH-52c) TC-16 / AC-6: the pipeline invariants are untouched.
ORCH-52c only unifies the MECHANISM of YAML-frontmatter parsing behind a single
API; it must NOT change the composition of the stage machine or the QG registry.
This guard fails first if a future edit drifts either contract.
"""
import os
os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")
os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")
from src.qg.checks import QG_CHECKS # noqa: E402
from src.stages import STAGE_TRANSITIONS # noqa: E402
# Names the QG registry is expected to expose, compared as a set against
# QG_CHECKS.keys(); listed alphabetically for easier diffing.
_EXPECTED_QGS = {
    "check_analysis_approved",
    "check_analysis_complete",
    "check_architecture_done",
    "check_branch_mergeable",
    "check_ci_green",
    "check_deploy_status",
    "check_review_approved",
    "check_reviewer_verdict",
    "check_security_gate",
    "check_staging_image_fresh",
    "check_staging_status",
    "check_tests_local",
    "check_tests_passed",
}
# Expected stage machine, one row per stage: (stage, next stage, agent, gate).
# Expanded into the {"next", "agent", "qg"} mapping shape that is compared
# against STAGE_TRANSITIONS.
_EXPECTED_TRANSITIONS = {
    stage: {"next": following, "agent": agent, "qg": gate}
    for stage, following, agent, gate in (
        ("created", "analysis", "analyst", None),
        ("analysis", "architecture", "architect", "check_analysis_approved"),
        ("architecture", "development", "developer", "check_architecture_done"),
        ("development", "review", "reviewer", "check_ci_green"),
        ("review", "testing", "tester", "check_reviewer_verdict"),
        ("testing", "deploy-staging", "deployer", "check_tests_passed"),
        ("deploy-staging", "deploy", "deployer", "check_staging_status"),
        ("deploy", "done", None, "check_deploy_status"),
        ("done", None, None, None),
    )
}
def test_tc16_qg_registry_unchanged():
    """TC-16: the registry exposes exactly the expected gate names."""
    registered = set(QG_CHECKS.keys())
    assert registered == _EXPECTED_QGS
def test_tc16_qg_callables():
    """TC-16: every registry entry maps to something callable."""
    for name, check in QG_CHECKS.items():
        assert callable(check), f"QG {name} is not callable"
def test_tc16_stage_transitions_unchanged():
    """TC-16: the stage machine equals the expected transition table."""
    actual = STAGE_TRANSITIONS
    assert actual == _EXPECTED_TRANSITIONS