Files
orchestrator/tests/test_agent_prompts_canon.py
claude-bot 131d002469
All checks were successful
CI / test (push) Successful in 29s
CI / test (pull_request) Successful in 30s
docs(ORCH-079): ORCH-52f — sync README with code + reviewer overview-docs axis
Layer 5 (final) of epic ORCH-52. Docs + prompt-only; src/ untouched.

- README.md «Известные ограничения»: fix numbering (was 1,2,3,4,3,4),
  move 6 resolved/obsolete items to «Закрыто (история)» trail with ORCH
  refs, keep only really-open limitations (Telegram-48h ORCH-087,
  task-deps intra-repo ORCH-026, serial-gate ORCH-088). Point-sync stage
  table (development → check_ci_green) and event-routing (ORCH-045).
- reviewer.md: overview-docs axis (axis 4 + constraints) — closing a
  README limitation without updating README → finding ≥P1 (canon 52d
  «»; verdict key + 5 XML sections + 6 schema fields byte-intact).
- tests: new tests/test_readme_limitations.py (numbering + no resolved
  items as open); test_agent_prompts_canon.py asserts the new axis.
- CLAUDE.md / CHANGELOG.md updated; epic ORCH-52 closed (52b→…→52f).

Refs: ORCH-079

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-09 16:23:17 +03:00

282 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""ORCH-077 (ORCH-52d): structural canon of the 6 system prompts.
The 6 agent prompts (`.openclaw/agents/*.md`) are rewritten in the Anthropic XML
canon and taught to emit the mandatory 52c frontmatter schema. These tests are
pure-text structural checks (NO agent runs, NO `src/` import): they guard the
canon and the anti-regression inventory (TRZ §FR-6 / AC-4) so a future prompt
refactor cannot silently drop a working instruction or a machine-verdict key.
Covers test-plan TC-01..TC-07. TC-08 lives in
`tests/test_agent_frontmatter_no_model.py` (re-used, ORCH-074). The full
regression (TC-10) is the rest of `tests/`.
"""
import os
import pytest
# The six agent roles; each one has a prompt file named `<role>.md`.
_AGENTS = (
    "analyst",
    "architect",
    "developer",
    "reviewer",
    "tester",
    "deployer",
)

# This module sits in tests/, one level below the repo root, and
# .openclaw/agents is located directly under that root.
_REPO_ROOT = os.path.dirname(
    os.path.dirname(os.path.abspath(__file__))
)
_AGENTS_DIR = os.path.join(_REPO_ROOT, ".openclaw", "agents")

# ORCH-078 (ORCH-52e): traceability-marker standard (layer 4 of epic ORCH-52).
_TRACEABILITY = os.path.join(
    _REPO_ROOT, "docs", "_standards", "TRACEABILITY.md"
)

# The 5 mandatory XML sections, listed in normative order (D1 / AC-1).
_REQUIRED_SECTIONS = (
    "context",
    "task",
    "deliverables",
    "constraints",
    "output_format",
)

# The 6 mandatory 52c schema fields (src/frontmatter.py::REQUIRED_FIELDS).
_SCHEMA_FIELDS = (
    "work_item",
    "stage",
    "author_agent",
    "status",
    "created_at",
    "model_used",
)

# Role -> stage value(s) that the prompt's schema has to pin (TRZ §FR-2).
_STAGE_BY_ROLE = {
    "analyst": ("analysis",),
    "architect": ("architecture",),
    "developer": ("development",),
    "reviewer": ("review",),
    "tester": ("testing",),
    "deployer": ("deploy-staging", "deploy"),
}

# Per-role anti-regression markers that MUST survive the rewrite (TRZ §FR-6).
# Every entry is searched for as a plain substring of the role's prompt.
_ANTI_REGRESS = {
    "analyst": [
        "01-brd.md",
        "02-trz.md",
        "03-acceptance-criteria.md",
        "04-test-plan.yaml",
        "Write tool",
    ],
    "architect": [
        "## Статус",
        "## Решение",
        "## Последствия",
        "docs/architecture/adr/",  # global cross-cutting ADR rule
        "back-to:analysis",  # escalation
        "arch:major-change",  # escalation
    ],
    "developer": [
        "TDD",
        "--no-verify",
        "--force-push",
        "свой PR",  # fragment of the "do not merge your own PR" rule
        "Refs:",  # conventional commit footer
    ],
    "reviewer": [
        "REQUEST_CHANGES",
        "НЕ обновлена",  # "src/ changed, docs not updated -> REQUEST_CHANGES"
    ],
    "tester": [
        "pytest",
        "/health",
        "/status",
        "/queue",
    ],
    "deployer": [
        "docker exec orchestrator-staging",
        "pr_already_merged",
        "8500",  # "never restart 8500 from inside"
        "INFRA-WAIVED",  # ORCH-061 waiver
    ],
}
def _read(agent: str) -> str:
    """Return the UTF-8 decoded text of ``<agent>.md`` in the agents directory."""
    prompt_path = os.path.join(_AGENTS_DIR, f"{agent}.md")
    with open(prompt_path, encoding="utf-8") as handle:
        content = handle.read()
    return content
def _read_repo(*parts: str) -> str:
    """Return the UTF-8 decoded text of the file at ``parts`` under the repo root."""
    target = os.path.join(_REPO_ROOT, *parts)
    with open(target, encoding="utf-8") as handle:
        content = handle.read()
    return content
@pytest.mark.parametrize("agent", _AGENTS)
def test_five_xml_sections_present(agent):
    """TC-01: every prompt has an opening and a closing tag for each required section."""
    prompt = _read(agent)
    for name in _REQUIRED_SECTIONS:
        opening, closing = f"<{name}>", f"</{name}>"
        assert opening in prompt, f"{agent}.md missing <{name}> open tag"
        assert closing in prompt, f"{agent}.md missing </{name}> close tag"
@pytest.mark.parametrize("agent", _AGENTS)
def test_six_schema_field_names_present(agent):
    """TC-02: every prompt mentions each of the 6 mandatory 52c schema field names."""
    prompt = _read(agent)
    for name in _SCHEMA_FIELDS:
        mentioned = name in prompt
        assert mentioned, f"{agent}.md does not mention schema field {name!r}"
@pytest.mark.parametrize("agent", _AGENTS)
def test_schema_pins_role_specific_author_and_stage(agent):
    """TC-03: author_agent == role and the role's stage(s) are pinned in the schema.

    Each stage must appear as a complete ``stage: <value>`` token. A plain
    substring check is not enough: ``"stage: deploy"`` is a prefix of
    ``"stage: deploy-staging"``, so the deployer's second stage would be
    reported as pinned even if only the first one were present.
    """
    # Local import keeps this block self-contained; the module itself only
    # imports `os` and `pytest` at the top.
    import re

    text = _read(agent)
    assert f"author_agent: {agent}" in text, (
        f"{agent}.md does not pin 'author_agent: {agent}' in an example schema"
    )
    for stage in _STAGE_BY_ROLE[agent]:
        # The value must not continue with a word character or "-", so a
        # longer stage name can never satisfy the check for a shorter one.
        pinned = re.search(rf"stage: {re.escape(stage)}(?![\w-])", text)
        assert pinned is not None, (
            f"{agent}.md does not pin 'stage: {stage}' in an example schema"
        )
@pytest.mark.parametrize("agent", _AGENTS)
def test_references_templates_and_a_reference_work_item(agent):
    """TC-04: every prompt mentions docs/_templates/ and ORCH-073 or ORCH-088."""
    prompt = _read(agent)
    templates_dir = "docs/_templates/"
    assert templates_dir in prompt, f"{agent}.md does not reference docs/_templates/"
    # Either of the two gold-standard work items is sufficient.
    cites_reference_item = any(
        work_item in prompt for work_item in ("ORCH-073", "ORCH-088")
    )
    assert cites_reference_item, (
        f"{agent}.md does not reference a gold-standard work item (ORCH-073/ORCH-088)"
    )
def test_machine_verdict_keys_preserved_exact_case():
    """TC-05: machine-verdict keys and their value sets are present, case-sensitively."""
    # Role -> key(s) followed by the values that must appear verbatim.
    # Roles are read and checked one at a time, in this order.
    expectations = (
        ("reviewer", ("verdict:", "APPROVED", "REQUEST_CHANGES")),
        ("tester", ("result:", "PASS", "FAIL")),
        ("deployer", ("staging_status:", "deploy_status:", "SUCCESS", "FAILED")),
    )
    for role, tokens in expectations:
        prompt = _read(role)
        for token in tokens:
            assert token in prompt
def test_deployer_self_hosting_anti_regress():
    """TC-06: deployer.md still contains every deployer anti-regress marker.

    Per the marker table these cover the canonical staging command, the
    merge-guard, the 8500 ban and the waiver.
    """
    prompt = _read("deployer")
    required_markers = _ANTI_REGRESS["deployer"]
    for needle in required_markers:
        assert needle in prompt, f"deployer.md lost anti-regress marker {needle!r}"
@pytest.mark.parametrize("agent", _AGENTS)
def test_role_anti_regress_markers(agent):
    """TC-07: each role's anti-regression markers (TRZ §FR-6) are still in its prompt."""
    prompt = _read(agent)
    required_markers = _ANTI_REGRESS[agent]
    for needle in required_markers:
        assert needle in prompt, f"{agent}.md lost anti-regress marker {needle!r}"
# --------------------------------------------------------------------------- #
# ORCH-078 (ORCH-52e): traceability-marker standard + reading-rule anti-regress
# (TRZ §FR-1..FR-8; AC-1..AC-5, AC-8). Pure-text checks, NO `src/` import.
# --------------------------------------------------------------------------- #
def test_traceability_standard_exists_and_nonempty():
    """TC-01 (AC-1): the TRACEABILITY.md standard is a file with non-blank content."""
    present = os.path.isfile(_TRACEABILITY)
    assert present, "docs/_standards/TRACEABILITY.md is missing"
    content = _read_repo("docs", "_standards", "TRACEABILITY.md")
    # A file holding only whitespace counts as empty.
    assert content.strip(), "TRACEABILITY.md is empty"
def test_traceability_describes_marker_format_and_placement():
    """TC-02 (AC-1): the standard names the ORCH-NNN marker and its placement rule."""
    standard = _read_repo("docs", "_standards", "TRACEABILITY.md")
    marker_described = "ORCH-NNN" in standard
    assert marker_described, "TRACEABILITY.md does not describe the ORCH-NNN marker"
    # Placement rule: the marker goes next to a non-trivial invariant, not on
    # trivial code; the standard is checked for the Russian word for "invariant".
    placement_stated = "инвариант" in standard
    assert placement_stated, "TRACEABILITY.md does not state the placement rule"
def test_traceability_has_real_verifiable_example():
    """TC-03 (AC-1): the worked example refers to files that exist in the repo tree.

    If the standard's own example pointed at a missing source file or ADR it
    would contradict itself, so both referenced paths are checked on disk.
    """
    standard = _read_repo("docs", "_standards", "TRACEABILITY.md")
    has_example = "src/serial_gate.py" in standard and "ORCH-088" in standard
    assert has_example, (
        "TRACEABILITY.md lacks the serial_gate/ORCH-088 worked example"
    )

    source_file = os.path.join(_REPO_ROOT, "src", "serial_gate.py")
    assert os.path.isfile(source_file), (
        "example references src/serial_gate.py which does not exist"
    )

    adr_file = os.path.join(
        _REPO_ROOT,
        "docs",
        "work-items",
        "ORCH-088",
        "06-adr",
        "ADR-001-serial-gate.md",
    )
    assert os.path.isfile(adr_file), (
        "example references an ORCH-088 ADR that does not exist"
    )
def test_traceability_documents_fallback_access():
    """TC-04 (AC-4): the standard contains the `git show origin/main` fallback."""
    standard = _read_repo("docs", "_standards", "TRACEABILITY.md")
    fallback_command = "git show origin/main:docs/work-items/"
    assert fallback_command in standard, (
        "TRACEABILITY.md does not document the cross-branch ADR fallback"
    )
def test_traceability_documents_anti_archeology():
    """TC-05 (AC-5): the standard states the 3+ markers -> cross-cutting ADR rule."""
    standard = _read_repo("docs", "_standards", "TRACEABILITY.md")
    points_at_adr_dir = "docs/architecture/adr/" in standard
    assert points_at_adr_dir, (
        "TRACEABILITY.md anti-archeology rule does not point at the cross-cutting ADR dir"
    )
    states_threshold = "3+" in standard
    assert states_threshold, "TRACEABILITY.md does not state the 3+ markers threshold"
def test_developer_carries_reading_rule_and_fallback():
    """TC-06 (AC-2, AC-4): developer.md names the standard and the fallback command."""
    prompt = _read("developer")
    references_standard = "TRACEABILITY.md" in prompt
    assert references_standard, "developer.md does not reference TRACEABILITY.md"
    fallback_command = "git show origin/main:docs/work-items/"
    assert fallback_command in prompt, (
        "developer.md does not carry the cross-branch ADR fallback"
    )
def test_architect_carries_reading_rule_and_anti_archeology():
    """TC-07 (AC-2, AC-5): architect.md names the standard and the 3+ markers rule."""
    prompt = _read("architect")
    references_standard = "TRACEABILITY.md" in prompt
    assert references_standard, "architect.md does not reference TRACEABILITY.md"
    states_threshold = "3+" in prompt
    assert states_threshold, "architect.md does not carry the 3+ markers anti-archeology rule"
def test_reviewer_carries_traceability_control_axis():
    """TC-08 (AC-3): reviewer.md references the traceability standard by file name."""
    prompt = _read("reviewer")
    references_standard = "TRACEABILITY.md" in prompt
    assert references_standard, "reviewer.md does not reference TRACEABILITY.md"
def test_claude_md_and_readme_reference_traceability_standard():
    """TC-12 (AC-8): CLAUDE.md and the architecture README both name the standard."""
    # (path parts under the repo root, failure message); files are read and
    # checked one at a time, in this order.
    documents = (
        (
            ("CLAUDE.md",),
            "CLAUDE.md does not reference docs/_standards/TRACEABILITY.md",
        ),
        (
            ("docs", "architecture", "README.md"),
            "architecture README does not reference docs/_standards/TRACEABILITY.md",
        ),
    )
    for parts, failure in documents:
        assert "TRACEABILITY.md" in _read_repo(*parts), failure
# --------------------------------------------------------------------------- #
# ORCH-079 (ORCH-52f): reviewer overview-docs axis (layer 5 of epic ORCH-52).
# Pure-text anti-drift check (TRZ §FR-6 / AC-5), NO `src/` import.
# --------------------------------------------------------------------------- #
def test_reviewer_carries_overview_docs_axis():
    """ORCH-079 TC-01 (AC-5): reviewer.md covers the README overview-docs axis.

    The reviewer prompt has to demand a README update (its known-limitations
    section, matched below by its Russian heading) whenever a PR closes a
    documented limitation. Like the traceability control axis, this check
    protects the rule from silently disappearing in a later prompt refactor.
    """
    prompt = _read("reviewer")
    # (required substring, failure message), checked in this order.
    anchors = (
        (
            "Известные ограничения",
            "reviewer.md does not mention the README 'Известные ограничения' overview-docs axis",
        ),
        (
            "ORCH-079",
            "reviewer.md does not anchor the overview-docs axis to ORCH-079",
        ),
    )
    for needle, failure in anchors:
        assert needle in prompt, failure