"""ORCH-077 (ORCH-52d): structural canon of the 6 system prompts. The 6 agent prompts (`.openclaw/agents/*.md`) are rewritten in the Anthropic XML canon and taught to emit the mandatory 52c frontmatter schema. These tests are pure-text structural checks (NO agent runs, NO `src/` import): they guard the canon and the anti-regression inventory (TRZ §FR-6 / AC-4) so a future prompt refactor cannot silently drop a working instruction or a machine-verdict key. Covers test-plan TC-01..TC-07. TC-08 lives in `tests/test_agent_frontmatter_no_model.py` (re-used, ORCH-074). The full regression (TC-10) is the rest of `tests/`. """ import os import re import pytest _AGENTS = ("analyst", "architect", "developer", "reviewer", "tester", "deployer") # tests/ is one level under the repo root; .openclaw/agents lives at the root. _REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) _AGENTS_DIR = os.path.join(_REPO_ROOT, ".openclaw", "agents") # ORCH-078 (ORCH-52e): the traceability-marker standard (layer 4 of epic ORCH-52). _TRACEABILITY = os.path.join(_REPO_ROOT, "docs", "_standards", "TRACEABILITY.md") # The 5 mandatory XML sections, in normative order (D1 / AC-1). _REQUIRED_SECTIONS = ("context", "task", "deliverables", "constraints", "output_format") # The 6 mandatory 52c schema fields (src/frontmatter.py::REQUIRED_FIELDS). _SCHEMA_FIELDS = ( "work_item", "stage", "author_agent", "status", "created_at", "model_used", ) # Role -> the stage value(s) the prompt's schema must pin (TRZ §FR-2). _STAGE_BY_ROLE = { "analyst": ("analysis",), "architect": ("architecture",), "developer": ("development",), "reviewer": ("review",), "tester": ("testing",), "deployer": ("deploy-staging", "deploy"), } # Anti-regression markers per role that MUST survive the rewrite (TRZ §FR-6). 
_ANTI_REGRESS = {
    "analyst": [
        "01-brd.md",
        "02-trz.md",
        "03-acceptance-criteria.md",
        "04-test-plan.yaml",
        "Write tool",
    ],
    "architect": [
        "## Статус",
        "## Решение",
        "## Последствия",
        "docs/architecture/adr/",  # global cross-cutting ADR rule
        "back-to:analysis",  # escalation
        "arch:major-change",  # escalation
    ],
    "developer": [
        "TDD",
        "--no-verify",
        "--force-push",
        "свой PR",  # fragment of the "do not merge your own PR" rule
        "Refs:",  # conventional commit footer
    ],
    "reviewer": [
        "REQUEST_CHANGES",
        "НЕ обновлена",  # "src/ changed, docs not updated -> REQUEST_CHANGES"
    ],
    "tester": [
        "pytest",
        "/health",
        "/status",
        "/queue",
    ],
    "deployer": [
        "docker exec orchestrator-staging",
        "pr_already_merged",
        "8500",  # "never restart 8500 from inside"
        "INFRA-WAIVED",  # ORCH-061 waiver
    ],
}


def _read(agent: str) -> str:
    """Return the full UTF-8 text of `<agent>.md` from the agents directory."""
    path = os.path.join(_AGENTS_DIR, f"{agent}.md")
    with open(path, encoding="utf-8") as f:
        return f.read()


def _read_repo(*parts: str) -> str:
    """Return the UTF-8 text of the file at `parts`, joined under the repo root."""
    with open(os.path.join(_REPO_ROOT, *parts), encoding="utf-8") as f:
        return f.read()


@pytest.mark.parametrize("agent", _AGENTS)
def test_five_xml_sections_present(agent):
    """TC-01: each prompt carries all 5 XML sections (open + close tag)."""
    text = _read(agent)
    for section in _REQUIRED_SECTIONS:
        assert f"<{section}>" in text, f"{agent}.md missing <{section}> open tag"
        # The close tag must be spelled out: an empty needle is contained in
        # every string, which would make this check impossible to fail.
        assert f"</{section}>" in text, f"{agent}.md missing </{section}> close tag"


@pytest.mark.parametrize("agent", _AGENTS)
def test_six_schema_field_names_present(agent):
    """TC-02: each prompt names all 6 mandatory 52c schema fields."""
    text = _read(agent)
    for field in _SCHEMA_FIELDS:
        assert field in text, f"{agent}.md does not mention schema field {field!r}"


@pytest.mark.parametrize("agent", _AGENTS)
def test_schema_pins_role_specific_author_and_stage(agent):
    """TC-03: author_agent == role and the role's stage(s) are pinned in the schema."""
    text = _read(agent)
    assert f"author_agent: {agent}" in text, (
        f"{agent}.md does not pin 'author_agent: {agent}' in an example schema"
    )
    for stage in _STAGE_BY_ROLE[agent]:
        assert f"stage: {stage}" in text, (
            f"{agent}.md does not pin 'stage: {stage}' in an example schema"
        )


@pytest.mark.parametrize("agent", _AGENTS)
def test_references_templates_and_a_reference_work_item(agent):
    """TC-04: each prompt links docs/_templates/ and at least one reference work item."""
    text = _read(agent)
    assert "docs/_templates/" in text, f"{agent}.md does not reference docs/_templates/"
    assert ("ORCH-073" in text) or ("ORCH-088" in text), (
        f"{agent}.md does not reference a gold-standard work item (ORCH-073/ORCH-088)"
    )


def test_machine_verdict_keys_preserved_exact_case():
    """TC-05: machine-verdict keys + value sets survive with exact case."""
    reviewer = _read("reviewer")
    assert "verdict:" in reviewer
    assert "APPROVED" in reviewer and "REQUEST_CHANGES" in reviewer

    tester = _read("tester")
    assert "result:" in tester
    assert "PASS" in tester and "FAIL" in tester

    deployer = _read("deployer")
    assert "staging_status:" in deployer
    assert "deploy_status:" in deployer
    assert "SUCCESS" in deployer and "FAILED" in deployer


def test_deployer_self_hosting_anti_regress():
    """TC-06: deployer keeps canonical staging cmd, merge-guard, 8500 ban, waiver."""
    deployer = _read("deployer")
    for marker in _ANTI_REGRESS["deployer"]:
        assert marker in deployer, f"deployer.md lost anti-regress marker {marker!r}"


@pytest.mark.parametrize("agent", _AGENTS)
def test_role_anti_regress_markers(agent):
    """TC-07: per-role anti-regression markers (TRZ §FR-6) survive the rewrite."""
    text = _read(agent)
    for marker in _ANTI_REGRESS[agent]:
        assert marker in text, f"{agent}.md lost anti-regress marker {marker!r}"


# --------------------------------------------------------------------------- #
# ORCH-078 (ORCH-52e): traceability-marker standard + reading-rule anti-regress
# (TRZ §FR-1..FR-8; AC-1..AC-5, AC-8). Pure-text checks, NO `src/` import.
# --------------------------------------------------------------------------- #


def test_traceability_standard_exists_and_nonempty():
    """TC-01 (AC-1): the traceability standard file is present and has content."""
    standard_present = os.path.isfile(_TRACEABILITY)
    assert standard_present, "docs/_standards/TRACEABILITY.md is missing"
    body = _read_repo("docs", "_standards", "TRACEABILITY.md")
    assert body.strip(), (
        "TRACEABILITY.md is empty"
    )


def test_traceability_describes_marker_format_and_placement():
    """TC-02 (AC-1): the standard names the ORCH-NNN marker and its placement rule."""
    standard = _read_repo("docs", "_standards", "TRACEABILITY.md")
    expectations = (
        ("ORCH-NNN", "TRACEABILITY.md does not describe the ORCH-NNN marker"),
        # Placement rule: next to a non-trivial invariant (not on trivial code).
        ("инвариант", "TRACEABILITY.md does not state the placement rule"),
    )
    for needle, complaint in expectations:
        assert needle in standard, complaint


def test_traceability_has_real_verifiable_example():
    """TC-03 (AC-1): the worked example refers to files that exist in the repo tree.

    The standard would contradict itself if its own example pointed at a
    missing source file or ADR, so both referenced paths are checked on disk.
    """
    standard = _read_repo("docs", "_standards", "TRACEABILITY.md")
    cites_example = "src/serial_gate.py" in standard and "ORCH-088" in standard
    assert cites_example, (
        "TRACEABILITY.md lacks the serial_gate/ORCH-088 worked example"
    )

    gate_source = os.path.join(_REPO_ROOT, "src", "serial_gate.py")
    assert os.path.isfile(gate_source), (
        "example references src/serial_gate.py which does not exist"
    )

    gate_adr = os.path.join(
        _REPO_ROOT,
        "docs",
        "work-items",
        "ORCH-088",
        "06-adr",
        "ADR-001-serial-gate.md",
    )
    assert os.path.isfile(gate_adr), "example references an ORCH-088 ADR that does not exist"


def test_traceability_documents_fallback_access():
    """TC-04 (AC-4): the standard documents the `git show origin/main` fallback."""
    standard = _read_repo("docs", "_standards", "TRACEABILITY.md")
    has_fallback = "git show origin/main:docs/work-items/" in standard
    assert has_fallback, (
        "TRACEABILITY.md does not document the cross-branch ADR fallback"
    )


def test_traceability_documents_anti_archeology():
    """TC-05 (AC-5): the standard states the 3+ markers -> cross-cutting ADR rule."""
    standard = _read_repo("docs", "_standards", "TRACEABILITY.md")
    expectations = (
        (
            "docs/architecture/adr/",
            "TRACEABILITY.md anti-archeology rule does not point at the cross-cutting ADR dir",
        ),
        ("3+", "TRACEABILITY.md does not state the 3+ markers threshold"),
    )
    for needle, complaint in expectations:
        assert needle in standard, complaint


def test_developer_carries_reading_rule_and_fallback():
    """TC-06 (AC-2, AC-4): developer.md names the standard and the fallback command."""
    prompt = _read("developer")
    expectations = (
        ("TRACEABILITY.md", "developer.md does not reference TRACEABILITY.md"),
        (
            "git show origin/main:docs/work-items/",
            "developer.md does not carry the cross-branch ADR fallback",
        ),
    )
    for needle, complaint in expectations:
        assert needle in prompt, complaint


def test_architect_carries_reading_rule_and_anti_archeology():
    """TC-07 (AC-2, AC-5): architect.md names the standard and the 3+ markers rule."""
    prompt = _read("architect")
    expectations = (
        ("TRACEABILITY.md", "architect.md does not reference TRACEABILITY.md"),
        ("3+", "architect.md does not carry the 3+ markers anti-archeology rule"),
    )
    for needle, complaint in expectations:
        assert needle in prompt, complaint


def test_reviewer_carries_traceability_control_axis():
    """TC-08 (AC-3): reviewer.md names the traceability standard."""
    prompt = _read("reviewer")
    names_standard = "TRACEABILITY.md" in prompt
    assert names_standard, "reviewer.md does not reference TRACEABILITY.md"


def test_claude_md_and_readme_reference_traceability_standard():
    """TC-12 (AC-8): CLAUDE.md and the architecture README both name the standard."""
    # Checked in this order; each file is read only when its turn comes.
    documents = (
        (
            ("CLAUDE.md",),
            "CLAUDE.md does not reference docs/_standards/TRACEABILITY.md",
        ),
        (
            ("docs", "architecture", "README.md"),
            "architecture README does not reference docs/_standards/TRACEABILITY.md",
        ),
    )
    for parts, complaint in documents:
        assert "TRACEABILITY.md" in _read_repo(*parts), complaint


# --------------------------------------------------------------------------- #
# ORCH-079 (ORCH-52f): reviewer overview-docs axis (layer 5 of epic ORCH-52).
# Pure-text anti-drift check (TRZ §FR-6 / AC-5), NO `src/` import.
# --------------------------------------------------------------------------- #


def test_reviewer_carries_overview_docs_axis():
    """ORCH-079 TC-01 (AC-5): reviewer.md covers the README overview-docs axis.

    The reviewer prompt has to demand a README ("Известные ограничения") update
    whenever a PR closes a documented limitation. Pinning the two markers here
    keeps that rule from silently drifting away in a later prompt refactor, in
    the same way the traceability control axis is pinned.
    """
    prompt = _read("reviewer")
    expectations = (
        (
            "Известные ограничения",
            "reviewer.md does not mention the README 'Известные ограничения' overview-docs axis",
        ),
        ("ORCH-079", "reviewer.md does not anchor the overview-docs axis to ORCH-079"),
    )
    for needle, complaint in expectations:
        assert needle in prompt, complaint


# --------------------------------------------------------------------------- #
# ORCH-092 (epilogue of epic ORCH-52): prompt audit of the 6 agents —
# de-hardcode date/model, gate-name parity, escalation sections, dead-line
# removal, tester enrichment, deployer ban-frame. Pure-text checks; only
# TC-03 imports `src/` (the QG_CHECKS registry parity check).
# Covers test-plan TC-01..TC-08 (TC-09/TC-10/TC-11 = existing canon + full regression).
# --------------------------------------------------------------------------- #


def _fenced_blocks(text: str) -> list[str]:
    """Return the body of every ``` fenced code block (the *copyable* examples).

    A line whose first non-blank characters are ``` toggles the "inside a
    fence" state. Fence lines themselves are never part of a body. A block is
    recorded only when its closing fence is seen, so the content after an
    unterminated opening fence is dropped.
    """
    blocks: list[str] = []
    inside = False
    buf: list[str] = []
    for line in text.splitlines():
        if line.lstrip().startswith("```"):
            if inside:
                # Closing fence: flush the collected lines as one block.
                blocks.append("\n".join(buf))
                buf = []
            inside = not inside
            continue
        if inside:
            buf.append(line)
    return blocks


@pytest.mark.parametrize("agent", _AGENTS)
def test_orch092_created_at_is_placeholder_not_literal(agent):
    """TC-01 (AC-1): copyable example uses a date placeholder + a substitution note.

    The field name `created_at` stays; only its value becomes a placeholder.
    No literal date may survive inside a ``` fenced (copyable) block, else an
    agent would copy a stale date verbatim.
    """
    text = _read(agent)
    # NOTE(review): only the "created_at: " key prefix is pinned here, not a
    # specific placeholder token — confirm that this is the intended needle.
    assert "created_at: " in text, (
        f"{agent}.md does not use the created_at: placeholder"
    )
    for block in _fenced_blocks(text):
        # A digit right after the key means a literal date was hardcoded.
        assert re.search(r"created_at:\s*\d", block) is None, (
            f"{agent}.md still hardcodes a literal created_at date in a copyable block"
        )
    assert "date +%F" in text, (
        f"{agent}.md does not instruct to substitute the actual date (date +%F)"
    )


@pytest.mark.parametrize("agent", _AGENTS)
def test_orch092_model_used_is_placeholder_not_literal(agent):
    """TC-02 (AC-2): copyable example uses a model placeholder, not the literal model.

    `model_used: claude-opus-4-8` is allowed as a reference in the field table
    (outside the fenced block) but must NOT appear in a copyable example.
    """
    text = _read(agent)
    # NOTE(review): only the "model_used: " key prefix is pinned here, not a
    # specific placeholder token — confirm that this is the intended needle.
    assert "model_used: " in text, (
        f"{agent}.md does not use the model_used: placeholder"
    )
    for block in _fenced_blocks(text):
        assert "model_used: claude-opus-4-8" not in block, (
            f"{agent}.md still hardcodes model_used: claude-opus-4-8 in a copyable block"
        )


def test_orch092_gate_names_match_qg_registry():
    """TC-03 (AC-3): every check_* named in the 6 prompts is a real QG_CHECKS key.

    The only test in this module that imports `src/` (integration). Guards
    against a prompt naming a non-existent gate; confirms check_tests_passed
    is valid.
    """
    # Imported locally so the rest of the module stays free of `src/` imports.
    from src.qg.checks import QG_CHECKS

    pattern = re.compile(r"check_[a-z_]+")
    for agent in _AGENTS:
        for name in sorted(set(pattern.findall(_read(agent)))):
            assert name in QG_CHECKS, (
                f"{agent}.md references gate {name!r} which is absent from QG_CHECKS"
            )
    assert "check_tests_passed" in QG_CHECKS, "check_tests_passed must remain a real gate"


def test_orch092_developer_pr_oversize_is_escalation_not_split():
    """TC-04 (AC-4): the 'split into smaller PRs' instruction became an escalation."""
    text = _read("developer")
    assert "разбивай на меньшие PR" not in text, (
        "developer.md still carries the unrealisable 'split into smaller PRs' instruction"
    )
    assert "на уровне задач" in text and "декомпозиц" in text, (
        "developer.md does not reframe an oversize PR as task-level decomposition"
    )
    assert "свой PR" in text, "developer.md lost the 'свой PR' marker"


@pytest.mark.parametrize("agent", ("developer", "reviewer", "tester"))
def test_orch092_escalation_section_present_after_success(agent):
    """TC-05 (AC-5): dev/reviewer/tester carry an escalation section after the success section.

    Both the opening and the closing escalation tag must be present, and the
    opening tag must come later in the file than the success section tag.
    """
    text = _read(agent)
    # The real section tags sit on their own line (an inline mention wrapped in
    # backticks must not be mistaken for the section), hence the ^...$ anchors.
    # NOTE(review): the tag names below were reconstructed from this test's
    # name and failure messages; three identical blank-line patterns could
    # never satisfy the ordering check. Confirm the exact tag names against
    # the prompt files, in particular the success section's tag.
    open_m = re.search(r"(?m)^\s*<escalation>\s*$", text)
    close_m = re.search(r"(?m)^\s*</escalation>\s*$", text)
    assert open_m and close_m, f"{agent}.md is missing the <escalation> section"
    success_m = re.search(r"(?m)^\s*</?success\w*>\s*$", text)
    assert success_m and open_m.start() > success_m.start(), (
        f"{agent}.md places <escalation> before the success section (breaks section order)"
    )


def test_orch092_escalation_routes_are_role_specific():
    """TC-05 (AC-5): escalation routes match each role."""
    assert "back-to:analysis" in _read("developer"), "developer lacks back-to:analysis route"
    assert "back-to:dev" in _read("tester"), "tester lacks back-to:dev route"
    assert "REQUEST_CHANGES" in _read("reviewer"), "reviewer lacks REQUEST_CHANGES route"


def test_orch092_tester_enriched():
    """TC-06 (AC-7): tester gains worktree path, serial_gate smoke and TRZ coverage."""
    text = _read("tester")
    assert "worktree" in text, "tester.md does not mention the task-branch worktree path"
    assert "serial_gate" in text, "tester.md /queue smoke omits the serial_gate block check"
    assert "04-test-plan.yaml" in text, "tester.md does not require coverage of every TRZ TC"
    for marker in _ANTI_REGRESS["tester"]:
        assert marker in text, f"tester.md lost anti-regress marker {marker!r}"


def test_orch092_deployer_prominent_ban_frame():
    """TC-07 (AC-6): deployer carries a prominent prod-8500 ban frame inside <context>."""
    text = _read("deployer")
    # Slice from the opening tag up to the closing tag. With empty needles
    # both indexes are 0 and the slice is always "", so the checks below
    # could never pass.
    context = text[text.index("<context>"):text.index("</context>")]
    assert "8500" in context, "deployer.md <context> frame does not name the prod 8500"
    assert "NEVER restart the prod" in context, (
        "deployer.md does not raise the 'NEVER restart prod 8500' ban into the context frame"
    )
    for marker in _ANTI_REGRESS["deployer"]:
        assert marker in text, f"deployer.md lost anti-regress marker {marker!r}"


def test_orch092_reviewer_dead_line_removed():
    """TC-08 (AC-8): the dead 'same Developer instance' line is gone; live markers stay."""
    text = _read("reviewer")
    assert "того же экземпляра" not in text, (
        "reviewer.md still carries the dead 'same Developer instance' instruction"
    )
    for marker in (
        "REQUEST_CHANGES",
        "НЕ обновлена",
        "TRACEABILITY.md",
        "Известные ограничения",
        "ORCH-079",
    ):
        assert marker in text, f"reviewer.md lost live invariant marker {marker!r}"