Files
orchestrator/tests/test_agent_prompts_canon.py
claude-bot 6d798c01ef docs(overview): витрина системы docs/overview/ — бизнес+тех, 3 аудитории, презентация (ORCH-011)
Единая точка входа в документацию платформы (ADR-001 D1–D9):
- docs/overview/ — 10 файлов: индекс (маршруты «Я заказчик / Я менеджер /
  Я разработчик» + норматив «изменил функциональность → обнови витрину в том же
  PR»), business.md (без жаргона, 6 сценариев), 7 тех-блоков (link-first),
  presentation.md (16 слайдов + процедура сборки «команда + Проверка:»).
- scripts/build_presentation.py — генератор .pptx в тёмном дизайне (python-pptx;
  чистый stdlib-парсер parse_slides + ленивый import pptx; бинарь не коммитится,
  build/ в .gitignore; зависимость НЕ в прод-образе — машинный гард TC-09).
- tests/test_system_docs.py — структурный анти-дрейф: derive-сверки стадий/
  гейтов/агентов импортом STAGE_TRANSITIONS/QG_CHECKS/glob промптов/config,
  валидность ссылок, FORBIDDEN-скан + секрет-эвристика, слайды каноническим
  парсером, NFR-2, указатели.
- reviewer.md — ось обзорных доков ORCH-079 расширена на витрину (D7; канон 52d
  байт-в-байт, только текст внутри секций) + анти-регресс ассерт в
  test_agent_prompts_canon.py.
- Указатели: README.md, CLAUDE.md (правила №2/№6, «Структура»),
  PRODUCT_VISION.md (врезка-ссылка), CHANGELOG.md.

Рантайм байт-в-байт: src/**, docker-compose.yml, Dockerfile, requirements* —
ноль изменений (docs+tests+dev-скрипт, паттерн ORCH-102/103). pytest: 1873 passed.

Refs: ORCH-011

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-11 09:36:40 +03:00

451 lines
19 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""ORCH-077 (ORCH-52d): structural canon of the 6 system prompts.
The 6 agent prompts (`.openclaw/agents/*.md`) are rewritten in the Anthropic XML
canon and taught to emit the mandatory 52c frontmatter schema. These tests are
pure-text structural checks (NO agent runs, NO `src/` import): they guard the
canon and the anti-regression inventory (TRZ §FR-6 / AC-4) so a future prompt
refactor cannot silently drop a working instruction or a machine-verdict key.
Covers test-plan TC-01..TC-07. TC-08 lives in
`tests/test_agent_frontmatter_no_model.py` (re-used, ORCH-074). The full
regression (TC-10) is the rest of `tests/`.
"""
import os
import re
import pytest
# Roles whose prompts (<role>.md under _AGENTS_DIR) are checked by this module.
_AGENTS = (
    "analyst",
    "architect",
    "developer",
    "reviewer",
    "tester",
    "deployer",
)

# This file sits in tests/, one directory below the repository root; the
# .openclaw/agents directory is located at that root.
_REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
_AGENTS_DIR = os.path.join(_REPO_ROOT, ".openclaw", "agents")

# ORCH-078 (ORCH-52e): traceability-marker standard (layer 4 of epic ORCH-52).
_TRACEABILITY = os.path.join(_REPO_ROOT, "docs", "_standards", "TRACEABILITY.md")

# Five mandatory XML sections, listed in their normative order (D1 / AC-1).
_REQUIRED_SECTIONS = (
    "context",
    "task",
    "deliverables",
    "constraints",
    "output_format",
)

# Six mandatory 52c schema fields (src/frontmatter.py::REQUIRED_FIELDS).
_SCHEMA_FIELDS = (
    "work_item",
    "stage",
    "author_agent",
    "status",
    "created_at",
    "model_used",
)

# Maps each role to the stage value(s) its prompt schema must pin (TRZ §FR-2).
_STAGE_BY_ROLE = {
    "analyst": ("analysis",),
    "architect": ("architecture",),
    "developer": ("development",),
    "reviewer": ("review",),
    "tester": ("testing",),
    "deployer": ("deploy-staging", "deploy"),
}

# Per-role substrings that MUST still be present after the prompt rewrite
# (anti-regression inventory, TRZ §FR-6).
_ANTI_REGRESS = {
    "analyst": [
        "01-brd.md",
        "02-trz.md",
        "03-acceptance-criteria.md",
        "04-test-plan.yaml",
        "Write tool",
    ],
    "architect": [
        "## Статус",
        "## Решение",
        "## Последствия",
        "docs/architecture/adr/",  # global cross-cutting ADR rule
        "back-to:analysis",  # escalation
        "arch:major-change",  # escalation
    ],
    "developer": [
        "TDD",
        "--no-verify",
        "--force-push",
        "свой PR",  # "do not merge your own PR"
        "Refs:",  # conventional commit footer
    ],
    "reviewer": [
        "REQUEST_CHANGES",
        "НЕ обновлена",  # "src/ changed, docs not updated -> REQUEST_CHANGES"
    ],
    "tester": [
        "pytest",
        "/health",
        "/status",
        "/queue",
    ],
    "deployer": [
        "docker exec orchestrator-staging",
        "pr_already_merged",
        "8500",  # "never restart 8500 from inside"
        "INFRA-WAIVED",  # ORCH-061 waiver
    ],
}
def _read(agent: str) -> str:
    """Return the UTF-8 text of ``<agent>.md`` located in ``_AGENTS_DIR``."""
    prompt_path = os.path.join(_AGENTS_DIR, f"{agent}.md")
    with open(prompt_path, encoding="utf-8") as handle:
        content = handle.read()
    return content
def _read_repo(*parts: str) -> str:
    """Return the UTF-8 text of the file at ``parts`` joined under ``_REPO_ROOT``."""
    target = os.path.join(_REPO_ROOT, *parts)
    with open(target, encoding="utf-8") as handle:
        content = handle.read()
    return content
@pytest.mark.parametrize("agent", _AGENTS)
def test_five_xml_sections_present(agent):
    """TC-01: every required XML section appears with both an open and a close tag."""
    prompt = _read(agent)
    for name in _REQUIRED_SECTIONS:
        opening = f"<{name}>"
        closing = f"</{name}>"
        assert opening in prompt, f"{agent}.md missing <{name}> open tag"
        assert closing in prompt, f"{agent}.md missing </{name}> close tag"
@pytest.mark.parametrize("agent", _AGENTS)
def test_six_schema_field_names_present(agent):
    """TC-02: the prompt mentions each of the 6 mandatory 52c schema field names."""
    prompt = _read(agent)
    for name in _SCHEMA_FIELDS:
        assert name in prompt, f"{agent}.md does not mention schema field {name!r}"
@pytest.mark.parametrize("agent", _AGENTS)
def test_schema_pins_role_specific_author_and_stage(agent):
    """TC-03: the prompt pins ``author_agent`` to the role and every stage of that role."""
    prompt = _read(agent)

    author_pin = f"author_agent: {agent}"
    assert author_pin in prompt, (
        f"{agent}.md does not pin 'author_agent: {agent}' in an example schema"
    )

    # A role may own more than one stage (the deployer has two).
    for stage_name in _STAGE_BY_ROLE[agent]:
        stage_pin = f"stage: {stage_name}"
        assert stage_pin in prompt, (
            f"{agent}.md does not pin 'stage: {stage_name}' in an example schema"
        )
@pytest.mark.parametrize("agent", _AGENTS)
def test_references_templates_and_a_reference_work_item(agent):
    """TC-04: the prompt links docs/_templates/ and names ORCH-073 or ORCH-088."""
    prompt = _read(agent)
    assert "docs/_templates/" in prompt, f"{agent}.md does not reference docs/_templates/"

    # Either of the two gold-standard work items satisfies the check.
    has_reference = any(item in prompt for item in ("ORCH-073", "ORCH-088"))
    assert has_reference, (
        f"{agent}.md does not reference a gold-standard work item (ORCH-073/ORCH-088)"
    )
def test_machine_verdict_keys_preserved_exact_case():
    """TC-05: verdict keys and their value sets are present with exact case.

    Checks the reviewer, tester and deployer prompts in that order.
    """
    reviewer_prompt = _read("reviewer")
    assert "verdict:" in reviewer_prompt
    assert all(value in reviewer_prompt for value in ("APPROVED", "REQUEST_CHANGES"))

    tester_prompt = _read("tester")
    assert "result:" in tester_prompt
    assert all(value in tester_prompt for value in ("PASS", "FAIL"))

    deployer_prompt = _read("deployer")
    assert "staging_status:" in deployer_prompt
    assert "deploy_status:" in deployer_prompt
    assert all(value in deployer_prompt for value in ("SUCCESS", "FAILED"))
def test_deployer_self_hosting_anti_regress():
    """TC-06: deployer.md still contains every deployer anti-regress marker.

    The markers cover the canonical staging command, the merge guard, the
    8500 ban and the waiver (see ``_ANTI_REGRESS["deployer"]``).
    """
    prompt = _read("deployer")
    required = _ANTI_REGRESS["deployer"]
    for marker in required:
        assert marker in prompt, f"deployer.md lost anti-regress marker {marker!r}"
@pytest.mark.parametrize("agent", _AGENTS)
def test_role_anti_regress_markers(agent):
    """TC-07: each role's anti-regression markers (TRZ §FR-6) are still in its prompt."""
    prompt = _read(agent)
    required = _ANTI_REGRESS[agent]
    for marker in required:
        assert marker in prompt, f"{agent}.md lost anti-regress marker {marker!r}"
# --------------------------------------------------------------------------- #
# ORCH-078 (ORCH-52e): traceability-marker standard + reading-rule anti-regress
# (TRZ §FR-1..FR-8; AC-1..AC-5, AC-8). Pure-text checks, NO `src/` import.
# --------------------------------------------------------------------------- #
def test_traceability_standard_exists_and_nonempty():
    """TC-01 (AC-1): the TRACEABILITY.md standard is a file with non-blank content."""
    assert os.path.isfile(_TRACEABILITY), "docs/_standards/TRACEABILITY.md is missing"

    body = _read_repo("docs", "_standards", "TRACEABILITY.md")
    assert body.strip(), (
        "TRACEABILITY.md is empty"
    )
def test_traceability_describes_marker_format_and_placement():
    """TC-02 (AC-1): the standard names the ORCH-NNN marker and its placement rule."""
    standard = _read_repo("docs", "_standards", "TRACEABILITY.md")
    assert "ORCH-NNN" in standard, "TRACEABILITY.md does not describe the ORCH-NNN marker"

    # Placement rule: the marker goes next to a non-trivial invariant, not on
    # trivial code; the Russian word for "invariant" is the probe.
    assert "инвариант" in standard, "TRACEABILITY.md does not state the placement rule"
def test_traceability_has_real_verifiable_example():
    """TC-03 (AC-1): the worked example refers to files that exist in the repo.

    A traceability standard whose example points at a missing file or ADR
    would refute itself, so both referenced paths are checked on disk.
    """
    standard = _read_repo("docs", "_standards", "TRACEABILITY.md")
    assert "src/serial_gate.py" in standard and "ORCH-088" in standard, (
        "TRACEABILITY.md lacks the serial_gate/ORCH-088 worked example"
    )

    gate_module = os.path.join(_REPO_ROOT, "src", "serial_gate.py")
    assert os.path.isfile(gate_module), (
        "example references src/serial_gate.py which does not exist"
    )

    adr_path = os.path.join(
        _REPO_ROOT,
        "docs",
        "work-items",
        "ORCH-088",
        "06-adr",
        "ADR-001-serial-gate.md",
    )
    assert os.path.isfile(adr_path), "example references an ORCH-088 ADR that does not exist"
def test_traceability_documents_fallback_access():
    """TC-04 (AC-4): the standard contains the ``git show origin/main`` fallback."""
    standard = _read_repo("docs", "_standards", "TRACEABILITY.md")
    fallback_cmd = "git show origin/main:docs/work-items/"
    assert fallback_cmd in standard, (
        "TRACEABILITY.md does not document the cross-branch ADR fallback"
    )
def test_traceability_documents_anti_archeology():
    """TC-05 (AC-5): the standard states the 3+ markers -> cross-cutting ADR rule."""
    standard = _read_repo("docs", "_standards", "TRACEABILITY.md")

    adr_dir = "docs/architecture/adr/"
    assert adr_dir in standard, (
        "TRACEABILITY.md anti-archeology rule does not point at the cross-cutting ADR dir"
    )

    threshold = "3+"
    assert threshold in standard, "TRACEABILITY.md does not state the 3+ markers threshold"
def test_developer_carries_reading_rule_and_fallback():
    """TC-06 (AC-2, AC-4): developer.md names the standard and the git-show fallback."""
    prompt = _read("developer")
    assert "TRACEABILITY.md" in prompt, "developer.md does not reference TRACEABILITY.md"

    fallback_cmd = "git show origin/main:docs/work-items/"
    assert fallback_cmd in prompt, (
        "developer.md does not carry the cross-branch ADR fallback"
    )
def test_architect_carries_reading_rule_and_anti_archeology():
    """TC-07 (AC-2, AC-5): architect.md names the standard and the 3+ markers rule."""
    prompt = _read("architect")
    assert "TRACEABILITY.md" in prompt, "architect.md does not reference TRACEABILITY.md"

    threshold = "3+"
    assert threshold in prompt, "architect.md does not carry the 3+ markers anti-archeology rule"
def test_reviewer_carries_traceability_control_axis():
    """TC-08 (AC-3): reviewer.md references the TRACEABILITY.md standard."""
    prompt = _read("reviewer")
    standard_name = "TRACEABILITY.md"
    assert standard_name in prompt, "reviewer.md does not reference TRACEABILITY.md"
def test_claude_md_and_readme_reference_traceability_standard():
    """TC-12 (AC-8): CLAUDE.md and docs/architecture/README.md name the standard."""
    standard_name = "TRACEABILITY.md"

    claude_md = _read_repo("CLAUDE.md")
    assert standard_name in claude_md, (
        "CLAUDE.md does not reference docs/_standards/TRACEABILITY.md"
    )

    arch_readme = _read_repo("docs", "architecture", "README.md")
    assert standard_name in arch_readme, (
        "architecture README does not reference docs/_standards/TRACEABILITY.md"
    )
# --------------------------------------------------------------------------- #
# ORCH-079 (ORCH-52f): reviewer overview-docs axis (layer 5 of epic ORCH-52).
# Pure-text anti-drift check (TRZ §FR-6 / AC-5), NO `src/` import.
# --------------------------------------------------------------------------- #
def test_reviewer_carries_overview_docs_axis():
    """ORCH-079 TC-01 (AC-5): reviewer.md covers the README overview-docs axis.

    The reviewer prompt must require the README "Известные ограничения"
    (known limitations) section to be updated when a PR closes a documented
    limitation. Like the traceability control axis, this check protects the
    rule from silently drifting away in a later prompt refactor.
    """
    prompt = _read("reviewer")

    limitations_heading = "Известные ограничения"
    assert limitations_heading in prompt, (
        "reviewer.md does not mention the README 'Известные ограничения' overview-docs axis"
    )

    anchor = "ORCH-079"
    assert anchor in prompt, (
        "reviewer.md does not anchor the overview-docs axis to ORCH-079"
    )
def test_reviewer_overview_axis_covers_system_showcase():
    """ORCH-011 (ADR-001 D7): the overview-docs axis also names ``docs/overview/``.

    The ORCH-079 axis is explicitly extended to the system showcase: a PR that
    changes functionality described in the showcase without updating it must
    yield a finding >= P1. It is guarded here because the axis history
    (ORCH-079) shows overview docs rot unless named explicitly in the prompt.
    """
    prompt = _read("reviewer")

    showcase_dir = "docs/overview/"
    assert showcase_dir in prompt, (
        "reviewer.md does not extend the overview-docs axis to the docs/overview/ showcase"
    )

    anchor = "ORCH-011"
    assert anchor in prompt, (
        "reviewer.md does not anchor the showcase extension to ORCH-011"
    )
# --------------------------------------------------------------------------- #
# ORCH-092 (epilogue of epic ORCH-52): prompt audit of the 6 agents —
# de-hardcode date/model, gate-name parity, escalation sections, dead-line
# removal, tester enrichment, deployer ban-frame. Pure-text checks; only
# TC-03 imports `src/` (the QG_CHECKS registry parity check).
# Covers test-plan TC-01..TC-08 (TC-09/TC-10/TC-11 = existing canon + full regression).
# --------------------------------------------------------------------------- #
def _fenced_blocks(text: str) -> list[str]:
    """Return the body of every ``` fenced code block (the *copyable* examples).

    A line whose first non-blank characters are three backticks toggles the
    "inside a fence" state; fence lines themselves are never part of a body.
    Each body is the fenced lines joined with ``"\\n"``.

    A fence that is opened but never closed is treated as running to the end
    of ``text`` and its body is returned as well, so a literal value inside a
    malformed (unterminated) example cannot slip past the scans that rely on
    this helper.

    Args:
        text: Markdown source to scan.

    Returns:
        The fenced bodies in document order; empty when there is no fence.
    """
    blocks: list[str] = []
    buf: list[str] = []
    inside = False
    for line in text.splitlines():
        if line.lstrip().startswith("```"):
            if inside:
                # Closing fence: flush what was collected since the opener.
                blocks.append("\n".join(buf))
                buf = []
            inside = not inside
            continue
        if inside:
            buf.append(line)
    if inside:
        # Unterminated fence: keep its content instead of silently dropping it.
        blocks.append("\n".join(buf))
    return blocks
@pytest.mark.parametrize("agent", _AGENTS)
def test_orch092_created_at_is_placeholder_not_literal(agent):
    """TC-01 (AC-1): ``created_at`` uses a placeholder plus a substitution note.

    The field name stays; only its value becomes a placeholder. No literal
    date may remain inside a ``` fenced (copyable) block, otherwise an agent
    would copy a stale date verbatim.
    """
    prompt = _read(agent)
    assert "created_at: <YYYY-MM-DD>" in prompt, (
        f"{agent}.md does not use the created_at: <YYYY-MM-DD> placeholder"
    )

    # A digit right after "created_at:" means a hardcoded date value.
    literal_date = re.compile(r"created_at:\s*\d")
    for example in _fenced_blocks(prompt):
        assert literal_date.search(example) is None, (
            f"{agent}.md still hardcodes a literal created_at date in a copyable block"
        )

    assert "date +%F" in prompt, (
        f"{agent}.md does not instruct to substitute the actual date (date +%F)"
    )
@pytest.mark.parametrize("agent", _AGENTS)
def test_orch092_model_used_is_placeholder_not_literal(agent):
    """TC-02 (AC-2): ``model_used`` uses a placeholder in copyable examples.

    The literal ``model_used: claude-opus-4-8`` may still appear as a
    reference outside fenced blocks (e.g. in the field table), but it must
    NOT appear inside a copyable example.
    """
    prompt = _read(agent)
    assert "model_used: <resolve ORCH-41>" in prompt, (
        f"{agent}.md does not use the model_used: <resolve ORCH-41> placeholder"
    )

    literal_model = "model_used: claude-opus-4-8"
    for example in _fenced_blocks(prompt):
        assert literal_model not in example, (
            f"{agent}.md still hardcodes model_used: claude-opus-4-8 in a copyable block"
        )
def test_orch092_gate_names_match_qg_registry():
    """TC-03 (AC-3): every ``check_*`` name in the 6 prompts is a QG_CHECKS key.

    This is the only test in the module that imports ``src/`` (integration).
    It guards against a prompt naming a gate that does not exist and confirms
    that ``check_tests_passed`` is a valid gate.
    """
    from src.qg.checks import QG_CHECKS

    gate_name = re.compile(r"check_[a-z_]+")
    for agent in _AGENTS:
        # Deduplicate and sort so failures are reported in a stable order.
        mentioned = sorted(set(gate_name.findall(_read(agent))))
        for name in mentioned:
            assert name in QG_CHECKS, (
                f"{agent}.md references gate {name!r} which is absent from QG_CHECKS"
            )

    assert "check_tests_passed" in QG_CHECKS, "check_tests_passed must remain a real gate"
def test_orch092_developer_pr_oversize_is_escalation_not_split():
    """TC-04 (AC-4): the 'split into smaller PRs' instruction is now an escalation."""
    prompt = _read("developer")

    obsolete = "разбивай на меньшие PR"
    assert obsolete not in prompt, (
        "developer.md still carries the unrealisable 'split into smaller PRs' instruction"
    )

    # Both phrases must be present: task-level scope and decomposition.
    assert "на уровне задач" in prompt and "декомпозиц" in prompt, (
        "developer.md does not reframe an oversize PR as task-level decomposition"
    )

    assert "свой PR" in prompt, "developer.md lost the 'свой PR' marker"
@pytest.mark.parametrize("agent", ("developer", "reviewer", "tester"))
def test_orch092_escalation_section_present_after_success(agent):
    """TC-05 (AC-5): dev/reviewer/tester carry <escalation> after </success_criteria>."""
    prompt = _read(agent)

    # Real section tags occupy a line of their own. An inline `<escalation>`
    # mention inside <constraints> is wrapped in backticks and must not be
    # mistaken for the section, hence the line-anchored patterns.
    section_open = re.search(r"(?m)^<escalation>\s*$", prompt)
    section_close = re.search(r"(?m)^</escalation>\s*$", prompt)
    assert section_open is not None and section_close is not None, (
        f"{agent}.md is missing the <escalation> section"
    )

    success_close = re.search(r"(?m)^</success_criteria>\s*$", prompt)
    assert success_close is not None and section_open.start() > success_close.start(), (
        f"{agent}.md places <escalation> before </success_criteria> (breaks section order)"
    )
def test_orch092_escalation_routes_are_role_specific():
    """TC-05 (AC-5): each role's prompt contains that role's escalation route."""
    expectations = (
        ("developer", "back-to:analysis", "developer lacks back-to:analysis route"),
        ("tester", "back-to:dev", "tester lacks back-to:dev route"),
        ("reviewer", "REQUEST_CHANGES", "reviewer lacks REQUEST_CHANGES route"),
    )
    for role, route, failure in expectations:
        assert route in _read(role), failure
def test_orch092_tester_enriched():
    """TC-06 (AC-7): tester.md mentions the worktree, serial_gate and the test plan.

    The tester anti-regress markers are re-checked afterwards.
    """
    prompt = _read("tester")

    assert "worktree" in prompt, "tester.md does not mention the task-branch worktree path"
    assert "serial_gate" in prompt, "tester.md /queue smoke omits the serial_gate block check"
    assert "04-test-plan.yaml" in prompt, "tester.md does not require coverage of every TRZ TC"

    required = _ANTI_REGRESS["tester"]
    for marker in required:
        assert marker in prompt, f"tester.md lost anti-regress marker {marker!r}"
def test_orch092_deployer_prominent_ban_frame():
    """TC-07 (AC-6): deployer carries a prominent prod-8500 ban frame inside <context>.

    The <context> frame is the text from the first ``<context>`` tag up to the
    first ``</context>`` tag that follows it. A missing tag is reported as an
    assertion failure with a message, not as a bare ``ValueError`` raised by
    ``str.index``. The deployer anti-regress markers are re-checked afterwards
    against the whole prompt.
    """
    text = _read("deployer")

    start = text.find("<context>")
    assert start != -1, "deployer.md is missing the <context> open tag"
    # Search for the close tag only after the open tag so the frame is never
    # cut by an earlier mention of the closing tag.
    end = text.find("</context>", start)
    assert end != -1, "deployer.md is missing the </context> close tag"

    context = text[start:end]
    assert "8500" in context, "deployer.md <context> frame does not name the prod 8500"
    assert "NEVER restart the prod" in context, (
        "deployer.md does not raise the 'NEVER restart prod 8500' ban into the context frame"
    )

    for marker in _ANTI_REGRESS["deployer"]:
        assert marker in text, f"deployer.md lost anti-regress marker {marker!r}"
def test_orch092_reviewer_dead_line_removed():
    """TC-08 (AC-8): the dead 'same Developer instance' line is gone; live markers stay."""
    prompt = _read("reviewer")

    dead_phrase = "того же экземпляра"
    assert dead_phrase not in prompt, (
        "reviewer.md still carries the dead 'same Developer instance' instruction"
    )

    # Invariants that must remain in the reviewer prompt after the cleanup.
    live_markers = (
        "REQUEST_CHANGES",
        "НЕ обновлена",
        "TRACEABILITY.md",
        "Известные ограничения",
        "ORCH-079",
    )
    for marker in live_markers:
        assert marker in prompt, f"reviewer.md lost live invariant marker {marker!r}"