Замыкающий слой эпика ORCH-52.

Тело всех 6 промптов .openclaw/agents/*.md переписано в едином каноне Anthropic (5 обязательных XML-секций <context>/<task>/<deliverables>/<constraints>/<output_format>, запреты «❌ X → ✅ Y», <thinking> у решающих ролей), и каждый промпт добровольно эмитит 6-полевую frontmatter-схему 52c (work_item/stage/author_agent/status/created_at/model_used) аддитивно — рядом с machine-verdict ключом, не меняя его имя/регистр/значения (verdict:/result:/staging_status:/deploy_status:/security_status:).

Docs/prompts-only: src/**, STAGE_TRANSITIONS, QG_CHECKS, схема БД не тронуты; frontmatter_validation_strict остаётся False (enforcement не включён). Функциональное содержание старых промптов перенесено 1:1 (инвентарь TRZ §FR-6).

- tests/test_agent_prompts_canon.py: структурный анти-регресс (TC-01…TC-07)
- tests/manual/ab_prompt_compare.md: метод A/B (TC-09 / AC-6)
- CLAUDE.md, CHANGELOG.md обновлены; README/ADR — архитектором

Полный регресс pytest tests/ -q зелёный (1244); test_agent_frontmatter_no_model остаётся зелёным.

Refs: ORCH-077
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
166 lines
5.7 KiB
Python
166 lines
5.7 KiB
Python
"""ORCH-077 (ORCH-52d): structural canon of the 6 system prompts.
|
||
|
||
The 6 agent prompts (`.openclaw/agents/*.md`) are rewritten in the Anthropic XML
|
||
canon and taught to emit the mandatory 52c frontmatter schema. These tests are
|
||
pure-text structural checks (NO agent runs, NO `src/` import): they guard the
|
||
canon and the anti-regression inventory (TRZ §FR-6 / AC-4) so a future prompt
|
||
refactor cannot silently drop a working instruction or a machine-verdict key.
|
||
|
||
Covers test-plan TC-01..TC-07. TC-08 lives in
|
||
`tests/test_agent_frontmatter_no_model.py` (re-used, ORCH-074). The full
|
||
regression (TC-10) is the rest of `tests/`.
|
||
"""
|
||
import os
import re

import pytest
|
||
|
||
# Roles whose prompt file (`<role>.md`) is checked by this module.
_AGENTS = (
    "analyst",
    "architect",
    "developer",
    "reviewer",
    "tester",
    "deployer",
)
|
||
|
||
# tests/ is one level under the repo root; .openclaw/agents lives at the root.
# Derived from this file's absolute path, so the result does not depend on the
# current working directory.
_AGENTS_DIR = os.path.join(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
    ".openclaw",
    "agents",
)
|
||
|
||
# The 5 mandatory XML sections, in normative order (D1 / AC-1).
# Each name is checked as both an opening `<name>` and a closing `</name>` tag.
_REQUIRED_SECTIONS = (
    "context",
    "task",
    "deliverables",
    "constraints",
    "output_format",
)
|
||
|
||
# The 6 mandatory 52c schema fields (src/frontmatter.py::REQUIRED_FIELDS).
# Kept as a literal copy because this module deliberately imports nothing
# from `src/`.
_SCHEMA_FIELDS = (
    "work_item",
    "stage",
    "author_agent",
    "status",
    "created_at",
    "model_used",
)
|
||
|
||
# Role -> the stage value(s) the prompt's schema must pin (TRZ §FR-2).
# Every listed value must appear in the role's prompt as `stage: <value>`;
# the deployer is the only role with two stages.
_STAGE_BY_ROLE = {
    "analyst": ("analysis",),
    "architect": ("architecture",),
    "developer": ("development",),
    "reviewer": ("review",),
    "tester": ("testing",),
    "deployer": ("deploy-staging", "deploy"),
}
|
||
|
||
# Anti-regression markers per role that MUST survive the rewrite (TRZ §FR-6).
# Each marker is a literal, case-sensitive substring looked up in the role's
# prompt text; several are Russian on purpose and must stay byte-exact.
_ANTI_REGRESS = {
    "analyst": [
        "01-brd.md",
        "02-trz.md",
        "03-acceptance-criteria.md",
        "04-test-plan.yaml",
        "Write tool",
    ],
    "architect": [
        "## Статус",
        "## Решение",
        "## Последствия",
        "docs/architecture/adr/",  # global cross-cutting ADR rule
        "back-to:analysis",  # escalation
        "arch:major-change",  # escalation
    ],
    "developer": [
        "TDD",
        "--no-verify",
        "--force-push",
        "свой PR",  # "не мержи свой PR" (do not merge your own PR)
        "Refs:",  # conventional commit footer
    ],
    "reviewer": [
        "REQUEST_CHANGES",
        "НЕ обновлена",  # "src/ changed, docs not updated -> REQUEST_CHANGES"
    ],
    "tester": [
        "pytest",
        "/health",
        "/status",
        "/queue",
    ],
    "deployer": [
        "docker exec orchestrator-staging",
        "pr_already_merged",
        "8500",  # "never restart 8500 from inside"
        "INFRA-WAIVED",  # ORCH-061 waiver
    ],
}
|
||
|
||
|
||
def _read(agent: str) -> str:
    """Return the full text of ``<agent>.md`` from the agents directory.

    Args:
        agent: Prompt file stem, e.g. ``"reviewer"``.

    Returns:
        The file content decoded as UTF-8.

    Raises:
        OSError: If the prompt file cannot be opened (for example
            ``FileNotFoundError`` when it does not exist).
    """
    path = os.path.join(_AGENTS_DIR, f"{agent}.md")
    # Explicit UTF-8: the prompts contain Cyrillic text, so the platform
    # default encoding must not be relied on.
    with open(path, encoding="utf-8") as f:
        return f.read()
|
||
|
||
|
||
@pytest.mark.parametrize("agent", _AGENTS)
def test_five_xml_sections_present(agent):
    """TC-01: each prompt carries all 5 XML sections (open + close tag).

    Only the presence of each tag is checked; tag order and nesting are not.
    """
    text = _read(agent)
    # Collect every absent tag first so that a single failure lists all of
    # them instead of stopping at the first one.
    missing = [
        tag
        for section in _REQUIRED_SECTIONS
        for tag in (f"<{section}>", f"</{section}>")
        if tag not in text
    ]
    assert not missing, f"{agent}.md missing XML tag(s): {', '.join(missing)}"
|
||
|
||
|
||
@pytest.mark.parametrize("agent", _AGENTS)
def test_six_schema_field_names_present(agent):
    """TC-02: each prompt names all 6 mandatory 52c schema fields.

    Field names are matched as whole identifiers rather than raw substrings.
    A plain ``"status" in text`` is already satisfied by ``staging_status:``
    or ``deploy_status:``, so it could not notice the ``status`` field itself
    being dropped from a prompt.
    """
    text = _read(agent)
    # `\b` treats `_` as a word character, so `status` does not match inside
    # `staging_status` and `stage` does not match inside `stages`.
    missing = [
        field
        for field in _SCHEMA_FIELDS
        if not re.search(rf"\b{re.escape(field)}\b", text)
    ]
    assert not missing, f"{agent}.md does not mention schema field(s) {missing!r}"
|
||
|
||
|
||
@pytest.mark.parametrize("agent", _AGENTS)
def test_schema_pins_role_specific_author_and_stage(agent):
    """TC-03: author_agent == role and the role's stage(s) are pinned in the schema.

    Both checks are literal substring matches on ``key: value`` with exactly
    one space after the colon.
    """
    text = _read(agent)
    assert f"author_agent: {agent}" in text, (
        f"{agent}.md does not pin 'author_agent: {agent}' in an example schema"
    )
    # A role may own more than one stage; every one of them must be pinned.
    for stage in _STAGE_BY_ROLE[agent]:
        assert f"stage: {stage}" in text, (
            f"{agent}.md does not pin 'stage: {stage}' in an example schema"
        )
|
||
|
||
|
||
@pytest.mark.parametrize("agent", _AGENTS)
def test_references_templates_and_a_reference_work_item(agent):
    """TC-04: each prompt links docs/_templates/ and at least one reference work item."""
    text = _read(agent)
    assert "docs/_templates/" in text, f"{agent}.md does not reference docs/_templates/"
    # Either gold-standard work item is enough; both are not required.
    assert ("ORCH-073" in text) or ("ORCH-088" in text), (
        f"{agent}.md does not reference a gold-standard work item (ORCH-073/ORCH-088)"
    )
|
||
|
||
|
||
def test_machine_verdict_keys_preserved_exact_case():
    """TC-05: machine-verdict keys + value sets survive with exact case.

    Every token is a case-sensitive substring check against the role's prompt.
    """
    # (role, machine-verdict key(s), verdict values that must still be named)
    expectations = (
        ("reviewer", ("verdict:",), ("APPROVED", "REQUEST_CHANGES")),
        ("tester", ("result:",), ("PASS", "FAIL")),
        ("deployer", ("staging_status:", "deploy_status:"), ("SUCCESS", "FAILED")),
    )
    for role, keys, values in expectations:
        text = _read(role)
        for token in keys + values:
            # An explicit message names the lost token; a bare assert would
            # only show the comparison against the whole prompt text.
            assert token in text, f"{role}.md lost machine-verdict token {token!r}"
|
||
|
||
|
||
def test_deployer_self_hosting_anti_regress():
    """TC-06: deployer keeps canonical staging cmd, merge-guard, 8500 ban, waiver."""
    deployer = _read("deployer")
    # Gather every lost marker so one failure reports the complete set.
    lost = [marker for marker in _ANTI_REGRESS["deployer"] if marker not in deployer]
    assert not lost, f"deployer.md lost anti-regress marker(s) {lost!r}"
|
||
|
||
|
||
@pytest.mark.parametrize("agent", _AGENTS)
def test_role_anti_regress_markers(agent):
    """TC-07: per-role anti-regression markers (TRZ §FR-6) survive the rewrite."""
    text = _read(agent)
    # Gather every lost marker so one failure reports the complete set.
    lost = [marker for marker in _ANTI_REGRESS[agent] if marker not in text]
    assert not lost, f"{agent}.md lost anti-regress marker(s) {lost!r}"
|