# orchestrator/tests/test_agent_prompts_canon.py

"""ORCH-077 (ORCH-52d): structural canon of the 6 system prompts.

The 6 agent prompts (`.openclaw/agents/*.md`) are rewritten in the Anthropic XML
canon and taught to emit the mandatory 52c frontmatter schema. These tests are
pure-text structural checks (NO agent runs, NO `src/` import): they guard the
canon and the anti-regression inventory (TRZ §FR-6 / AC-4) so a future prompt
refactor cannot silently drop a working instruction or a machine-verdict key.

Covers test-plan TC-01..TC-07. TC-08 lives in
`tests/test_agent_frontmatter_no_model.py` (re-used, ORCH-074). The full
regression (TC-10) is the rest of `tests/`.
"""
import os

import pytest

# Role names of the 6 agents. `_read()` loads `<role>.md` for a given name, and
# the parametrized tests in this module run once per entry.
_AGENTS = ("analyst", "architect", "developer", "reviewer", "tester", "deployer")

# Directory holding the prompt files: two dirname() hops up from this file,
# then `.openclaw/agents`.
# tests/ is one level under the repo root; .openclaw/agents lives at the root.
_AGENTS_DIR = os.path.join(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
    ".openclaw", "agents",
)

# The 5 mandatory XML sections, in normative order (D1 / AC-1).
# NOTE: the tests in this module only check that each open/close tag is
# present in the prompt text; the order itself is not asserted.
_REQUIRED_SECTIONS = ("context", "task", "deliverables", "constraints", "output_format")

# The 6 mandatory 52c schema fields (src/frontmatter.py::REQUIRED_FIELDS).
# Kept as a literal copy; each name is checked as a plain substring of the
# prompt text.
_SCHEMA_FIELDS = (
    "work_item",
    "stage",
    "author_agent",
    "status",
    "created_at",
    "model_used",
)

# Role -> the stage value(s) the prompt's schema must pin (TRZ §FR-2).
# Every listed value must appear in the prompt as the literal text
# `stage: <value>`; the deployer has two such values.
_STAGE_BY_ROLE = {
    "analyst": ("analysis",),
    "architect": ("architecture",),
    "developer": ("development",),
    "reviewer": ("review",),
    "tester": ("testing",),
    "deployer": ("deploy-staging", "deploy"),
}

# Anti-regression markers per role that MUST survive the rewrite (TRZ §FR-6).
# Each marker is matched as a case-sensitive substring of the role's prompt,
# so the literals (including the Russian ones) must stay byte-exact.
_ANTI_REGRESS = {
    "analyst": [
        "01-brd.md",
        "02-trz.md",
        "03-acceptance-criteria.md",
        "04-test-plan.yaml",
        "Write tool",
    ],
    "architect": [
        "## Статус",               # Russian heading: "Status"
        "## Решение",              # Russian heading: "Decision"
        "## Последствия",          # Russian heading: "Consequences"
        "docs/architecture/adr/",  # global cross-cutting ADR rule
        "back-to:analysis",        # escalation
        "arch:major-change",       # escalation
    ],
    "developer": [
        "TDD",
        "--no-verify",
        "--force-push",
        "свой PR",                 # Russian, "your own PR": from the "do not merge your own PR" rule
        "Refs:",                   # conventional commit footer
    ],
    "reviewer": [
        "REQUEST_CHANGES",
        "НЕ обновлена",            # Russian, "NOT updated": "src/ changed, docs not updated -> REQUEST_CHANGES"
    ],
    "tester": [
        "pytest",
        "/health",
        "/status",
        "/queue",
    ],
    "deployer": [
        "docker exec orchestrator-staging",
        "pr_already_merged",
        "8500",                    # "never restart 8500 from inside"
        "INFRA-WAIVED",            # ORCH-061 waiver
    ],
}


def _read(agent: str) -> str:
    """Return the full text of ``<agent>.md`` from ``_AGENTS_DIR``.

    The file is decoded as UTF-8. A missing file is not handled here, so the
    ``open()`` error propagates to the calling test.
    """
    filename = f"{agent}.md"
    prompt_path = os.path.join(_AGENTS_DIR, filename)
    with open(prompt_path, encoding="utf-8") as prompt_file:
        contents = prompt_file.read()
    return contents


@pytest.mark.parametrize("agent", _AGENTS)
def test_five_xml_sections_present(agent):
    """TC-01: every required XML section has an opening and a closing tag.

    Only the presence of the literal tags in the prompt text is checked;
    their relative order and nesting are not.
    """
    prompt = _read(agent)
    for name in _REQUIRED_SECTIONS:
        # For each section the opening tag is verified before the closing one.
        for kind, tag in (("open", f"<{name}>"), ("close", f"</{name}>")):
            assert tag in prompt, f"{agent}.md missing {tag} {kind} tag"


@pytest.mark.parametrize("agent", _AGENTS)
def test_six_schema_field_names_present(agent):
    """TC-02: the prompt mentions every mandatory 52c schema field by name.

    This is a plain substring check over the whole prompt text; it does not
    parse any frontmatter.
    """
    prompt = _read(agent)
    for field_name in _SCHEMA_FIELDS:
        assert field_name in prompt, (
            f"{agent}.md does not mention schema field {field_name!r}"
        )


@pytest.mark.parametrize("agent", _AGENTS)
def test_schema_pins_role_specific_author_and_stage(agent):
    """TC-03: the prompt pins ``author_agent: <role>`` and each ``stage: <value>``.

    Both pins are matched as literal ``key: value`` text anywhere in the
    prompt; the stage values come from ``_STAGE_BY_ROLE`` for the role.
    """
    prompt = _read(agent)

    author_pin = f"author_agent: {agent}"
    assert author_pin in prompt, (
        f"{agent}.md does not pin '{author_pin}' in an example schema"
    )

    # Looked up only after the author pin has passed, as before.
    for stage_value in _STAGE_BY_ROLE[agent]:
        stage_pin = f"stage: {stage_value}"
        assert stage_pin in prompt, (
            f"{agent}.md does not pin '{stage_pin}' in an example schema"
        )


@pytest.mark.parametrize("agent", _AGENTS)
def test_references_templates_and_a_reference_work_item(agent):
    """TC-04: the prompt mentions docs/_templates/ and a reference work item.

    At least one of ORCH-073 / ORCH-088 must appear in the prompt text.
    """
    prompt = _read(agent)

    templates_dir = "docs/_templates/"
    assert templates_dir in prompt, f"{agent}.md does not reference docs/_templates/"

    gold_standards = ("ORCH-073", "ORCH-088")
    assert any(item in prompt for item in gold_standards), (
        f"{agent}.md does not reference a gold-standard work item (ORCH-073/ORCH-088)"
    )


def test_machine_verdict_keys_preserved_exact_case():
    """TC-05: machine-verdict keys + value sets survive with exact case.

    For the reviewer, tester and deployer prompts, every listed key
    (``verdict:``, ``result:``, ``staging_status:``, ``deploy_status:``) and
    every listed value must appear in the prompt text. ``in`` on strings is
    case-sensitive, which is what enforces the exact-case requirement.

    Each token is asserted on its own with a message naming the prompt file
    and the missing token, matching the other tests in this module, so a
    failure points at the lost token directly.

    NOTE: these are substring checks, so ``FAIL`` is also satisfied by
    ``FAILED`` and ``PASS`` by ``PASSED``.
    """
    # Role -> tokens that must appear verbatim, in the order they are checked.
    expected_tokens = {
        "reviewer": ("verdict:", "APPROVED", "REQUEST_CHANGES"),
        "tester": ("result:", "PASS", "FAIL"),
        "deployer": ("staging_status:", "deploy_status:", "SUCCESS", "FAILED"),
    }
    for agent, tokens in expected_tokens.items():
        text = _read(agent)
        for token in tokens:
            assert token in text, (
                f"{agent}.md lost machine-verdict token {token!r}"
            )


def test_deployer_self_hosting_anti_regress():
    """TC-06: deployer keeps canonical staging cmd, merge-guard, 8500 ban, waiver.

    Checks every marker listed under ``"deployer"`` in ``_ANTI_REGRESS`` as a
    substring of the deployer prompt.
    """
    prompt = _read("deployer")
    required_markers = _ANTI_REGRESS["deployer"]
    for required in required_markers:
        assert required in prompt, f"deployer.md lost anti-regress marker {required!r}"


@pytest.mark.parametrize("agent", _AGENTS)
def test_role_anti_regress_markers(agent):
    """TC-07: per-role anti-regression markers (TRZ §FR-6) survive the rewrite.

    Every marker listed for the role in ``_ANTI_REGRESS`` must occur as a
    substring of that role's prompt.
    """
    prompt = _read(agent)
    required_markers = _ANTI_REGRESS[agent]
    for required in required_markers:
        assert required in prompt, f"{agent}.md lost anti-regress marker {required!r}"