# orchestrator/tests/test_agent_prompts_canon.py

"""ORCH-077 (ORCH-52d): structural canon of the 6 system prompts.

The 6 agent prompts (`.openclaw/agents/*.md`) are rewritten in the Anthropic XML
canon and taught to emit the mandatory 52c frontmatter schema. These tests are
pure-text structural checks (NO agent runs, NO `src/` import): they guard the
canon and the anti-regression inventory (TRZ §FR-6 / AC-4) so a future prompt
refactor cannot silently drop a working instruction or a machine-verdict key.

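Covers the ORCH-077 test-plan cases TC-01..TC-07. TC-08 lives in
`tests/test_agent_frontmatter_no_model.py` (re-used, ORCH-074). The full
regression (TC-10) is the rest of `tests/`. The ORCH-078 and ORCH-079 sections
further down in this file carry their own, separately numbered test cases.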
"""
import os

import pytest

# Role names of the prompts under test. Each name doubles as the prompt's file
# stem: `_read(agent)` opens `<agent>.md` inside `_AGENTS_DIR`, and the
# parametrized tests in this file run once per entry.
_AGENTS = ("analyst", "architect", "developer", "reviewer", "tester", "deployer")

# tests/ is one level under the repo root; .openclaw/agents lives at the root.
# Two dirname() calls on this file's absolute path: the first yields the
# directory holding this file, the second yields that directory's parent.
_REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
_AGENTS_DIR = os.path.join(_REPO_ROOT, ".openclaw", "agents")

# ORCH-078 (ORCH-52e): the traceability-marker standard (layer 4 of epic ORCH-52).
_TRACEABILITY = os.path.join(_REPO_ROOT, "docs", "_standards", "TRACEABILITY.md")

# The 5 mandatory XML sections, in normative order (D1 / AC-1).
# These are bare tag names; the presence test wraps each one as `<name>` and
# `</name>`. NOTE(review): that test checks membership only, so the order
# listed here is documentation and is not enforced by the checks in this file.
_REQUIRED_SECTIONS = ("context", "task", "deliverables", "constraints", "output_format")

# The 6 mandatory 52c schema fields (src/frontmatter.py::REQUIRED_FIELDS).
# The list is duplicated here by hand because these tests do not import `src/`;
# NOTE(review): it has to be kept in sync with that module manually.
# Each name is looked up as a plain substring of the prompt text.
_SCHEMA_FIELDS = (
    "work_item",
    "stage",
    "author_agent",
    "status",
    "created_at",
    "model_used",
)

# Role -> the stage value(s) the prompt's schema must pin (TRZ §FR-2).
# Values are tuples because a role may own more than one stage (deployer has
# two); every listed value must appear in the prompt as the text `stage: <value>`.
_STAGE_BY_ROLE = {
    "analyst": ("analysis",),
    "architect": ("architecture",),
    "developer": ("development",),
    "reviewer": ("review",),
    "tester": ("testing",),
    "deployer": ("deploy-staging", "deploy"),
}

# Anti-regression markers per role that MUST survive the rewrite (TRZ §FR-6).
# Every marker is matched as an exact, case-sensitive substring of the role's
# prompt file, so the literals below (including the Russian ones) must stay
# byte-for-byte identical to the wording used in the prompts.
_ANTI_REGRESS = {
    # File names the analyst prompt must keep naming, plus the "Write tool" phrase.
    "analyst": [
        "01-brd.md",
        "02-trz.md",
        "03-acceptance-criteria.md",
        "04-test-plan.yaml",
        "Write tool",
    ],
    # The three "## ..." markers are markdown headings written in Russian:
    # "Status", "Decision", "Consequences".
    "architect": [
        "## Статус",
        "## Решение",
        "## Последствия",
        "docs/architecture/adr/",  # global cross-cutting ADR rule
        "back-to:analysis",        # escalation
        "arch:major-change",       # escalation
    ],
    "developer": [
        "TDD",
        "--no-verify",
        "--force-push",
        "свой PR",                 # tail of "не мержи свой PR" ("do not merge your own PR")
        "Refs:",                   # conventional commit footer
    ],
    "reviewer": [
        "REQUEST_CHANGES",
        "НЕ обновлена",            # Russian "NOT updated": "src/ changed, docs not updated -> REQUEST_CHANGES"
    ],
    # `pytest` plus three endpoint-style paths the tester prompt must keep naming.
    "tester": [
        "pytest",
        "/health",
        "/status",
        "/queue",
    ],
    "deployer": [
        "docker exec orchestrator-staging",
        "pr_already_merged",
        "8500",                    # "never restart 8500 from inside"
        "INFRA-WAIVED",            # ORCH-061 waiver
    ],
}


def _read(agent: str) -> str:
    """Return the full text of ``<agent>.md`` from the agents directory.

    The file is decoded as UTF-8. Errors raised by ``open`` (for example a
    missing file) are not caught here and propagate to the calling test.
    """
    prompt_path = os.path.join(_AGENTS_DIR, f"{agent}.md")
    with open(prompt_path, encoding="utf-8") as handle:
        contents = handle.read()
    return contents


def _read_repo(*parts: str) -> str:
    """Return the UTF-8 text of the file at ``parts`` joined onto the repo root.

    ``parts`` are path components relative to ``_REPO_ROOT``; errors raised by
    ``open`` propagate to the caller.
    """
    target = os.path.join(_REPO_ROOT, *parts)
    with open(target, encoding="utf-8") as handle:
        contents = handle.read()
    return contents


@pytest.mark.parametrize("agent", _AGENTS)
def test_five_xml_sections_present(agent):
    """TC-01: every required XML section has both its open and its close tag.

    Presence is a plain substring check on the prompt text; tag order and
    nesting are not inspected.
    """
    prompt = _read(agent)
    for name in _REQUIRED_SECTIONS:
        opening, closing = f"<{name}>", f"</{name}>"
        assert opening in prompt, f"{agent}.md missing {opening} open tag"
        assert closing in prompt, f"{agent}.md missing {closing} close tag"


@pytest.mark.parametrize("agent", _AGENTS)
def test_six_schema_field_names_present(agent):
    """TC-02: the prompt mentions every mandatory 52c schema field by name.

    Each field name is searched for as a substring of the prompt text.
    """
    prompt = _read(agent)
    for name in _SCHEMA_FIELDS:
        mentioned = name in prompt
        assert mentioned, f"{agent}.md does not mention schema field {name!r}"


@pytest.mark.parametrize("agent", _AGENTS)
def test_schema_pins_role_specific_author_and_stage(agent):
    """TC-03: the prompt pins its own role as author_agent and its stage(s).

    Looks for the literal lines ``author_agent: <role>`` and, for every stage
    registered for the role in ``_STAGE_BY_ROLE``, ``stage: <value>``.
    """
    prompt = _read(agent)

    author_line = f"author_agent: {agent}"
    assert author_line in prompt, (
        f"{agent}.md does not pin '{author_line}' in an example schema"
    )

    # The author check runs first; the role's stage tuple is looked up only
    # after it has passed.
    for stage_line in (f"stage: {value}" for value in _STAGE_BY_ROLE[agent]):
        assert stage_line in prompt, (
            f"{agent}.md does not pin '{stage_line}' in an example schema"
        )


@pytest.mark.parametrize("agent", _AGENTS)
def test_references_templates_and_a_reference_work_item(agent):
    """TC-04: the prompt links docs/_templates/ and a gold-standard work item.

    Either of the two reference work-item ids is enough to satisfy the second
    check.
    """
    prompt = _read(agent)

    links_templates = "docs/_templates/" in prompt
    assert links_templates, f"{agent}.md does not reference docs/_templates/"

    gold_standards = ("ORCH-073", "ORCH-088")
    assert any(item in prompt for item in gold_standards), (
        f"{agent}.md does not reference a gold-standard work item (ORCH-073/ORCH-088)"
    )


def test_machine_verdict_keys_preserved_exact_case():
    """TC-05: machine-verdict keys + value sets survive with exact case.

    For the reviewer, tester and deployer prompts, every verdict key and every
    allowed value must still occur in the prompt text as a case-sensitive
    substring. Each token is asserted on its own, with a message naming the
    prompt and the token, so a failure pinpoints exactly what was dropped
    (the checks and their order are otherwise unchanged).
    """
    # A tuple of pairs keeps the roles and tokens in a fixed, explicit order:
    # reviewer first, then tester, then deployer; a later prompt is only read
    # once all earlier checks have passed.
    expected_tokens = (
        ("reviewer", ("verdict:", "APPROVED", "REQUEST_CHANGES")),
        ("tester", ("result:", "PASS", "FAIL")),
        ("deployer", ("staging_status:", "deploy_status:", "SUCCESS", "FAILED")),
    )
    for agent, tokens in expected_tokens:
        text = _read(agent)
        for token in tokens:
            # Substring match: note that "FAIL" is also satisfied by "FAILED".
            assert token in text, (
                f"{agent}.md lost machine-verdict token {token!r}"
            )


def test_deployer_self_hosting_anti_regress():
    """TC-06: the deployer prompt still contains all of its anti-regress markers.

    The markers are the ``"deployer"`` entry of ``_ANTI_REGRESS`` (staging
    command, merge guard, the 8500 ban and the waiver token).
    """
    role = "deployer"
    prompt = _read(role)
    for token in _ANTI_REGRESS[role]:
        assert token in prompt, f"{role}.md lost anti-regress marker {token!r}"


@pytest.mark.parametrize("agent", _AGENTS)
def test_role_anti_regress_markers(agent):
    """TC-07: each role's anti-regression markers (TRZ §FR-6) are still present.

    Every string listed for the role in ``_ANTI_REGRESS`` must occur verbatim
    in that role's prompt.
    """
    prompt = _read(agent)
    required_tokens = _ANTI_REGRESS[agent]
    for token in required_tokens:
        assert token in prompt, f"{agent}.md lost anti-regress marker {token!r}"


# --------------------------------------------------------------------------- #
# ORCH-078 (ORCH-52e): traceability-marker standard + reading-rule anti-regress
# (TRZ §FR-1..FR-8; AC-1..AC-5, AC-8). Pure-text checks, NO `src/` import.
# --------------------------------------------------------------------------- #

def test_traceability_standard_exists_and_nonempty():
    """TC-01 (AC-1): the traceability standard is a real file with content.

    The file must exist at ``_TRACEABILITY`` and contain something other than
    whitespace.
    """
    present = os.path.isfile(_TRACEABILITY)
    assert present, "docs/_standards/TRACEABILITY.md is missing"

    body = _read_repo("docs", "_standards", "TRACEABILITY.md")
    assert body.strip(), "TRACEABILITY.md is empty"


def test_traceability_describes_marker_format_and_placement():
    """TC-02 (AC-1): the standard names the ORCH-NNN marker and its placement rule."""
    standard = _read_repo("docs", "_standards", "TRACEABILITY.md")

    marker_described = "ORCH-NNN" in standard
    assert marker_described, "TRACEABILITY.md does not describe the ORCH-NNN marker"

    # Placement rule: markers go next to a non-trivial invariant, not on trivial
    # code. The check keys on the Russian word for "invariant".
    placement_stated = "инвариант" in standard
    assert placement_stated, "TRACEABILITY.md does not state the placement rule"


def test_traceability_has_real_verifiable_example():
    """TC-03 (AC-1): the standard's worked example refers to files that exist.

    An example that points at a missing source file or ADR would contradict the
    standard it illustrates, so both referenced paths are checked against the
    repository tree.
    """
    standard = _read_repo("docs", "_standards", "TRACEABILITY.md")

    mentions_example = "src/serial_gate.py" in standard and "ORCH-088" in standard
    assert mentions_example, (
        "TRACEABILITY.md lacks the serial_gate/ORCH-088 worked example"
    )

    module_path = os.path.join(_REPO_ROOT, "src", "serial_gate.py")
    assert os.path.isfile(module_path), (
        "example references src/serial_gate.py which does not exist"
    )

    adr_path = os.path.join(
        _REPO_ROOT,
        "docs",
        "work-items",
        "ORCH-088",
        "06-adr",
        "ADR-001-serial-gate.md",
    )
    assert os.path.isfile(adr_path), (
        "example references an ORCH-088 ADR that does not exist"
    )


def test_traceability_documents_fallback_access():
    """TC-04 (AC-4): the standard spells out the ``git show origin/main`` fallback."""
    fallback_command = "git show origin/main:docs/work-items/"
    standard = _read_repo("docs", "_standards", "TRACEABILITY.md")
    assert fallback_command in standard, (
        "TRACEABILITY.md does not document the cross-branch ADR fallback"
    )


def test_traceability_documents_anti_archeology():
    """TC-05 (AC-5): the standard states the 3+ markers -> cross-cutting ADR rule.

    Two substrings are required: the cross-cutting ADR directory and the
    literal ``3+`` threshold.
    """
    standard = _read_repo("docs", "_standards", "TRACEABILITY.md")

    points_at_adr_dir = "docs/architecture/adr/" in standard
    assert points_at_adr_dir, (
        "TRACEABILITY.md anti-archeology rule does not point at the cross-cutting ADR dir"
    )

    states_threshold = "3+" in standard
    assert states_threshold, "TRACEABILITY.md does not state the 3+ markers threshold"


def test_developer_carries_reading_rule_and_fallback():
    """TC-06 (AC-2, AC-4): developer.md names the standard and the fallback command."""
    prompt = _read("developer")

    names_standard = "TRACEABILITY.md" in prompt
    assert names_standard, "developer.md does not reference TRACEABILITY.md"

    fallback_command = "git show origin/main:docs/work-items/"
    assert fallback_command in prompt, (
        "developer.md does not carry the cross-branch ADR fallback"
    )


def test_architect_carries_reading_rule_and_anti_archeology():
    """TC-07 (AC-2, AC-5): architect.md names the standard and the 3+ markers rule."""
    prompt = _read("architect")

    names_standard = "TRACEABILITY.md" in prompt
    assert names_standard, "architect.md does not reference TRACEABILITY.md"

    states_threshold = "3+" in prompt
    assert states_threshold, (
        "architect.md does not carry the 3+ markers anti-archeology rule"
    )


def test_reviewer_carries_traceability_control_axis():
    """TC-08 (AC-3): reviewer.md refers to the traceability standard by file name."""
    prompt = _read("reviewer")
    names_standard = "TRACEABILITY.md" in prompt
    assert names_standard, "reviewer.md does not reference TRACEABILITY.md"


def test_claude_md_and_readme_reference_traceability_standard():
    """TC-12 (AC-8): CLAUDE.md and the architecture README both name the standard."""
    # (path components under the repo root, failure message); CLAUDE.md is
    # checked first and the README is only read once that check has passed.
    documents = (
        (
            ("CLAUDE.md",),
            "CLAUDE.md does not reference docs/_standards/TRACEABILITY.md",
        ),
        (
            ("docs", "architecture", "README.md"),
            "architecture README does not reference docs/_standards/TRACEABILITY.md",
        ),
    )
    for parts, failure in documents:
        assert "TRACEABILITY.md" in _read_repo(*parts), failure


# --------------------------------------------------------------------------- #
# ORCH-079 (ORCH-52f): reviewer overview-docs axis (layer 5 of epic ORCH-52).
# Pure-text anti-drift check (TRZ §FR-6 / AC-5), NO `src/` import.
# --------------------------------------------------------------------------- #

def test_reviewer_carries_overview_docs_axis():
    """ORCH-079 TC-01 (AC-5): reviewer.md keeps the README overview-docs axis.

    The reviewer prompt has to demand a README update (its "Известные
    ограничения" / "Known limitations" section) whenever a PR closes a
    documented limitation. Pinning the section title and the ORCH-079 anchor
    here stops a later prompt refactor from dropping the rule unnoticed, in the
    same way the traceability control axis is guarded.
    """
    prompt = _read("reviewer")

    mentions_section = "Известные ограничения" in prompt
    assert mentions_section, (
        "reviewer.md does not mention the README 'Известные ограничения' overview-docs axis"
    )

    anchored = "ORCH-079" in prompt
    assert anchored, (
        "reviewer.md does not anchor the overview-docs axis to ORCH-079"
    )