# orchestrator/tests/test_onboarding_kit.py

"""ORCH-009: structural tests of the onboarding kit (`onboarding/repo-skeleton/`).

Covers test-plan TC-01 (kit completeness), TC-03..TC-08 (prompt-template canon
52d/92), TC-19 (INFRA.md template sections) and TC-20 (ONBOARDING.md runbook).
Pure-text structural checks: NO network, NO agent runs (NFR-5). The kit prompt
templates are checked separately from the live orchestrator prompts
(`tests/test_agent_prompts_canon.py`) — the two trees must not be confused
(ADR-001 D1 ORCH-009).
"""
import json
import os
import re

import pytest

# Parent of the directory holding this test file (two dirname() hops up from
# the file's absolute path); the other paths below are built from it.
_REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
_ONBOARDING = os.path.join(_REPO_ROOT, "onboarding")
# Root of the skeleton ("kit") whose contents these tests inspect.
_KIT = os.path.join(_ONBOARDING, "repo-skeleton")
_RUNBOOK = os.path.join(_REPO_ROOT, "docs", "operations", "ONBOARDING.md")

# Role names; `_prompt(role)` reads `.openclaw/agents/<role>.md` from the kit.
_AGENTS = ("analyst", "architect", "developer", "reviewer", "tester", "deployer")

# The 5 mandatory XML sections, in normative order (canon 52d, AC-2).
_SECTIONS = ("context", "task", "deliverables", "constraints", "output_format")

# The 6 mandatory 52c schema fields (mirrors src/frontmatter.py::REQUIRED_FIELDS,
# kept literal here on purpose: kit tests must not import src/ — NFR-1 hygiene).
_SCHEMA_FIELDS = ("work_item", "stage", "author_agent", "status", "created_at", "model_used")

# Role -> stage value(s) the template's example schema must pin (FR-2).
# Every listed stage must appear as a `stage: <value>` line in that role's
# prompt, so the deployer template has to pin both of its stages.
_STAGE_BY_ROLE = {
    "analyst": ("analysis",),
    "architect": ("architecture",),
    "developer": ("development",),
    "reviewer": ("review",),
    "tester": ("testing",),
    "deployer": ("deploy-staging", "deploy"),
}


def _read(*parts: str) -> str:
    """Return the UTF-8 text of the file at ``parts`` joined under ``_REPO_ROOT``."""
    target = os.path.join(_REPO_ROOT, *parts)
    with open(target, encoding="utf-8") as handle:
        content = handle.read()
    return content


def _kit(*parts: str) -> str:
    """Return the UTF-8 text of the file at ``parts`` joined under ``_KIT``."""
    target = os.path.join(_KIT, *parts)
    with open(target, encoding="utf-8") as handle:
        content = handle.read()
    return content


def _prompt(agent: str) -> str:
    """Return the text of the kit prompt template ``.openclaw/agents/<agent>.md``."""
    template_name = f"{agent}.md"
    return _kit(".openclaw", "agents", template_name)


def _fenced_blocks(text: str) -> list[str]:
    """Return the body of every ``` fenced code block (the *copyable* examples)."""
    blocks: list[str] = []
    inside = False
    buf: list[str] = []
    for line in text.splitlines():
        if line.lstrip().startswith("```"):
            if inside:
                blocks.append("\n".join(buf))
                buf = []
            inside = not inside
            continue
        if inside:
            buf.append(line)
    return blocks


# --------------------------------------------------------------------------- #
# TC-01 — kit completeness (AC-1 / FR-1)
# --------------------------------------------------------------------------- #

# Files the skeleton must contain, as '/'-separated paths relative to `_KIT`
# (the completeness test splits each entry on '/' before joining it to `_KIT`).
_REQUIRED_FILES = [
    ".openclaw/agents/analyst.md",
    ".openclaw/agents/architect.md",
    ".openclaw/agents/developer.md",
    ".openclaw/agents/reviewer.md",
    ".openclaw/agents/tester.md",
    ".openclaw/agents/deployer.md",
    "CLAUDE.md",
    "AGENTS.md",
    "CONTRIBUTING.md",
    "README.md",
    "CHANGELOG.md",
    ".env.example",
    "docs/ARCHITECTURE.md",
    "docs/PIPELINE.md",
    "docs/PRODUCT_VISION.md",
    "docs/operations/INFRA.md",
    "docs/architecture/adr/README.md",
    "docs/work-items/.gitkeep",
    "docs/history/.gitkeep",
]


def test_tc01_kit_contains_all_required_elements():
    """TC-01: every FR-1 element of the skeleton is present (6 prompts + carcass).

    Collects every `_REQUIRED_FILES` entry that is not a regular file under
    `_KIT` and reports them all in one failure message.
    """
    missing = []
    for rel in _REQUIRED_FILES:
        candidate = os.path.join(_KIT, *rel.split("/"))
        if os.path.isfile(candidate):
            continue
        missing.append(rel)
    assert not missing, f"onboarding/repo-skeleton is missing: {missing}"


def test_tc01_kit_readme_and_placeholder_dictionary_exist():
    """TC-01/D1: onboarding/README.md + placeholders.json (single source of truth).

    Verifies that `onboarding/README.md` exists and that `placeholders.json`
    is a non-empty JSON object whose keys match ``[A-Z][A-Z0-9_]*`` and whose
    values are objects carrying the `description`, `required`, `default` and
    `example` keys.
    """
    assert os.path.isfile(os.path.join(_ONBOARDING, "README.md")), (
        "onboarding/README.md is missing"
    )
    payload = json.loads(_read("onboarding", "placeholders.json"))
    assert isinstance(payload, dict) and payload, "placeholders.json must be a non-empty dict"
    for name, meta in payload.items():
        assert re.fullmatch(r"[A-Z][A-Z0-9_]*", name), f"bad placeholder name {name!r}"
        # Guard before the key checks: on a str entry `key in meta` is a
        # substring test, on a list it is element membership, and on
        # null/number/bool it raises a bare TypeError instead of a clear failure.
        assert isinstance(meta, dict), (
            f"placeholders.json[{name}] must be an object, got {type(meta).__name__}"
        )
        for key in ("description", "required", "default", "example"):
            assert key in meta, f"placeholders.json[{name}] lacks {key!r}"


def test_kit_does_not_fork_the_canon():
    """BR-2/D3: no second editable copy of the canon inside the kit.

    `docs/_templates/` and `docs/_standards/` are live-copied by the script at
    materialisation time and must NOT be stored in the skeleton.
    """
    canon_dirs = ("docs/_templates", "docs/_standards")
    for forbidden in canon_dirs:
        stored_copy = os.path.join(_KIT, *forbidden.split("/"))
        present = os.path.exists(stored_copy)
        assert not present, (
            f"kit must not store an editable canon copy: {forbidden}"
        )


# --------------------------------------------------------------------------- #
# D2 — placeholder dictionary bijection (declared <-> used)
# --------------------------------------------------------------------------- #

# Matches `{{NAME}}` tokens; the single capture group is the bare NAME (an
# uppercase letter followed by uppercase letters, digits or underscores).
_PLACEHOLDER_RE = re.compile(r"\{\{([A-Z][A-Z0-9_]*)\}\}")


def _kit_files() -> list[str]:
    """Return the path of every file found by walking ``_KIT`` recursively."""
    return [
        os.path.join(folder, filename)
        for folder, _subdirs, filenames in os.walk(_KIT)
        for filename in filenames
    ]


def test_placeholder_dictionary_bijection():
    """D2: every placeholder used in the kit is declared, every declared is used.

    "Declared" is the key set of `onboarding/placeholders.json`; "used" is the
    set of names matched by `_PLACEHOLDER_RE` across all kit files, each read
    as UTF-8 text.
    """
    declared = set(json.loads(_read("onboarding", "placeholders.json")))
    used: set[str] = set()
    for kit_path in _kit_files():
        with open(kit_path, encoding="utf-8") as handle:
            body = handle.read()
        used |= set(_PLACEHOLDER_RE.findall(body))
    assert used == declared, (
        f"placeholder drift: used-not-declared={sorted(used - declared)}, "
        f"declared-not-used={sorted(declared - used)}"
    )


# --------------------------------------------------------------------------- #
# TC-03 — 5 XML sections in normative order (AC-2)
# --------------------------------------------------------------------------- #

@pytest.mark.parametrize("agent", _AGENTS)
def test_tc03_five_xml_sections_in_normative_order(agent):
    """Real section tags sit on their own line; inline backticked mentions
    (e.g. "see `<output_format>`" inside <task>) must not be mistaken for them
    (same disambiguation as the ORCH-092 <escalation> check).

    Order is judged by the offset of the first own-line opening tag of each
    section, compared in `_SECTIONS` order.
    """
    text = _prompt(agent)
    opening_offsets = []
    for section in _SECTIONS:
        open_m = re.search(rf"(?m)^<{section}>\s*$", text)
        close_m = re.search(rf"(?m)^</{section}>\s*$", text)
        assert open_m, f"kit {agent}.md missing <{section}> on its own line"
        assert close_m, f"kit {agent}.md missing </{section}> on its own line"
        opening_offsets.append(open_m.start())
    # Non-decreasing offsets are exactly "the list equals its sorted copy".
    in_order = all(
        earlier <= later
        for earlier, later in zip(opening_offsets, opening_offsets[1:])
    )
    assert in_order, (
        f"kit {agent}.md sections out of normative order "
        f"context→task→deliverables→constraints→output_format"
    )


# --------------------------------------------------------------------------- #
# TC-04 — <escalation> at dev/reviewer/tester; bans in «❌ → ✅» form (AC-2)
# --------------------------------------------------------------------------- #

@pytest.mark.parametrize("agent", ("developer", "reviewer", "tester"))
def test_tc04_escalation_section_after_success_criteria(agent):
    """TC-04: <escalation> exists (own-line tags) and opens after </success_criteria>."""
    text = _prompt(agent)
    open_m = re.search(r"(?m)^<escalation>\s*$", text)
    close_m = re.search(r"(?m)^</escalation>\s*$", text)
    has_section = open_m is not None and close_m is not None
    assert has_section, f"kit {agent}.md is missing the <escalation> section"
    success_m = re.search(r"(?m)^</success_criteria>\s*$", text)
    placed_after = success_m is not None and success_m.start() < open_m.start()
    assert placed_after, (
        f"kit {agent}.md must place <escalation> after </success_criteria>"
    )


@pytest.mark.parametrize("agent", _AGENTS)
def test_tc04_bans_use_cross_check_format(agent):
    """TC-04: the prompt contains both the ❌ and the ✅ marker."""
    text = _prompt(agent)
    markers_present = all(mark in text for mark in ("❌", "✅"))
    assert markers_present, (
        f"kit {agent}.md must format bans as «❌ X → ✅ Y»"
    )


# --------------------------------------------------------------------------- #
# TC-05 — each template directs the agent to the project docs (AC-2 / BR-3)
# --------------------------------------------------------------------------- #

@pytest.mark.parametrize("agent", _AGENTS)
def test_tc05_prompt_directs_agent_to_docs(agent):
    """TC-05: the prompt mentions every project-doc marker listed below."""
    required_markers = (
        "CLAUDE.md",            # passport, read BEFORE work
        "AGENTS.md",            # docs map / entry point
        "docs/ARCHITECTURE.md",  # architecture doc
        "docs/work-items/",     # artefact home
        "PIPELINE_DOCS.md",     # docs standard
        "docs/_templates/",     # skeletons
    )
    text = _prompt(agent)
    for marker in required_markers:
        referenced = marker in text
        assert referenced, f"kit {agent}.md does not reference {marker!r}"


@pytest.mark.parametrize("agent", ("developer", "reviewer"))
def test_tc05_changelog_duty_present(agent):
    """TC-05: developer and reviewer prompts mention CHANGELOG.md."""
    text = _prompt(agent)
    mentions_changelog = "CHANGELOG.md" in text
    assert mentions_changelog, (
        f"kit {agent}.md must carry the CHANGELOG update duty"
    )


def test_tc05_architect_carries_adr_rules():
    """TC-05: the architect prompt references both `06-adr/` and `docs/architecture/adr/`."""
    text = _prompt("architect")
    expectations = (
        ("06-adr/", "kit architect.md must route decisions to 06-adr/"),
        (
            "docs/architecture/adr/",
            "kit architect.md must carry the cross-cutting ADR rule",
        ),
    )
    for needle, message in expectations:
        assert needle in text, message


# --------------------------------------------------------------------------- #
# TC-06 — 52c schema emission + byte-exact machine-verdict keys (AC-2)
# --------------------------------------------------------------------------- #

@pytest.mark.parametrize("agent", _AGENTS)
def test_tc06_six_schema_fields_named(agent):
    """TC-06: every name in `_SCHEMA_FIELDS` occurs somewhere in the prompt text."""
    text = _prompt(agent)
    for field in _SCHEMA_FIELDS:
        mentioned = field in text
        assert mentioned, f"kit {agent}.md does not mention schema field {field!r}"


@pytest.mark.parametrize("agent", _AGENTS)
def test_tc06_schema_pins_role_author_and_stage(agent):
    """TC-06: the prompt contains `author_agent: <role>` and every `stage: <value>`
    listed for the role in `_STAGE_BY_ROLE`."""
    text = _prompt(agent)
    author_pin = f"author_agent: {agent}"
    assert author_pin in text, (
        f"kit {agent}.md does not pin 'author_agent: {agent}'"
    )
    for stage in _STAGE_BY_ROLE[agent]:
        stage_pin = f"stage: {stage}"
        assert stage_pin in text, f"kit {agent}.md does not pin 'stage: {stage}'"


def test_tc06_machine_verdict_keys_byte_exact():
    """TC-06: reviewer/tester/deployer prompts contain their verdict keys and values.

    Tokens are checked as plain substrings, role by role, in the order listed.
    """
    expected_tokens = (
        ("reviewer", ("verdict:", "APPROVED", "REQUEST_CHANGES")),
        ("tester", ("result:", "PASS", "FAIL")),
        (
            "deployer",
            (
                "staging_status:",
                "deploy_status:",
                "security_status:",
                "SUCCESS",
                "FAILED",
            ),
        ),
    )
    for role, tokens in expected_tokens:
        prompt_text = _prompt(role)
        for token in tokens:
            assert token in prompt_text


@pytest.mark.parametrize("agent", _AGENTS)
def test_tc06_dates_and_models_are_placeholders(agent):
    """Anti-pattern ORCH-092: no literal date/model inside copyable examples.

    The prompt must contain the `created_at: <YYYY-MM-DD>` placeholder and the
    `date +%F` hint. Inside fenced blocks, `created_at:` must not be followed by
    a digit and `model_used:` must not be followed by `claude`.
    """
    literal_date = re.compile(r"created_at:\s*\d")
    literal_model = re.compile(r"model_used:\s*claude")

    text = _prompt(agent)
    assert "created_at: <YYYY-MM-DD>" in text, (
        f"kit {agent}.md must use the created_at: <YYYY-MM-DD> placeholder"
    )
    assert "date +%F" in text, (
        f"kit {agent}.md must instruct to substitute the actual date (date +%F)"
    )
    for block in _fenced_blocks(text):
        date_hit = literal_date.search(block)
        assert date_hit is None, (
            f"kit {agent}.md hardcodes a literal created_at date in a copyable block"
        )
        model_hit = literal_model.search(block)
        assert model_hit is None, (
            f"kit {agent}.md hardcodes a literal model in a copyable block"
        )


# --------------------------------------------------------------------------- #
# TC-07 — reviewer-gate on documentation (AC-3 / BR-4)
# --------------------------------------------------------------------------- #

def test_tc07_reviewer_gate_docs_not_updated_means_request_changes():
    """TC-07: the reviewer prompt contains `REQUEST_CHANGES` and the marker
    phrase `НЕ обновлена` that the documentation gate is keyed on."""
    text = _prompt("reviewer")
    has_verdict = "REQUEST_CHANGES" in text
    assert has_verdict
    has_gate_phrase = "НЕ обновлена" in text
    assert has_gate_phrase, (
        "kit reviewer.md must carry the mandatory gate: docs NOT updated -> "
        "verdict: REQUEST_CHANGES"
    )


# --------------------------------------------------------------------------- #
# TC-08 — language policy: 5 ru + deployer en (AC-4 / D9)
# --------------------------------------------------------------------------- #

# Matches one Cyrillic letter: the а-я / А-Я ranges plus ё / Ё, which are
# listed explicitly in the character class.
_CYRILLIC = re.compile(r"[а-яА-ЯёЁ]")


@pytest.mark.parametrize("agent", ("analyst", "architect", "developer", "reviewer", "tester"))
def test_tc08_ru_canon_for_five_roles(agent):
    """TC-08: each of the five non-deployer prompts contains at least one Cyrillic letter."""
    text = _prompt(agent)
    has_cyrillic = _CYRILLIC.search(text) is not None
    assert has_cyrillic, (
        f"kit {agent}.md must follow the ru canon (ADR-001 D9 ORCH-009)"
    )


def test_tc08_deployer_is_english():
    """TC-08: the deployer prompt has no Cyrillic letters and carries the
    `Do NOT translate` guard text."""
    text = _prompt("deployer")
    cyrillic_hit = _CYRILLIC.search(text)
    assert cyrillic_hit is None, (
        "kit deployer.md must stay 100% English (safety-critical canon, D9)"
    )
    has_guard = "Do NOT translate" in text
    assert has_guard, (
        "kit deployer.md must carry the language-note guard"
    )


# --------------------------------------------------------------------------- #
# TC-19 — INFRA.md template: mandatory sections (AC-10 / FR-3)
# --------------------------------------------------------------------------- #

def test_tc19_infra_template_mandatory_sections():
    """TC-19: the kit's INFRA.md template carries every mandatory marker.

    All checks are plain substring tests (two of them case-insensitive),
    evaluated up front and asserted in the order listed.
    """
    text = _kit("docs", "operations", "INFRA.md")
    lowered = text.lower()
    checks = (
        ("Топология" in text, "INFRA template lacks the topology section"),
        (
            "{{PROD_PORT}}" in text and "{{STAGING_PORT}}" in text,
            "INFRA template must parametrise prod/staging ports",
        ),
        ("env" in lowered, "INFRA template lacks the env map section"),
        (".env.example" in text, "INFRA template lacks the .env.example canon rule"),
        ("Границы доступа" in text, "INFRA template lacks the access-boundaries section"),
        (
            "общего хоста" in text or "общий хост" in text,
            "INFRA template lacks the shared-host risk warnings",
        ),
        ("секрет" in lowered, "INFRA template lacks the secrets rule"),
    )
    for satisfied, message in checks:
        assert satisfied, message


def test_tc19_orchestrator_own_infra_untouched_sections():
    """AC-10: the orchestrator's own INFRA.md keeps its self-hosting warnings."""
    own = _read("docs", "operations", "INFRA.md")
    still_self_hosting = all(token in own for token in ("orchestrator", "8500"))
    assert still_self_hosting, (
        "docs/operations/INFRA.md of the orchestrator must stay the self-hosting runbook"
    )


# --------------------------------------------------------------------------- #
# TC-20 — runbook ONBOARDING.md covers all layers in order (AC-11 / FR-6)
# --------------------------------------------------------------------------- #

def test_tc20_runbook_exists_and_layer_order():
    """TC-20: the runbook exists and names every layer anchor in sequence.

    Each anchor's position is the offset of its first occurrence in the
    runbook text; those offsets must be non-decreasing in the listed order.
    """
    assert os.path.isfile(_RUNBOOK), "docs/operations/ONBOARDING.md is missing"
    text = _read("docs", "operations", "ONBOARDING.md")
    # All BR-1 layers, in sequence.
    anchors = ["Предусловия", "Plane", "Gitea", "kit", "Регистрация", "Верификация", "Откат"]
    first_offsets = []
    for anchor in anchors:
        assert anchor in text, f"ONBOARDING.md lacks the {anchor!r} layer"
        first_offsets.append(text.index(anchor))
    ascending = all(
        earlier <= later
        for earlier, later in zip(first_offsets, first_offsets[1:])
    )
    assert ascending, (
        f"ONBOARDING.md layers out of order: {anchors}"
    )


def test_tc20_runbook_manual_steps_and_selfhosting_warning():
    """TC-20: the runbook marks manual steps, the restart and the webhook rule.

    Substring checks on `docs/operations/ONBOARDING.md`: a manual-step marker
    and the restart keyword (both case-insensitive), a self-hosting /
    group-window warning, a mention of the workspace webhook
    (case-insensitive) and the statement that it already exists.
    """
    text = _read("docs", "operations", "ONBOARDING.md")
    lowered = text.lower()
    # A single case-insensitive test is enough: any upper-case spelling of the
    # marker lower-cases to this same string, so a separate upper-case check
    # could never succeed where this one fails.
    assert "ручной шаг" in lowered, (
        "ONBOARDING.md must explicitly mark manual steps"
    )
    assert "рестарт" in lowered, (
        "ONBOARDING.md must describe the operator-managed restart step"
    )
    assert "self-hosting" in text or "групповое окно" in text, (
        "ONBOARDING.md must warn that a prod restart is a group-wide window"
    )
    # Plane workspace-webhook already exists: verify, never create (Ф-6).
    assert "workspace" in lowered, "ONBOARDING.md must cover the workspace webhook"
    assert "существует" in text, (
        "ONBOARDING.md must state the Plane workspace-webhook already exists"
    )


def test_tc20_runbook_verification_and_smoke_journal():
    """TC-20: the runbook mentions verify mode, port 8501, the smoke journal and the CLI."""
    text = _read("docs", "operations", "ONBOARDING.md")
    expectations = (
        ("verify", "ONBOARDING.md must document the verify mode"),
        ("8501", "ONBOARDING.md smoke contour must be staging (8501) — D8"),
        (
            "Журнал smoke-прогонов",
            "ONBOARDING.md must carry the smoke-run journal section (D8)",
        ),
        ("onboard_project.py", "ONBOARDING.md must reference the CLI"),
    )
    for needle, message in expectations:
        assert needle in text, message


def test_setup_webhooks_generalised():
    """TRZ §2: SETUP_WEBHOOKS.md is generalised per-repo + references the runbook."""
    text = _read("docs", "operations", "SETUP_WEBHOOKS.md")
    references_runbook = "ONBOARDING.md" in text
    assert references_runbook, (
        "SETUP_WEBHOOKS.md must reference docs/operations/ONBOARDING.md"
    )
    has_repo_placeholder = any(token in text for token in ("<repo>", "{repo}"))
    assert has_repo_placeholder, (
        "SETUP_WEBHOOKS.md per-repo section must be generalised, not enduro-hardcoded"
    )