docs(llm): LLM call-site map, control-path axis, roadmap & usage policy + anti-drift tests
ORCH-118 (inventory-first, docs+tests only): publish an evidence-based map of
every place the orchestrator's control flow consumes (or can consume) an LLM
judgment, mark the control-path axis (C control-path vs P artifact-producer),
define "avoidable LLM control path" as a checkable two-bit predicate, classify
each call-site, and order the deterministic-replacement roadmap. Pin the map to
code with offline structural anti-drift tests.
- docs/architecture/llm-call-sites.md — map + machine-readable inventory block
+ control-path axis + classification + keep-LLM justifications + deterministic
non-agent paths (FR-1/FR-2/FR-3/FR-8).
- docs/architecture/llm-determinization-roadmap.md — ordered candidates BY ROLE,
savings sourced from agent_runs, recommended first slice = deployer staging
(FR-4). No fabricated follow-up Plane-IDs (R3/NFR-6).
- docs/architecture/llm-usage-policy.md — normative principle, keep/replace
criteria via the axis, definition of "avoidable LLM control path" (FR-5/FR-8).
- tests/test_llm_call_site_inventory.py — TC-01/02/03/04/05/06/09/12/13/14.
- tests/test_llm_determinization_docs.py — TC-07/08/11.
- CHANGELOG.md + docs/overview/tech-quality-security.md — golden-source sync (AC-8).
Avoidable LLM control paths = {tester, deployer}; control-path-keep = {reviewer};
not-control-path (P) = {analyst, architect, developer}. Single LLM transport =
launcher._spawn (S0); no alternative transport (TC-12). Runtime untouched:
STAGE_TRANSITIONS / QG_CHECKS / check_* / machine-verdict keys / DB schema are
byte-for-byte unchanged; no replacement runners implemented (FR-7). Full suite: 2081 passed.
Refs: ORCH-118
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
152
tests/test_llm_determinization_docs.py
Normal file
152
tests/test_llm_determinization_docs.py
Normal file
@@ -0,0 +1,152 @@
|
||||
# ORCH-118 (FR-4 / FR-5 / FR-8 / AC-4, AC-5, AC-9, AC-10): structural tests for the
|
||||
# determinization roadmap and the LLM usage policy.
|
||||
#
|
||||
# These are offline/deterministic (no network, no LLM, no subprocess). They assert
|
||||
# the roadmap carries the mandatory per-candidate attributes (named BY ROLE, never a
|
||||
# fabricated Plane-ID), that the policy is normative and defines "avoidable LLM
|
||||
# control path" as a checkable predicate, and that NO doc binds a candidate to a
|
||||
# non-existent follow-up Plane-ID (R3 / NFR-6 anti-fabrication).
|
||||
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
# Paths are resolved relative to this test file: parents[1] is the directory
# one level above the one that contains this module.
REPO_ROOT = Path(__file__).resolve().parents[1]
ARCH = REPO_ROOT / "docs" / "architecture"
MAP = ARCH / "llm-call-sites.md"
ROADMAP = ARCH / "llm-determinization-roadmap.md"
POLICY = ARCH / "llm-usage-policy.md"
WORK_ITEMS = REPO_ROOT / "docs" / "work-items"

# A follow-up Plane-ID pattern in the ORCH-1XX range. ORCH-118 itself is allowed;
# any OTHER ORCH-1XX referenced in a doc must resolve to a real work-item dir —
# this catches the R2 anti-pattern of binding the map to invented IDs
# (ORCH-115 / ORCH-116, which do not exist).
#
# The trailing negative lookahead stops a longer ID from being truncated to a
# three-digit one: without it "ORCH-1180" would be read as "ORCH-118" (and then
# skipped as the self ID) and "ORCH-1150" would be misreported as "ORCH-115".
_PLANE_ID_RE = re.compile(r"ORCH-1\d\d(?!\d)")
_SELF_ID = "ORCH-118"
|
||||
|
||||
|
||||
def _extract_block(text: str, name: str) -> str:
    """Return the text enclosed by a named pair of HTML comment markers.

    The markers are ``<!-- {name}:START -->`` and ``<!-- {name}:END -->``.
    The returned string is everything between the first start marker and the
    first end marker that follows it, with surrounding whitespace preserved.

    Args:
        text: The full document text to search.
        name: The block name used inside the markers.

    Returns:
        The raw text between the two markers.

    Raises:
        AssertionError: If either marker is absent, or if the end marker does
            not occur after the start marker.
    """
    start = f"<!-- {name}:START -->"
    end = f"<!-- {name}:END -->"
    assert start in text, f"missing block start marker {start!r}"
    assert end in text, f"missing block end marker {end!r}"
    _, _, after_start = text.partition(start)
    body, found_end, _ = after_start.partition(end)
    # An end marker that only appears BEFORE the start marker would otherwise
    # make this silently return the whole remainder of the document.
    assert found_end, f"block end marker {end!r} does not follow start marker {start!r}"
    return body
|
||||
|
||||
|
||||
def _parse_pipe_table(block: str) -> list[dict]:
    """Parse a Markdown pipe table into a list of ``{header: cell}`` dicts.

    Only lines that start with ``|`` (after stripping whitespace) are
    considered. The first such non-separator line becomes the header, with
    its cells lower-cased; every later non-separator line becomes one row.
    Separator lines (cells made up solely of ``-``, ``:`` and spaces) are
    skipped. Cell values are whitespace-stripped strings.

    Rows are paired with the header via ``zip``, so a row with more or fewer
    cells than the header is truncated to the shorter of the two.

    Args:
        block: Text that contains the table, possibly with other lines.

    Returns:
        One dict per data row, in document order; empty if there is no data row.
    """
    header = None
    rows: list[dict] = []
    separator_chars = set("-: ")
    for raw in block.splitlines():
        line = raw.strip()
        if not line.startswith("|"):
            continue
        # Drop exactly one leading and one (optional) trailing border pipe.
        # Stripping *all* edge pipes would swallow an empty first/last cell
        # written without padding ("|| x |") and shift the remaining columns.
        inner = line[1:]
        if inner.endswith("|"):
            inner = inner[:-1]
        cells = [c.strip() for c in inner.split("|")]
        joined = "".join(cells)
        if joined and set(joined) <= separator_chars:
            continue
        if header is None:
            header = [c.lower() for c in cells]
            continue
        rows.append(dict(zip(header, cells)))
    return rows
|
||||
|
||||
|
||||
def _roadmap_rows() -> list[dict]:
    """Load the roadmap document and return its machine-readable table rows.

    Reads ``ROADMAP`` as UTF-8, takes the text between the
    ``ORCH-118-ROADMAP-BLOCK`` markers and parses it as a pipe table.

    Returns:
        The parsed rows, one dict per table row.

    Raises:
        AssertionError: If the block parses to no rows (or the markers are
            rejected by ``_extract_block``).
    """
    roadmap_text = ROADMAP.read_text(encoding="utf-8")
    parsed = _parse_pipe_table(_extract_block(roadmap_text, "ORCH-118-ROADMAP-BLOCK"))
    assert parsed, "roadmap block parsed to zero rows"
    return parsed
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TC-07 — roadmap completeness + recommended first slice (FR-4 / AC-4).
|
||||
# ---------------------------------------------------------------------------
|
||||
def test_tc07_roadmap_completeness_and_first_slice():
    """Check every roadmap row's mandatory attributes, rank order and first slice.

    Each row must have non-empty dependencies and security risk, a savings
    source mentioning ``agent_runs``, a yes/no ``hybrid_needed`` value and a
    ``followup_type`` that names the role and contains no ``ORCH-<n>`` ID.
    Ranks must be ascending and unique, and the only row flagged as the first
    slice must be ``deployer``.
    """
    table = _roadmap_rows()
    seen_roles = {entry["role"] for entry in table}
    # The two avoidable LLM control paths are the roadmap candidates.
    assert seen_roles.issuperset({"deployer", "tester"}), (
        f"roadmap missing candidates: {seen_roles}"
    )

    rank_sequence = []
    flagged_first = []
    for entry in table:
        role = entry["role"]

        assert entry["dependencies"].strip(), f"{role}: empty dependencies"

        # Savings estimate must cite its source (agent_runs / usage).
        savings_source = entry["savings_estimate_source"]
        assert "agent_runs" in savings_source, (
            f"{role}: savings estimate not sourced from agent_runs"
        )

        assert entry["security_risk"].strip(), f"{role}: empty security_risk"

        hybrid = entry["hybrid_needed"]
        assert hybrid.lower() in ("yes", "no"), (
            f"{role}: hybrid_needed must be yes/no, got {hybrid!r}"
        )

        # follow-up is named BY ROLE, never a Plane-ID (R3 / NFR-6 / AC-9).
        followup = entry["followup_type"]
        assert followup.strip(), f"{role}: empty followup_type"
        assert re.search(r"ORCH-\d+", followup) is None, (
            f"{role}: followup_type binds a Plane-ID ({followup!r}) — forbidden (AC-9)"
        )
        assert role in followup, (
            f"{role}: followup_type must name the role, got {followup!r}"
        )

        rank_sequence.append(int(entry["rank"]))
        if entry["first_slice"].lower() == "yes":
            flagged_first.append(role)

    assert rank_sequence == sorted(rank_sequence), (
        f"roadmap not ordered by rank: {rank_sequence}"
    )
    assert len(rank_sequence) == len(set(rank_sequence)), (
        f"duplicate ranks: {rank_sequence}"
    )
    # Exactly one recommended first slice, and it is the deployer (staging) replacement.
    assert flagged_first == ["deployer"], (
        f"recommended first slice must be exactly [deployer]; got {flagged_first}"
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TC-08 — policy exists, is normative, and defines "avoidable LLM control path".
|
||||
# ---------------------------------------------------------------------------
|
||||
def test_tc08_policy_normative_and_defines_avoidable():
    """Check that the usage policy exists and contains its required wording.

    The policy text must contain the core-principle phrase, the words
    "keep" and "replace", a control-path reference and the term
    "avoidable llm control path" (case-insensitively). Its
    ``ORCH-118-AVOIDABLE-DEFINITION-BLOCK`` must mention both predicate
    conditions and at least one concrete tool signal.
    """
    assert POLICY.is_file(), "llm-usage-policy.md missing"
    policy_text = POLICY.read_text(encoding="utf-8")

    # Principle: LLM only where genuine judgment is needed.
    assert "настоящее суждение" in policy_text, "policy missing the core principle"

    # keep vs replace criteria, framed through the control-path axis.
    lowered = policy_text.lower()
    assert all(word in lowered for word in ("keep", "replace")), (
        "policy missing keep/replace criteria"
    )
    assert any(variant in lowered for variant in ("control path", "control-path")), (
        "policy keep/replace criteria not framed through the control-path axis"
    )

    # The defined term appears as a defined term.
    assert "avoidable llm control path" in lowered, (
        "policy does not define the term 'avoidable LLM control path'"
    )

    # Machine-readable definition block: two-bit predicate (C consultation AND
    # derivable verdict).
    definition = _extract_block(policy_text, "ORCH-118-AVOIDABLE-DEFINITION-BLOCK").lower()
    assert "control" in definition, "definition missing the control-path condition (i)"
    assert "deriv" in definition, "definition missing the derivability condition (ii)"

    # The verdict-derivability condition names a real tool signal.
    tool_signals = ("exit-code", "exit code", "pytest", "staging_check")
    assert any(signal in definition for signal in tool_signals), (
        "derivability condition does not reference a concrete tool signal"
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TC-11 — anti-fabrication: no candidate bound to a non-existent follow-up ID.
|
||||
# ---------------------------------------------------------------------------
|
||||
def test_tc11_no_fabricated_followup_ids():
    """Reject references to work items that have no directory on disk.

    Scans the map, roadmap and policy documents for IDs matching
    ``_PLANE_ID_RE``. Apart from ``_SELF_ID``, each distinct ID found must
    exist as a directory under ``WORK_ITEMS``; every miss is collected and
    reported in a single failure so all offenders are visible at once.
    """
    unresolved = []
    for doc_path in (MAP, ROADMAP, POLICY):
        assert doc_path.is_file(), f"doc missing: {doc_path}"
        contents = doc_path.read_text(encoding="utf-8")
        for plane_id in set(_PLANE_ID_RE.findall(contents)):
            if plane_id != _SELF_ID and not (WORK_ITEMS / plane_id).is_dir():
                unresolved.append(
                    f"{doc_path.name}: references non-existent work item {plane_id}"
                )

    failure_message = (
        "fabricated / unresolvable follow-up Plane-ID(s) found (name follow-ups BY "
        "ROLE, not by invented ID — R3 / NFR-6 / AC-9):\n" + "\n".join(unresolved)
    )
    assert not unresolved, failure_message
|
||||
Reference in New Issue
Block a user