feat(launcher): drop dead frontmatter model + validate model name (never-break)

G1: remove the dead `model:` line from all 6 .openclaw/agents/*.md prompts — launcher never read it; config (agent_model_*) is the single source of truth. G2: add is_valid_model helper (format check ^claude-…$) applied inside resolve_agent_model's resolution cascade and at the inline --fallback-model read in _spawn. An invalid name is logged and skipped to the next valid level (in the limit: no --model flag), never passed to the CLI, never raises. Format check chosen over an allowlist for forward-compatibility (ADR-001). G3 (routing) and G4 (fallback) intentionally NOT enabled — all agents stay on claude-opus-4-8; agent_fallback_model stays "". Docs (golden source) updated in the same change: README model/effort table + validation, CLAUDE.md, .env.example (ORCH_AGENT_MODEL_*/EFFORT_*/FALLBACK_MODEL), CHANGELOG. Tests: test_agent_frontmatter_no_model.py (G1), extended test_resolve_agent_model.py (G2 never-break). Refs: ORCH-074 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-08 21:53:09 +03:00
parent 6aebd9116b
commit fb87fda9b6
13 changed files with 288 additions and 17 deletions
--- a/tests/test_agent_frontmatter_no_model.py
+++ b/tests/test_agent_frontmatter_no_model.py
@@ -0,0 +1,68 @@
+"""ORCH-074 (G1): the dead `model:` frontmatter is gone from all 6 agent prompts.
+
+launcher.py never reads frontmatter `model:` — it was a lying/dead declaration
+(claude-sonnet-4-6 / claude-opus-4-7) that contradicted the real model resolved
+from config (ORCH-41). The mine: if someone "fixed" the launcher to read it, every
+agent would silently fall back to a stale model. G1 removes the line entirely so
+config (agent_model_*) stays the single source of truth.
+
+TC-01: no .openclaw/agents/*.md contains a `^model:` line in its frontmatter.
+TC-02: each frontmatter is still valid YAML and keeps name/description.
+"""
+import os
+
+import pytest
+
+try:
+    import yaml  # PyYAML
+    _HAVE_YAML = True
+except Exception:  # pragma: no cover - yaml is a test/runtime dep
+    _HAVE_YAML = False
+
+_AGENTS = ("analyst", "architect", "developer", "reviewer", "tester", "deployer")
+
+# tests/ is one level under the repo root; .openclaw/agents lives at the root.
+_AGENTS_DIR = os.path.join(
+    os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
+    ".openclaw", "agents",
+)
+
+
+def _frontmatter_block(text: str) -> str:
+    """Return the YAML between the first two '---' fences (the frontmatter)."""
+    lines = text.splitlines()
+    assert lines and lines[0].strip() == "---", "frontmatter must open with '---'"
+    end = None
+    for i in range(1, len(lines)):
+        if lines[i].strip() == "---":
+            end = i
+            break
+    assert end is not None, "frontmatter must close with a second '---'"
+    return "\n".join(lines[1:end])
+
+
+@pytest.mark.parametrize("agent", _AGENTS)
+def test_no_model_line_in_frontmatter(agent):
+    """TC-01: no agent prompt declares a `model:` key in its frontmatter."""
+    path = os.path.join(_AGENTS_DIR, f"{agent}.md")
+    with open(path, encoding="utf-8") as f:
+        block = _frontmatter_block(f.read())
+    for line in block.splitlines():
+        assert not line.lstrip().startswith("model:"), (
+            f"{agent}.md still declares a frontmatter 'model:' line: {line!r}"
+        )
+
+
+@pytest.mark.parametrize("agent", _AGENTS)
+def test_frontmatter_still_valid_yaml_with_keys(agent):
+    """TC-02: frontmatter parses as YAML and keeps name/description (no model)."""
+    path = os.path.join(_AGENTS_DIR, f"{agent}.md")
+    with open(path, encoding="utf-8") as f:
+        block = _frontmatter_block(f.read())
+    if not _HAVE_YAML:
+        pytest.skip("PyYAML not available")
+    data = yaml.safe_load(block)
+    assert isinstance(data, dict), f"{agent}.md frontmatter is not a YAML mapping"
+    assert data.get("name") == agent
+    assert data.get("description"), f"{agent}.md lost its description"
+    assert "model" not in data, f"{agent}.md frontmatter still has a model key"
--- a/tests/test_resolve_agent_model.py
+++ b/tests/test_resolve_agent_model.py
@@ -23,7 +23,9 @@ os.environ.setdefault("ORCH_DB_PATH",
 os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")
 os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")

-from src.agents.launcher import resolve_agent_model
+import logging
+
+from src.agents.launcher import resolve_agent_model, is_valid_model
 from src.config import settings
 from src import projects as P
 from src.projects import ProjectConfig, reload_projects, _parse_projects_json
@@ -154,3 +156,86 @@ def test_parse_projects_json_malformed_override_ignored():
           '"agent_models":"oops"}]')
    parsed = _parse_projects_json(raw)
    assert parsed is not None and parsed[0].agent_models == {}
+
+
+# =============================================================================
+# ORCH-074 (G2): model-name validation, never-break. is_valid_model is a
+# structural format check (^claude-…$), applied on top of the ORCH-41 cascade so
+# garbage at any level is logged and skipped, never passed to --model.
+# =============================================================================
+
+# ---- is_valid_model predicate (the single G2 contract) ----------------------
+def test_is_valid_model_accepts_canonical():
+    assert is_valid_model("claude-opus-4-8") is True
+    assert is_valid_model("claude-sonnet-4-6") is True
+    # forward-compatible: a future version passes without a code change
+    assert is_valid_model("claude-opus-4-9") is True
+    # surrounding whitespace is tolerated (stripped)
+    assert is_valid_model("  claude-opus-4-8  ") is True
+
+
+def test_is_valid_model_rejects_garbage():
+    assert is_valid_model("") is False
+    assert is_valid_model("   ") is False
+    assert is_valid_model(None) is False
+    assert is_valid_model("gpt-4") is False            # another provider
+    assert is_valid_model("claud-opus-typo") is False  # wrong prefix
+    assert is_valid_model("Claude-Opus-4-8") is False  # uppercase not allowed
+    assert is_valid_model("claude-opus 4 8") is False  # spaces inside
+
+
+# ---- TC-03: garbage in agent_model_<agent> -> fall back to default ----------
+def test_garbage_per_agent_env_falls_back_to_default(monkeypatch, caplog):
+    monkeypatch.setattr(settings, "agent_model_developer", "gpt-4")
+    with caplog.at_level(logging.WARNING):
+        result = resolve_agent_model("developer")
+    assert result == "claude-opus-4-8"  # dropped garbage, used default
+    assert any("Invalid model name" in r.message for r in caplog.records)
+
+
+# ---- TC-04: garbage in project-override -> fall back to next valid level -----
+def test_garbage_project_override_falls_back_to_default(monkeypatch, caplog):
+    _install_registry(monkeypatch, {"developer": "claud-opus-typo"})
+    with caplog.at_level(logging.WARNING):
+        result = resolve_agent_model("developer", ORCH_PLANE_ID)
+    assert result == "claude-opus-4-8"  # override dropped, default used
+    assert any("Invalid model name" in r.message for r in caplog.records)
+
+
+# ---- TC-05: both override and default invalid -> "" (no --model), no raise ---
+def test_all_levels_invalid_returns_empty(monkeypatch, caplog):
+    monkeypatch.setattr(settings, "agent_model_default", "totally-bogus")
+    _install_registry(monkeypatch, {"developer": "gpt-4"})
+    with caplog.at_level(logging.WARNING):
+        result = resolve_agent_model("developer", ORCH_PLANE_ID)
+    assert result == ""  # never returns garbage; CLI default applies
+    # both invalid levels were logged
+    assert sum("Invalid model name" in r.message for r in caplog.records) >= 2
+
+
+# ---- TC-06: valid canonical name passes unchanged (ORCH-41 regression) -------
+def test_valid_canonical_unchanged():
+    assert resolve_agent_model("developer") == "claude-opus-4-8"
+
+
+# ---- TC-07: all 6 agents resolve to claude-opus-4-8 (routing G3 off) ---------
+def test_all_six_agents_resolve_to_opus_4_8():
+    for agent in ("analyst", "architect", "developer", "reviewer", "tester",
+                  "deployer"):
+        assert resolve_agent_model(agent) == "claude-opus-4-8"
+
+
+# ---- TC-08: valid per-project override still passes validation (AC-8) --------
+def test_valid_per_project_override_unchanged(monkeypatch):
+    _install_registry(monkeypatch, {"reviewer": "claude-sonnet-4-6"})
+    assert resolve_agent_model("reviewer", ORCH_PLANE_ID) == "claude-sonnet-4-6"
+
+
+# ---- TC-09 / TC-11: G4 fallback is OFF (ADR-001 decision 3) ------------------
+def test_fallback_model_disabled_by_default():
+    # G4 not enabled: agent_fallback_model stays "" -> no --fallback-model flag.
+    assert settings.agent_fallback_model == ""
+    # never-break: the SAME predicate guards the inline fallback read in _spawn,
+    # so a typo there would be rejected exactly like a model name.
+    assert is_valid_model("claude-bad typo") is False
+    assert is_valid_model("") is False