Merge-gate re-test runs under the orchestrator's prod env, where the operator
legitimately set ORCH_AGENT_FALLBACK_MODEL and changed
ORCH_AGENT_MODEL_DEFAULT / ORCH_AGENT_EFFORT_*. Two ORCH-41-era tests asserted
SHIPPED defaults through the env-backed settings singleton and failed 3/3
there, while Gitea CI (clean env) stayed green. Branch ORCH-009 touches
neither src/ nor these tests: this was a latent non-hermetic landmine on main,
detonated by the prod env change.

- test_resolve_agent_effort.py: the autouse fixture now mirrors the sibling
  model-file baseline (pins shipped model/fallback fields) so the
  flag-assembly tests are env-independent.
- test_resolve_agent_model.py: the fixture also resets agent_fallback_model;
  test_fallback_model_disabled_by_default now asserts the CLASS field default
  (the actual ORCH-074 ADR-001 G4 invariant: shipped default is ""); the
  never-break is_valid_model asserts are unchanged byte-for-byte.

Clean-env behaviour is byte-equivalent (fixtures pin exactly what an empty env
yields). Full suite: 1713 passed (was 2 failed / 1711).

Refs: ORCH-009
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
249 lines
11 KiB
Python
249 lines
11 KiB
Python
"""ORCH-41: tests for resolve_agent_model (per-agent + per-project LLM model).
|
|
|
|
Covers the 4-level resolution priority:
|
|
1. ProjectConfig.agent_models[agent] (per-project override, from projects_json)
|
|
2. settings.agent_model_<agent> (per-agent env, when non-empty)
|
|
3. settings.agent_model_default (global default)
|
|
4. "" (no override anywhere -> CLI default)
|
|
|
|
plus: unknown project_id / no project_id skips level 1, unknown agent skips
|
|
level 2, and the frozen ProjectConfig still accepts agent_models (default {}).
|
|
|
|
We never mutate the module-global registry permanently: tests that need a
|
|
custom registry install one via monkeypatch + reload_projects and restore the
|
|
default afterwards (autouse fixture).
|
|
"""
|
|
import os
|
|
import tempfile
|
|
|
|
import pytest
|
|
|
|
# Seed placeholder values before the src.* imports that follow (presumably
# src.config reads them at import time -- confirm). setdefault leaves any value
# already present in the host environment untouched.
os.environ.setdefault(
    "ORCH_DB_PATH",
    os.path.join(tempfile.gettempdir(), "test_orch41_model.db"),
)
os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")
os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")
|
|
|
|
import logging
|
|
|
|
from src.agents.launcher import resolve_agent_model, is_valid_model
|
|
from src.config import settings
|
|
from src import projects as P
|
|
from src.projects import ProjectConfig, reload_projects, _parse_projects_json
|
|
|
|
# Plane project ids used by the tests; ORCH_PLANE_ID keys the single-project
# registry built by _install_registry.
ORCH_PLANE_ID = "8da6aa25-a60e-44d6-a1e2-d8ae59aa7d6a"
ENDURO_PLANE_ID = "7a79f0a9-5278-49cd-9007-9a338f238f9c"
|
|
|
|
|
|
@pytest.fixture(autouse=True)
def _clean_settings(monkeypatch):
    """Pin every model-related setting to a known baseline for each test.

    Keeps the tests order-independent and hermetic no matter what other
    modules or the host environment put into the settings singleton.
    """
    agents = ("analyst", "architect", "developer", "reviewer", "tester", "deployer")

    baseline = {"agent_model_default": "claude-opus-4-8"}
    baseline.update({f"agent_model_{agent}": "" for agent in agents})
    # Hermeticity: the host env (prod .env; the merge-gate re-test runs under
    # it) may legitimately set ORCH_AGENT_FALLBACK_MODEL -> reset it to the
    # shipped default.
    baseline["agent_fallback_model"] = ""

    for field_name, value in baseline.items():
        monkeypatch.setattr(settings, field_name, value)

    # Default registry (no per-project overrides).
    monkeypatch.setattr(P.settings, "projects_json", "")
    reload_projects()

    yield

    reload_projects()
|
|
|
|
|
|
def _install_registry(monkeypatch, agent_models):
    """Swap in a registry holding only the ORCH project with ``agent_models``.

    The three module-level lookups on ``src.projects`` are patched through
    ``monkeypatch``, so they are restored when the test ends.
    """
    project = ProjectConfig(
        plane_project_id=ORCH_PLANE_ID,
        repo="orchestrator",
        work_item_prefix="ORCH",
        name="orchestrator",
        agent_models=agent_models,
    )
    registry = [project]
    by_plane_id = {entry.plane_project_id: entry for entry in registry}
    by_repo = {entry.repo: entry for entry in registry}

    monkeypatch.setattr(P, "PROJECTS", registry)
    monkeypatch.setattr(P, "_BY_PLANE_ID", by_plane_id)
    monkeypatch.setattr(P, "_BY_REPO", by_repo)
|
|
|
|
|
|
# ---- Level 4: nothing configured -> "" --------------------------------------
def test_no_config_returns_empty(monkeypatch):
    """With the global default blanked, resolution yields the empty string."""
    monkeypatch.setattr(settings, "agent_model_default", "")

    without_project = resolve_agent_model("developer")
    assert without_project == ""

    with_project = resolve_agent_model("developer", ORCH_PLANE_ID)
    assert with_project == ""
|
|
|
|
|
|
# ---- Level 3: global default ------------------------------------------------
def test_global_default():
    """Agents without a per-agent setting resolve to the global default."""
    for agent in ("developer", "architect"):
        assert resolve_agent_model(agent) == "claude-opus-4-8"
|
|
|
|
|
|
# ---- Level 2: per-agent env beats default -----------------------------------
def test_per_agent_env_overrides_default(monkeypatch):
    """A non-empty per-agent setting wins over the global default."""
    monkeypatch.setattr(settings, "agent_model_reviewer", "claude-sonnet-4-6")

    reviewer_model = resolve_agent_model("reviewer")
    assert reviewer_model == "claude-sonnet-4-6"

    # Agents without their own setting still fall through to the default.
    developer_model = resolve_agent_model("developer")
    assert developer_model == "claude-opus-4-8"
|
|
|
|
|
|
# ---- Level 1: per-project override beats per-agent env and default ----------
def test_project_override_beats_env_and_default(monkeypatch):
    """A per-project entry outranks both the per-agent setting and the default."""
    monkeypatch.setattr(settings, "agent_model_developer", "claude-sonnet-4-6")
    project_overrides = {"developer": "claude-opus-4-8"}
    _install_registry(monkeypatch, project_overrides)

    scoped = resolve_agent_model("developer", ORCH_PLANE_ID)
    assert scoped == "claude-opus-4-8"

    # Without a project_id the per-agent setting applies.
    unscoped = resolve_agent_model("developer")
    assert unscoped == "claude-sonnet-4-6"
|
|
|
|
|
|
def test_project_override_only_for_listed_agent(monkeypatch):
    """A project override applies only to the agents it names."""
    _install_registry(monkeypatch, {"developer": "claude-opus-4-8"})

    # reviewer is absent from agent_models -> the default applies.
    before_env = resolve_agent_model("reviewer", ORCH_PLANE_ID)
    assert before_env == "claude-opus-4-8"

    # Once a per-agent setting exists for reviewer, that one is used instead.
    monkeypatch.setattr(settings, "agent_model_reviewer", "claude-sonnet-4-6")
    after_env = resolve_agent_model("reviewer", ORCH_PLANE_ID)
    assert after_env == "claude-sonnet-4-6"
|
|
|
|
|
|
# ---- unknown / empty project id skips level 1 -------------------------------
def test_unknown_project_id_skips_override(monkeypatch):
    """An unknown or missing project id never picks up the project override."""
    _install_registry(monkeypatch, {"developer": "x-model"})

    for project_id in ("no-such-uuid", None):
        assert resolve_agent_model("developer", project_id) == "claude-opus-4-8"
|
|
|
|
|
|
# ---- unknown agent skips per-agent env, still gets default ------------------
def test_unknown_agent_falls_to_default():
    """An agent name with no matching setting resolves to the global default."""
    resolved = resolve_agent_model("nonexistent")
    assert resolved == "claude-opus-4-8"
|
|
|
|
|
|
# ---- frozen ProjectConfig accepts agent_models ------------------------------
def test_projectconfig_frozen_with_agent_models():
    """ProjectConfig stores agent_models, defaults it per instance, and is frozen."""
    pc = ProjectConfig(
        plane_project_id="x", repo="r", work_item_prefix="P", name="n",
        agent_models={"developer": "m"},
    )
    assert pc.agent_models == {"developer": "m"}

    # The default is an empty dict that is not shared across instances.
    pc2 = ProjectConfig(plane_project_id="y", repo="r2",
                        work_item_prefix="P2", name="n2")
    assert pc2.agent_models == {}
    assert pc2.agent_models is not pc.agent_models

    # The concrete exception type depends on how ProjectConfig implements
    # immutability, so only "some exception" is required here ...
    with pytest.raises(Exception):
        pc.repo = "changed"  # frozen
    # ... and the value must really be untouched: pytest.raises(Exception)
    # alone would also pass if the assignment took effect before an error.
    assert pc.repo == "r"
|
|
|
|
|
|
# ---- projects_json parsing of agent_models / agent_efforts ------------------
def test_parse_projects_json_with_overrides():
    """Both override maps in a projects_json entry survive parsing intact."""
    raw = (
        '[{"plane_project_id":"p1","repo":"orchestrator",'
        '"work_item_prefix":"ORCH",'
        '"agent_models":{"developer":"claude-opus-4-8","reviewer":"claude-sonnet-4-6"},'
        '"agent_efforts":{"developer":"xhigh","tester":"low"}}]'
    )
    expected_models = {
        "developer": "claude-opus-4-8",
        "reviewer": "claude-sonnet-4-6",
    }
    expected_efforts = {"developer": "xhigh", "tester": "low"}

    parsed = _parse_projects_json(raw)

    assert parsed is not None
    assert len(parsed) == 1
    entry = parsed[0]
    assert entry.agent_models == expected_models
    assert entry.agent_efforts == expected_efforts
|
|
|
|
|
|
def test_parse_projects_json_omitted_overrides_default_empty():
    """Entries that omit the override maps get empty dicts for both."""
    raw = '[{"plane_project_id":"p1","repo":"r","work_item_prefix":"P"}]'

    parsed = _parse_projects_json(raw)

    assert parsed is not None
    assert len(parsed) == 1
    entry = parsed[0]
    assert entry.agent_models == {}
    assert entry.agent_efforts == {}
|
|
|
|
|
|
def test_parse_projects_json_malformed_override_ignored():
    """A non-object agent_models is dropped to {} while the entry stays valid."""
    raw = (
        '[{"plane_project_id":"p1","repo":"r","work_item_prefix":"P",'
        '"agent_models":"oops"}]'
    )

    parsed = _parse_projects_json(raw)

    assert parsed is not None
    assert parsed[0].agent_models == {}
|
|
|
|
|
|
# =============================================================================
|
|
# ORCH-074 (G2): model-name validation, never-break. is_valid_model is a
|
|
# structural format check (^claude-…$), applied on top of the ORCH-41 cascade so
|
|
# garbage at any level is logged and skipped, never passed to --model.
|
|
# =============================================================================
|
|
|
|
# ---- is_valid_model predicate (the single G2 contract) ----------------------
def test_is_valid_model_accepts_canonical():
    """Well-formed claude-* names are accepted."""
    accepted = (
        "claude-opus-4-8",
        "claude-sonnet-4-6",
        # forward-compatible: a future version passes without a code change
        "claude-opus-4-9",
        # surrounding whitespace is tolerated (stripped)
        " claude-opus-4-8 ",
    )
    for candidate in accepted:
        assert is_valid_model(candidate) is True
|
|
|
|
|
|
def test_is_valid_model_rejects_garbage():
    """Empty, missing, foreign or malformed names are all rejected."""
    rejected = (
        "",
        " ",
        None,
        "gpt-4",            # another provider
        "claud-opus-typo",  # wrong prefix
        "Claude-Opus-4-8",  # uppercase not allowed
        "claude-opus 4 8",  # spaces inside
    )
    for candidate in rejected:
        assert is_valid_model(candidate) is False
|
|
|
|
|
|
# ---- TC-03: garbage in agent_model_<agent> -> fall back to default ----------
def test_garbage_per_agent_env_falls_back_to_default(monkeypatch, caplog):
    """An invalid per-agent value is logged and skipped in favour of the default."""
    monkeypatch.setattr(settings, "agent_model_developer", "gpt-4")

    with caplog.at_level(logging.WARNING):
        resolved = resolve_agent_model("developer")

    assert resolved == "claude-opus-4-8"  # dropped garbage, used default
    warnings = [msg for msg in caplog.messages if "Invalid model name" in msg]
    assert warnings
|
|
|
|
|
|
# ---- TC-04: garbage in project-override -> fall back to next valid level -----
def test_garbage_project_override_falls_back_to_default(monkeypatch, caplog):
    """An invalid project override is logged and the next valid level is used."""
    _install_registry(monkeypatch, {"developer": "claud-opus-typo"})

    with caplog.at_level(logging.WARNING):
        resolved = resolve_agent_model("developer", ORCH_PLANE_ID)

    assert resolved == "claude-opus-4-8"  # override dropped, default used
    warnings = [msg for msg in caplog.messages if "Invalid model name" in msg]
    assert warnings
|
|
|
|
|
|
# ---- TC-05: both override and default invalid -> "" (no --model), no raise ---
def test_all_levels_invalid_returns_empty(monkeypatch, caplog):
    """When every configured level is invalid the result is "" and nothing raises."""
    monkeypatch.setattr(settings, "agent_model_default", "totally-bogus")
    _install_registry(monkeypatch, {"developer": "gpt-4"})

    with caplog.at_level(logging.WARNING):
        resolved = resolve_agent_model("developer", ORCH_PLANE_ID)

    assert resolved == ""  # never returns garbage; CLI default applies
    # Both invalid levels were logged.
    warnings = [msg for msg in caplog.messages if "Invalid model name" in msg]
    assert len(warnings) >= 2
|
|
|
|
|
|
# ---- TC-06: valid canonical name passes unchanged (ORCH-41 regression) -------
def test_valid_canonical_unchanged():
    """A valid default model name is returned exactly as configured."""
    resolved = resolve_agent_model("developer")
    assert resolved == "claude-opus-4-8"
|
|
|
|
|
|
# ---- TC-07: all 6 agents resolve to claude-opus-4-8 (routing G3 off) ---------
def test_all_six_agents_resolve_to_opus_4_8():
    """Every known agent resolves to the same default model."""
    agents = ("analyst", "architect", "developer", "reviewer", "tester",
              "deployer")
    resolved = {agent: resolve_agent_model(agent) for agent in agents}
    assert resolved == dict.fromkeys(agents, "claude-opus-4-8")
|
|
|
|
|
|
# ---- TC-08: valid per-project override still passes validation (AC-8) --------
def test_valid_per_project_override_unchanged(monkeypatch):
    """A well-formed per-project override is returned untouched."""
    _install_registry(monkeypatch, {"reviewer": "claude-sonnet-4-6"})

    resolved = resolve_agent_model("reviewer", ORCH_PLANE_ID)
    assert resolved == "claude-sonnet-4-6"
|
|
|
|
|
|
# ---- TC-09 / TC-11: G4 fallback is OFF (ADR-001 decision 3) ------------------
def test_fallback_model_disabled_by_default():
    """The shipped agent_fallback_model default is "" and bad names are rejected."""
    # G4 is not enabled (ORCH-074 ADR-001, Decision 3): the SHIPPED default of
    # agent_fallback_model is "" -> no --fallback-model flag out of the box.
    # Read the CLASS field default rather than the runtime singleton: the host
    # env may legitimately enable the fallback via ORCH_AGENT_FALLBACK_MODEL,
    # and this test must stay hermetic (the merge-gate re-test runs under that
    # env).
    shipped_default = type(settings).model_fields["agent_fallback_model"].default
    assert shipped_default == ""

    # never-break: the SAME predicate guards the inline fallback read in
    # _spawn, so a typo there would be rejected exactly like a model name.
    for bad_name in ("claude-bad typo", ""):
        assert is_valid_model(bad_name) is False
|