"""ORCH-41: tests for resolve_agent_model (per-agent + per-project LLM model). Covers the 4-level resolution priority: 1. ProjectConfig.agent_models[agent] (per-project override, from projects_json) 2. settings.agent_model_ (per-agent env, when non-empty) 3. settings.agent_model_default (global default) 4. "" (no override anywhere -> CLI default) plus: unknown project_id / no project_id skips level 1, unknown agent skips level 2, and the frozen ProjectConfig still accepts agent_models (default {}). We never mutate the module-global registry permanently: tests that need a custom registry install one via monkeypatch + reload_projects and restore the default afterwards (autouse fixture). """ import os import tempfile import pytest os.environ.setdefault("ORCH_DB_PATH", os.path.join(tempfile.gettempdir(), "test_orch41_model.db")) os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token") os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token") import logging from src.agents.launcher import resolve_agent_model, is_valid_model from src.config import settings from src import projects as P from src.projects import ProjectConfig, reload_projects, _parse_projects_json ORCH_PLANE_ID = "8da6aa25-a60e-44d6-a1e2-d8ae59aa7d6a" ENDURO_PLANE_ID = "7a79f0a9-5278-49cd-9007-9a338f238f9c" @pytest.fixture(autouse=True) def _clean_settings(monkeypatch): """Reset all per-agent/default model settings to a known baseline so tests are order-independent regardless of what other modules set in the env.""" monkeypatch.setattr(settings, "agent_model_default", "claude-opus-4-8") for a in ("analyst", "architect", "developer", "reviewer", "tester", "deployer"): monkeypatch.setattr(settings, f"agent_model_{a}", "") # default registry (no per-project overrides) monkeypatch.setattr(P.settings, "projects_json", "") reload_projects() yield reload_projects() def _install_registry(monkeypatch, agent_models): """Install a single-project registry for ORCH with the given agent_models.""" reg = [ProjectConfig( plane_project_id=ORCH_PLANE_ID, repo="orchestrator", work_item_prefix="ORCH", name="orchestrator", agent_models=agent_models, )] monkeypatch.setattr(P, "PROJECTS", reg) monkeypatch.setattr(P, "_BY_PLANE_ID", {p.plane_project_id: p for p in reg}) monkeypatch.setattr(P, "_BY_REPO", {p.repo: p for p in reg}) # ---- Level 4: nothing configured -> "" -------------------------------------- def test_no_config_returns_empty(monkeypatch): monkeypatch.setattr(settings, "agent_model_default", "") assert resolve_agent_model("developer") == "" assert resolve_agent_model("developer", ORCH_PLANE_ID) == "" # ---- Level 3: global default ------------------------------------------------ def test_global_default(): assert resolve_agent_model("developer") == "claude-opus-4-8" assert resolve_agent_model("architect") == "claude-opus-4-8" # ---- Level 2: per-agent env beats default ----------------------------------- def test_per_agent_env_overrides_default(monkeypatch): monkeypatch.setattr(settings, "agent_model_reviewer", "claude-sonnet-4-6") assert resolve_agent_model("reviewer") == "claude-sonnet-4-6" # other agents still fall through to default assert resolve_agent_model("developer") == "claude-opus-4-8" # ---- Level 1: per-project override beats per-agent env and default ---------- def test_project_override_beats_env_and_default(monkeypatch): monkeypatch.setattr(settings, "agent_model_developer", "claude-sonnet-4-6") _install_registry(monkeypatch, {"developer": "claude-opus-4-8"}) assert resolve_agent_model("developer", ORCH_PLANE_ID) == "claude-opus-4-8" # without project_id, falls back to per-agent env assert resolve_agent_model("developer") == "claude-sonnet-4-6" def test_project_override_only_for_listed_agent(monkeypatch): _install_registry(monkeypatch, {"developer": "claude-opus-4-8"}) # reviewer not in agent_models -> falls back to default assert resolve_agent_model("reviewer", ORCH_PLANE_ID) == "claude-opus-4-8" monkeypatch.setattr(settings, "agent_model_reviewer", "claude-sonnet-4-6") assert resolve_agent_model("reviewer", ORCH_PLANE_ID) == "claude-sonnet-4-6" # ---- unknown / empty project id skips level 1 ------------------------------- def test_unknown_project_id_skips_override(monkeypatch): _install_registry(monkeypatch, {"developer": "x-model"}) assert resolve_agent_model("developer", "no-such-uuid") == "claude-opus-4-8" assert resolve_agent_model("developer", None) == "claude-opus-4-8" # ---- unknown agent skips per-agent env, still gets default ------------------ def test_unknown_agent_falls_to_default(): assert resolve_agent_model("nonexistent") == "claude-opus-4-8" # ---- frozen ProjectConfig accepts agent_models ------------------------------ def test_projectconfig_frozen_with_agent_models(): pc = ProjectConfig( plane_project_id="x", repo="r", work_item_prefix="P", name="n", agent_models={"developer": "m"}, ) assert pc.agent_models == {"developer": "m"} # default is an empty dict, not shared/mutable across instances pc2 = ProjectConfig(plane_project_id="y", repo="r2", work_item_prefix="P2", name="n2") assert pc2.agent_models == {} assert pc2.agent_models is not pc.agent_models with pytest.raises(Exception): pc.repo = "changed" # frozen # ---- projects_json parsing of agent_models / agent_efforts ------------------ def test_parse_projects_json_with_overrides(): raw = ( '[{"plane_project_id":"p1","repo":"orchestrator",' '"work_item_prefix":"ORCH",' '"agent_models":{"developer":"claude-opus-4-8","reviewer":"claude-sonnet-4-6"},' '"agent_efforts":{"developer":"xhigh","tester":"low"}}]' ) parsed = _parse_projects_json(raw) assert parsed is not None and len(parsed) == 1 pc = parsed[0] assert pc.agent_models == {"developer": "claude-opus-4-8", "reviewer": "claude-sonnet-4-6"} assert pc.agent_efforts == {"developer": "xhigh", "tester": "low"} def test_parse_projects_json_omitted_overrides_default_empty(): raw = ('[{"plane_project_id":"p1","repo":"r","work_item_prefix":"P"}]') parsed = _parse_projects_json(raw) assert parsed is not None and len(parsed) == 1 assert parsed[0].agent_models == {} assert parsed[0].agent_efforts == {} def test_parse_projects_json_malformed_override_ignored(): # agent_models is not an object -> dropped to {}, entry still valid raw = ('[{"plane_project_id":"p1","repo":"r","work_item_prefix":"P",' '"agent_models":"oops"}]') parsed = _parse_projects_json(raw) assert parsed is not None and parsed[0].agent_models == {} # ============================================================================= # ORCH-074 (G2): model-name validation, never-break. is_valid_model is a # structural format check (^claude-…$), applied on top of the ORCH-41 cascade so # garbage at any level is logged and skipped, never passed to --model. # ============================================================================= # ---- is_valid_model predicate (the single G2 contract) ---------------------- def test_is_valid_model_accepts_canonical(): assert is_valid_model("claude-opus-4-8") is True assert is_valid_model("claude-sonnet-4-6") is True # forward-compatible: a future version passes without a code change assert is_valid_model("claude-opus-4-9") is True # surrounding whitespace is tolerated (stripped) assert is_valid_model(" claude-opus-4-8 ") is True def test_is_valid_model_rejects_garbage(): assert is_valid_model("") is False assert is_valid_model(" ") is False assert is_valid_model(None) is False assert is_valid_model("gpt-4") is False # another provider assert is_valid_model("claud-opus-typo") is False # wrong prefix assert is_valid_model("Claude-Opus-4-8") is False # uppercase not allowed assert is_valid_model("claude-opus 4 8") is False # spaces inside # ---- TC-03: garbage in agent_model_ -> fall back to default ---------- def test_garbage_per_agent_env_falls_back_to_default(monkeypatch, caplog): monkeypatch.setattr(settings, "agent_model_developer", "gpt-4") with caplog.at_level(logging.WARNING): result = resolve_agent_model("developer") assert result == "claude-opus-4-8" # dropped garbage, used default assert any("Invalid model name" in r.message for r in caplog.records) # ---- TC-04: garbage in project-override -> fall back to next valid level ----- def test_garbage_project_override_falls_back_to_default(monkeypatch, caplog): _install_registry(monkeypatch, {"developer": "claud-opus-typo"}) with caplog.at_level(logging.WARNING): result = resolve_agent_model("developer", ORCH_PLANE_ID) assert result == "claude-opus-4-8" # override dropped, default used assert any("Invalid model name" in r.message for r in caplog.records) # ---- TC-05: both override and default invalid -> "" (no --model), no raise --- def test_all_levels_invalid_returns_empty(monkeypatch, caplog): monkeypatch.setattr(settings, "agent_model_default", "totally-bogus") _install_registry(monkeypatch, {"developer": "gpt-4"}) with caplog.at_level(logging.WARNING): result = resolve_agent_model("developer", ORCH_PLANE_ID) assert result == "" # never returns garbage; CLI default applies # both invalid levels were logged assert sum("Invalid model name" in r.message for r in caplog.records) >= 2 # ---- TC-06: valid canonical name passes unchanged (ORCH-41 regression) ------- def test_valid_canonical_unchanged(): assert resolve_agent_model("developer") == "claude-opus-4-8" # ---- TC-07: all 6 agents resolve to claude-opus-4-8 (routing G3 off) --------- def test_all_six_agents_resolve_to_opus_4_8(): for agent in ("analyst", "architect", "developer", "reviewer", "tester", "deployer"): assert resolve_agent_model(agent) == "claude-opus-4-8" # ---- TC-08: valid per-project override still passes validation (AC-8) -------- def test_valid_per_project_override_unchanged(monkeypatch): _install_registry(monkeypatch, {"reviewer": "claude-sonnet-4-6"}) assert resolve_agent_model("reviewer", ORCH_PLANE_ID) == "claude-sonnet-4-6" # ---- TC-09 / TC-11: G4 fallback is OFF (ADR-001 decision 3) ------------------ def test_fallback_model_disabled_by_default(): # G4 not enabled: agent_fallback_model stays "" -> no --fallback-model flag. assert settings.agent_fallback_model == "" # never-break: the SAME predicate guards the inline fallback read in _spawn, # so a typo there would be rejected exactly like a model name. assert is_valid_model("claude-bad typo") is False assert is_valid_model("") is False