From 8a292b9d335bf03a57f93166725f186a22fcbc09 Mon Sep 17 00:00:00 2001 From: dev Date: Fri, 5 Jun 2026 16:16:57 +0000 Subject: [PATCH] feat(agents): configurable LLM model + effort per-agent and per-project (ORCH-41) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Вынести модель/effort агентов из хардкода launcher.py в конфиг. - config.py: ORCH_AGENT_MODEL_{AGENT}/_DEFAULT (default claude-opus-4-8), ORCH_AGENT_EFFORT_{AGENT}/_DEFAULT (думающие=high, tester/deployer=medium), ORCH_AGENT_FALLBACK_MODEL. - projects.py: ProjectConfig.agent_models/agent_efforts (field(default_factory=dict)), парсинг из projects_json через _coerce_str_map. - launcher.py: resolve_agent_model/resolve_agent_effort (project>env>default>пусто), валидация effort {low,medium,high,xhigh,max}; убран хардкод model:opus; собираются флаги --model/--effort/--fallback-model. - tests: test_resolve_agent_model.py, test_resolve_agent_effort.py. - docs: INFRA.md, internals.md, CHANGELOG.md. --- CHANGELOG.md | 1 + docs/architecture/internals.md | 4 + docs/operations/INFRA.md | 25 +++++ src/agents/launcher.py | 94 ++++++++++++++++- src/config.py | 28 ++++++ src/projects.py | 26 ++++- tests/test_resolve_agent_effort.py | 138 +++++++++++++++++++++++++ tests/test_resolve_agent_model.py | 156 +++++++++++++++++++++++++++++ 8 files changed, 467 insertions(+), 5 deletions(-) create mode 100644 tests/test_resolve_agent_effort.py create mode 100644 tests/test_resolve_agent_model.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 235e5f9..1bbb8aa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ ## [Unreleased] ### Added +- **Конфигурируемые модель LLM и режим работы (`--effort`) агентов** (ORCH-41): модель/effort каждого агента вынесены из хардкода `launcher.py` в конфиг — глобально per-agent (`ORCH_AGENT_MODEL_{AGENT}` / `ORCH_AGENT_EFFORT_{AGENT}`, дефолты `ORCH_AGENT_MODEL_DEFAULT=claude-opus-4-8`, `ORCH_AGENT_EFFORT_DEFAULT=high`) и per-project (`agent_models` / `agent_efforts` в 
`ORCH_PROJECTS_JSON`). Резолверы `resolve_agent_model` / `resolve_agent_effort` (приоритет project > per-agent env > default > пусто), валидация effort `{low,medium,high,xhigh,max}`, опц. `ORCH_AGENT_FALLBACK_MODEL` (`--fallback-model`). Хардкод `"model":"opus"` (architect/reviewer) удалён. Тесты: `test_resolve_agent_model.py`, `test_resolve_agent_effort.py`. - **Единый status-коммент агентов в Plane** (ORCH-016): `usage.build_status_comment(...)` — один хелпер для ВСЕХ ролей (analyst..deployer). HTML-формат: header `{icon} {Role} — {описание}`, опциональная строка `Verdict/Status: …` из YAML-frontmatter артефакта, **строка `Длительность: 4m 12s`** (явный `duration_s` от launcher, fallback из `agent_runs` для аналитика), `Документы:`, тех-хвост `tokens · cost`. Утилитки: `usage.fmt_duration`, `usage.get_agent_duration`, новый модуль `src/frontmatter.py` (defensive YAML reader). ADR `docs/work-items/ORCH-016/06-adr/ADR-001-unified-status-comment.md`. - **Документация по канону** (ORCH-9): `CLAUDE.md` (паспорт проекта), структура `docs/` (`architecture/` + `adr/`, `operations/`, `work-items/`, `history/`), `docs/operations/INFRA.md` (RUNBOOK с инфра-изоляцией и self-hosting рисками). - **ADR**: adr-0001 (multi-repo registry), adr-0002 (job queue), adr-0003 (условный staging-гейт). diff --git a/docs/architecture/internals.md b/docs/architecture/internals.md index ac7b878..3e01346 100644 --- a/docs/architecture/internals.md +++ b/docs/architecture/internals.md @@ -326,6 +326,10 @@ jobs со статусом `running` (воркер умёр на рестарт - `ORCH_MAX_CONCURRENCY` (default 1) — лимит параллельных jobs. - `ORCH_QUEUE_POLL_INTERVAL` (default 2.0) — период опроса. +- `ORCH_AGENT_MODEL_DEFAULT` / `ORCH_AGENT_MODEL_{AGENT}` (ORCH-41) — модель агентов; дефолт `claude-opus-4-8`. +- `ORCH_AGENT_EFFORT_DEFAULT` / `ORCH_AGENT_EFFORT_{AGENT}` (ORCH-41) — режим `--effort` (low|medium|high|xhigh|max). +- `ORCH_AGENT_FALLBACK_MODEL` (ORCH-41) — опц. `--fallback-model` при overloaded. 
+- per-project override: `agent_models` / `agent_efforts` в `ORCH_PROJECTS_JSON`; резолверы `resolve_agent_model` / `resolve_agent_effort` (project > per-agent env > default > пусто). Наблюдаемость: `GET /queue` — counts по статусам + последние 10 jobs. diff --git a/docs/operations/INFRA.md b/docs/operations/INFRA.md index 0895543..cf9d248 100644 --- a/docs/operations/INFRA.md +++ b/docs/operations/INFRA.md @@ -48,6 +48,11 @@ | `ORCH_REPOS_DIR` / `ORCH_HOST_REPOS_DIR` | каталог репозиториев (в контейнере / на хосте) | | `ORCH_DB_PATH` | путь к SQLite БД | | `ORCH_PROJECTS_JSON` | реестр проектов (Plane id → repo + prefix); пусто → дефолт из `src/projects.py` | +| `ORCH_AGENT_MODEL_DEFAULT` | LLM-модель агентов по умолчанию (ORCH-41); дефолт `claude-opus-4-8` | +| `ORCH_AGENT_MODEL_{AGENT}` | per-agent модель (ANALYST/ARCHITECT/DEVELOPER/REVIEWER/TESTER/DEPLOYER); пусто → default | +| `ORCH_AGENT_EFFORT_DEFAULT` | режим работы `--effort` по умолчанию (ORCH-41): low\|medium\|high\|xhigh\|max; дефолт `high` | +| `ORCH_AGENT_EFFORT_{AGENT}` | per-agent effort; дефолт: думающие → high, tester/deployer → medium; пусто → default | +| `ORCH_AGENT_FALLBACK_MODEL` | опц. фолбэк-модель при overloaded (`--fallback-model`); пусто → без флага | | `DEPLOY_SSH_USER` / `_HOST` / `DEPLOY_HOOK_SCRIPT` | параметры деплой-хука | **Секреты — только в `.env` / `.env.staging` на хосте, в гит НЕ коммитятся.** Канон — `.env.example`, `.env.staging.example`. @@ -55,6 +60,26 @@ ## Реестр проектов (`src/projects.py`, ORCH-6) Связывает Plane project id → gitea repo + work-item prefix. Источник: `ORCH_PROJECTS_JSON`, fallback — встроенный дефолт. Прод видит: `enduro-trails` (ET), `orchestrator` (ORCH). Staging видит ТОЛЬКО `orchestrator-sandbox` (SANDBOX) — изоляция. +## Модель и effort агентов (`src/config.py` + `src/agents/launcher.py`, ORCH-41) +Модель LLM и режим работы (`--effort`) каждого агента **конфигурируемы** — глобально per-agent (env) и per-project (через `ORCH_PROJECTS_JSON`). 
+ +**Приоритет резолвинга** (`resolve_agent_model` / `resolve_agent_effort`): +1. per-project override — `agent_models` / `agent_efforts` в записи `ORCH_PROJECTS_JSON`; +2. per-agent env — `ORCH_AGENT_MODEL_{AGENT}` / `ORCH_AGENT_EFFORT_{AGENT}` (если непусто); +3. глобальный дефолт — `ORCH_AGENT_MODEL_DEFAULT` (`claude-opus-4-8`) / `ORCH_AGENT_EFFORT_DEFAULT` (`high`); +4. пусто → флаг не передаётся, действует дефолт CLI. + +**Значения effort:** `low` < `medium` < `high` < `xhigh` < `max` — рычаг «качество vs стоимость/время». Дефолтная раскладка: думающие агенты (analyst/architect/developer/reviewer) → `high`, механические (tester/deployer) → `medium`. Невалидное значение → лог-warning, флаг опускается. + +**Per-project override в `ORCH_PROJECTS_JSON`** (поля `agent_models` / `agent_efforts` опциональны, старые записи работают): +```json +{"plane_project_id":"...","repo":"orchestrator","work_item_prefix":"ORCH", + "agent_models":{"developer":"claude-opus-4-8","reviewer":"claude-sonnet-4-6"}, + "agent_efforts":{"developer":"xhigh","tester":"low"}} +``` + +> ⚠️ Бюджет (ORCH-38): `claude-opus-4-8` дефолт в коде; реальное переключение прод-env делается отдельно после согласования. + ## ⚠️ Self-hosting — оркестратор дорабатывает САМ СЕБЯ **Факт:** прод-инстанс `orchestrator` (8500) — ОДИН на ВСЕ прод-проекты (enduro-trails + orchestrator), с ОБЩЕЙ БД `./data/orchestrator.db` и общей очередью задач (ORCH-1). diff --git a/src/agents/launcher.py b/src/agents/launcher.py index a7ff808..43d8019 100644 --- a/src/agents/launcher.py +++ b/src/agents/launcher.py @@ -15,6 +15,82 @@ from ..plane_sync import notify_stage_change as plane_notify_stage, add_comment logger = logging.getLogger("orchestrator.launcher") +# ORCH-41: valid --effort values accepted by the Claude CLI. An effort that is +# not in this set is treated as misconfiguration: logged and dropped (no flag), +# never passed through to the CLI. 
+VALID_EFFORTS = frozenset({"low", "medium", "high", "xhigh", "max"}) + + +def _resolve_agent_attr(agent, project_id, project_map_attr, env_attr_prefix, + default_attr): + """ORCH-41 shared resolver with priority: + 1. ProjectConfig.[agent] (per-project override) + 2. settings. (per-agent env, if non-empty) + 3. settings. (global default) + 4. "" (no flag -> CLI default) + + project_id is the Plane project uuid. It is resolved to a ProjectConfig via + the registry; an unknown / empty id simply skips level 1. A missing per-agent + settings attribute (e.g. unknown agent name) skips level 2. + """ + # Level 1: per-project override. + if project_id: + from ..projects import get_project_by_plane_id + proj = get_project_by_plane_id(project_id) + if proj is not None: + override = getattr(proj, project_map_attr, {}).get(agent) + if override: + return override + + # Level 2: per-agent env (settings.), if defined & non-empty. + per_agent = getattr(settings, f"{env_attr_prefix}{agent}", "") + if per_agent: + return per_agent + + # Level 3: global default. + default = getattr(settings, default_attr, "") + if default: + return default + + # Level 4: nothing -> CLI default. + return "" + + +def resolve_agent_model(agent: str, project_id: str = None) -> str: + """ORCH-41: resolve the LLM model for an agent (optionally per-project). + + Returns "" when no model is configured at any level -> caller omits --model + and the CLI default applies. See _resolve_agent_attr for the priority order. + """ + return _resolve_agent_attr( + agent, project_id, + project_map_attr="agent_models", + env_attr_prefix="agent_model_", + default_attr="agent_model_default", + ) + + +def resolve_agent_effort(agent: str, project_id: str = None) -> str: + """ORCH-41: resolve the --effort level for an agent (optionally per-project). + + Same priority as resolve_agent_model. 
The resolved value is validated against + VALID_EFFORTS; an invalid value is logged and dropped (returns "") so a typo + in env/projects_json can never pass a bad flag to the CLI. + """ + value = _resolve_agent_attr( + agent, project_id, + project_map_attr="agent_efforts", + env_attr_prefix="agent_effort_", + default_attr="agent_effort_default", + ) + if value and value not in VALID_EFFORTS: + logger.warning( + f"Invalid effort '{value}' for agent '{agent}' " + f"(allowed: {sorted(VALID_EFFORTS)}); omitting --effort" + ) + return "" + return value + def prune_run_logs(runs_dir, keep_days=30, keep_max=500, active_paths=None): """L-2: best-effort rotation of per-run logs (/*.log). @@ -85,7 +161,6 @@ class AgentLauncher: "system_prompt": ".openclaw/agents/architect.md", "task_file": ".task-arch.md", "allowed_tools": "Read,Write,Edit,Bash", - "model": "opus", }, "developer": { "system_prompt": ".openclaw/agents/developer.md", @@ -96,7 +171,6 @@ class AgentLauncher: "system_prompt": ".openclaw/agents/reviewer.md", "task_file": ".task-review.md", "allowed_tools": "Read,Write,Edit,Bash", - "model": "opus", }, "tester": { "system_prompt": ".openclaw/agents/tester.md", @@ -171,6 +245,12 @@ class AgentLauncher: _br_row = get_db().execute("SELECT branch FROM tasks WHERE id=?", (task_id,)).fetchone() if task_id else None agent_branch = _br_row[0] if _br_row else "main" + # ORCH-41: resolve the Plane project uuid for this repo so per-project + # model/effort overrides apply. Unknown repo -> None (env/default only). + from ..projects import get_project_by_repo + _proj = get_project_by_repo(repo) + project_id = _proj.plane_project_id if _proj else None + # Ensure the per-branch worktree exists and is on the right branch. 
work_path = ensure_worktree(repo, agent_branch) @@ -204,8 +284,14 @@ class AgentLauncher: system_prompt = config["system_prompt"] allowed_tools = config["allowed_tools"] - model = config.get("model", "") + # ORCH-41: model + effort + optional fallback are resolved from config + # (project-override > per-agent env > default), not hardcoded in AGENT_CONFIGS. + model = resolve_agent_model(agent, project_id) + effort = resolve_agent_effort(agent, project_id) model_flag = f"--model {model} " if model else "" + effort_flag = f"--effort {effort} " if effort else "" + fb = settings.agent_fallback_model + fb_flag = f"--fallback-model {fb} " if fb else "" # No git fetch/checkout here: ensure_worktree() already put the worktree on # the right branch. The agent simply runs inside its isolated work_path. @@ -218,7 +304,7 @@ class AgentLauncher: f'cd {work_path} && ' f'{self.CLAUDE_BIN} --print ' f'--output-format json ' - f'{model_flag}' + f'{model_flag}{effort_flag}{fb_flag}' f'"$(cat {task_file})" ' f'--system-prompt "$(cat {system_prompt})" ' f'--allowedTools {allowed_tools}' diff --git a/src/config.py b/src/config.py index bd514fe..d8869c1 100644 --- a/src/config.py +++ b/src/config.py @@ -78,6 +78,34 @@ class Settings(BaseSettings): agent_kill_grace_seconds: int = 20 agent_timeout_overrides_json: str = "" + # ORCH-41: per-agent LLM model. Empty -> agent_model_default. Resolution order: + # project-override (projects_json agent_models) > ORCH_AGENT_MODEL_ > + # agent_model_default > CLI default (no --model flag). Default is 4-8 because + # 4-7 == 4-8 in price (Slava 05.06); do NOT hardcode the version anywhere else. + agent_model_default: str = "claude-opus-4-8" + agent_model_analyst: str = "" + agent_model_architect: str = "" + agent_model_developer: str = "" + agent_model_reviewer: str = "" + agent_model_tester: str = "" + agent_model_deployer: str = "" + + # ORCH-41: per-agent effort / reasoning level: low|medium|high|xhigh|max. + # Empty -> agent_effort_default. 
Same resolution order as model. Default split: + # thinking agents (analyst/architect/developer/reviewer) -> high; mechanical + # agents (tester/deployer) -> medium. These non-empty per-agent defaults take precedence, so agent_effort_default only applies when a per-agent value is set to "". + agent_effort_default: str = "high" + agent_effort_analyst: str = "high" + agent_effort_architect: str = "high" + agent_effort_developer: str = "high" + agent_effort_reviewer: str = "high" + agent_effort_tester: str = "medium" + agent_effort_deployer: str = "medium" + + # ORCH-41: optional fallback model (one global setting shared by all agents), used when the primary is + # overloaded (--fallback-model, works with --print). Empty -> no flag. + agent_fallback_model: str = "" + # L-2: run-log rotation. Old per-run logs in /runs/*.log are pruned at # app startup (best-effort). A *.log is removed if it is older than # log_keep_days OR not within the log_keep_max most-recent logs (whichever diff --git a/src/projects.py b/src/projects.py index 3d9f11a..173e4b1 100644 --- a/src/projects.py +++ b/src/projects.py @@ -17,7 +17,7 @@ registry is used so the system works out of the box. import json import logging -from dataclasses import dataclass +from dataclasses import dataclass, field from .config import settings @@ -30,6 +30,11 @@ class ProjectConfig: repo: str # gitea repo name (== folder under /repos) work_item_prefix: str # ET / ORCH name: str # human-readable label + # ORCH-41: optional per-project agent->model / agent->effort overrides parsed + # from projects_json. frozen dataclass + mutable default -> field(default_factory=dict) + # (a bare {} default raises ValueError). Empty dict = no override (old records work). + agent_models: dict = field(default_factory=dict) + agent_efforts: dict = field(default_factory=dict) # Built-in default registry (used when ORCH_PROJECTS_JSON is empty/invalid). @@ -50,6 +55,23 @@ _DEFAULT_PROJECTS = [ ] +def _coerce_str_map(value, idx, field_name) -> dict: + """ORCH-41: coerce an optional projects_json sub-object into a {str: str} dict. + + Missing / null -> {} (no override). 
A non-object value is logged and dropped so + one malformed entry can never brick the whole registry; non-string keys/values + are stringified for safety. + """ + if value is None: + return {} + if not isinstance(value, dict): + logger.error( + f"ORCH_PROJECTS_JSON[{idx}].{field_name} is not an object, ignoring" + ) + return {} + return {str(k): str(v) for k, v in value.items()} + + def _parse_projects_json(raw: str) -> list[ProjectConfig] | None: """Parse ORCH_PROJECTS_JSON. Returns None if empty/invalid (-> use default).""" if not raw or not raw.strip(): @@ -75,6 +97,8 @@ def _parse_projects_json(raw: str) -> list[ProjectConfig] | None: repo=str(item["repo"]), work_item_prefix=str(item["work_item_prefix"]), name=str(item.get("name", item["repo"])), + agent_models=_coerce_str_map(item.get("agent_models"), i, "agent_models"), + agent_efforts=_coerce_str_map(item.get("agent_efforts"), i, "agent_efforts"), ) ) except KeyError as e: diff --git a/tests/test_resolve_agent_effort.py b/tests/test_resolve_agent_effort.py new file mode 100644 index 0000000..d2718d4 --- /dev/null +++ b/tests/test_resolve_agent_effort.py @@ -0,0 +1,138 @@ +"""ORCH-41: tests for resolve_agent_effort + effort validation + flag assembly. + +Mirrors test_resolve_agent_model's 4-level priority for the --effort lever, and +adds: + - validation: a value outside {low,medium,high,xhigh,max} is dropped -> "" + - flag assembly: --model / --effort / --fallback-model are present/absent in + the built command exactly when the resolved value is non-empty. 
+""" +import os +import tempfile + +import pytest + +os.environ.setdefault("ORCH_DB_PATH", + os.path.join(tempfile.gettempdir(), "test_orch41_effort.db")) +os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token") +os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token") + +from src.agents.launcher import ( + resolve_agent_effort, resolve_agent_model, VALID_EFFORTS, +) +from src.config import settings +from src import projects as P +from src.projects import ProjectConfig, reload_projects + +ORCH_PLANE_ID = "8da6aa25-a60e-44d6-a1e2-d8ae59aa7d6a" + + +@pytest.fixture(autouse=True) +def _clean_settings(monkeypatch): + monkeypatch.setattr(settings, "agent_effort_default", "high") + for a in ("analyst", "architect", "developer", "reviewer"): + monkeypatch.setattr(settings, f"agent_effort_{a}", "high") + for a in ("tester", "deployer"): + monkeypatch.setattr(settings, f"agent_effort_{a}", "medium") + monkeypatch.setattr(P.settings, "projects_json", "") + reload_projects() + yield + reload_projects() + + +def _install_registry(monkeypatch, agent_efforts): + reg = [ProjectConfig( + plane_project_id=ORCH_PLANE_ID, repo="orchestrator", + work_item_prefix="ORCH", name="orchestrator", + agent_efforts=agent_efforts, + )] + monkeypatch.setattr(P, "PROJECTS", reg) + monkeypatch.setattr(P, "_BY_PLANE_ID", {p.plane_project_id: p for p in reg}) + monkeypatch.setattr(P, "_BY_REPO", {p.repo: p for p in reg}) + + +# ---- default split ---------------------------------------------------------- +def test_default_split(): + assert resolve_agent_effort("developer") == "high" + assert resolve_agent_effort("architect") == "high" + assert resolve_agent_effort("tester") == "medium" + assert resolve_agent_effort("deployer") == "medium" + + +# ---- level 4: nothing -> "" ------------------------------------------------- +def test_no_config_returns_empty(monkeypatch): + monkeypatch.setattr(settings, "agent_effort_default", "") + monkeypatch.setattr(settings, "agent_effort_tester", "") + assert 
resolve_agent_effort("tester") == "" + + +# ---- level 2: per-agent env beats default ----------------------------------- +def test_per_agent_env(monkeypatch): + monkeypatch.setattr(settings, "agent_effort_tester", "low") + assert resolve_agent_effort("tester") == "low" + + +# ---- level 1: project override wins ----------------------------------------- +def test_project_override(monkeypatch): + monkeypatch.setattr(settings, "agent_effort_developer", "high") + _install_registry(monkeypatch, {"developer": "xhigh"}) + assert resolve_agent_effort("developer", ORCH_PLANE_ID) == "xhigh" + assert resolve_agent_effort("developer") == "high" + + +# ---- validation: invalid value dropped -------------------------------------- +def test_invalid_default_dropped(monkeypatch): + monkeypatch.setattr(settings, "agent_effort_developer", "") + monkeypatch.setattr(settings, "agent_effort_default", "turbo") + assert resolve_agent_effort("developer") == "" + + +def test_invalid_env_dropped(monkeypatch): + monkeypatch.setattr(settings, "agent_effort_reviewer", "ultra") + assert resolve_agent_effort("reviewer") == "" + + +def test_invalid_project_override_dropped(monkeypatch): + _install_registry(monkeypatch, {"developer": "bogus"}) + assert resolve_agent_effort("developer", ORCH_PLANE_ID) == "" + + +def test_all_valid_efforts_pass(monkeypatch): + monkeypatch.setattr(settings, "agent_effort_developer", "") + for e in VALID_EFFORTS: + monkeypatch.setattr(settings, "agent_effort_default", e) + assert resolve_agent_effort("developer") == e + + +# ---- flag assembly (mirror of launcher cmd construction) -------------------- +def _build_flags(model, effort, fb): + model_flag = f"--model {model} " if model else "" + effort_flag = f"--effort {effort} " if effort else "" + fb_flag = f"--fallback-model {fb} " if fb else "" + return f"{model_flag}{effort_flag}{fb_flag}" + + +def test_flags_present_when_configured(monkeypatch): + monkeypatch.setattr(settings, "agent_fallback_model", 
"claude-sonnet-4-6") + model = resolve_agent_model("developer") + effort = resolve_agent_effort("developer") + fb = settings.agent_fallback_model + flags = _build_flags(model, effort, fb) + assert "--model claude-opus-4-8 " in flags + assert "--effort high " in flags + assert "--fallback-model claude-sonnet-4-6 " in flags + + +def test_flags_absent_when_empty(monkeypatch): + monkeypatch.setattr(settings, "agent_model_default", "") + monkeypatch.setattr(settings, "agent_model_developer", "") + monkeypatch.setattr(settings, "agent_effort_default", "") + monkeypatch.setattr(settings, "agent_effort_developer", "") + monkeypatch.setattr(settings, "agent_fallback_model", "") + model = resolve_agent_model("developer") + effort = resolve_agent_effort("developer") + fb = settings.agent_fallback_model + flags = _build_flags(model, effort, fb) + assert flags == "" + assert "--model" not in flags + assert "--effort" not in flags + assert "--fallback-model" not in flags diff --git a/tests/test_resolve_agent_model.py b/tests/test_resolve_agent_model.py new file mode 100644 index 0000000..029d6f0 --- /dev/null +++ b/tests/test_resolve_agent_model.py @@ -0,0 +1,156 @@ +"""ORCH-41: tests for resolve_agent_model (per-agent + per-project LLM model). + +Covers the 4-level resolution priority: + 1. ProjectConfig.agent_models[agent] (per-project override, from projects_json) + 2. settings.agent_model_ (per-agent env, when non-empty) + 3. settings.agent_model_default (global default) + 4. "" (no override anywhere -> CLI default) + +plus: unknown project_id / no project_id skips level 1, unknown agent skips +level 2, and the frozen ProjectConfig still accepts agent_models (default {}). + +We never mutate the module-global registry permanently: tests that need a +custom registry install one via monkeypatch + reload_projects and restore the +default afterwards (autouse fixture). 
+""" +import os +import tempfile + +import pytest + +os.environ.setdefault("ORCH_DB_PATH", + os.path.join(tempfile.gettempdir(), "test_orch41_model.db")) +os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token") +os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token") + +from src.agents.launcher import resolve_agent_model +from src.config import settings +from src import projects as P +from src.projects import ProjectConfig, reload_projects, _parse_projects_json + +ORCH_PLANE_ID = "8da6aa25-a60e-44d6-a1e2-d8ae59aa7d6a" +ENDURO_PLANE_ID = "7a79f0a9-5278-49cd-9007-9a338f238f9c" + + +@pytest.fixture(autouse=True) +def _clean_settings(monkeypatch): + """Reset all per-agent/default model settings to a known baseline so tests + are order-independent regardless of what other modules set in the env.""" + monkeypatch.setattr(settings, "agent_model_default", "claude-opus-4-8") + for a in ("analyst", "architect", "developer", "reviewer", "tester", "deployer"): + monkeypatch.setattr(settings, f"agent_model_{a}", "") + # default registry (no per-project overrides) + monkeypatch.setattr(P.settings, "projects_json", "") + reload_projects() + yield + reload_projects() + + +def _install_registry(monkeypatch, agent_models): + """Install a single-project registry for ORCH with the given agent_models.""" + reg = [ProjectConfig( + plane_project_id=ORCH_PLANE_ID, repo="orchestrator", + work_item_prefix="ORCH", name="orchestrator", + agent_models=agent_models, + )] + monkeypatch.setattr(P, "PROJECTS", reg) + monkeypatch.setattr(P, "_BY_PLANE_ID", {p.plane_project_id: p for p in reg}) + monkeypatch.setattr(P, "_BY_REPO", {p.repo: p for p in reg}) + + +# ---- Level 4: nothing configured -> "" -------------------------------------- +def test_no_config_returns_empty(monkeypatch): + monkeypatch.setattr(settings, "agent_model_default", "") + assert resolve_agent_model("developer") == "" + assert resolve_agent_model("developer", ORCH_PLANE_ID) == "" + + +# ---- Level 3: global default 
------------------------------------------------ +def test_global_default(): + assert resolve_agent_model("developer") == "claude-opus-4-8" + assert resolve_agent_model("architect") == "claude-opus-4-8" + + +# ---- Level 2: per-agent env beats default ----------------------------------- +def test_per_agent_env_overrides_default(monkeypatch): + monkeypatch.setattr(settings, "agent_model_reviewer", "claude-sonnet-4-6") + assert resolve_agent_model("reviewer") == "claude-sonnet-4-6" + # other agents still fall through to default + assert resolve_agent_model("developer") == "claude-opus-4-8" + + +# ---- Level 1: per-project override beats per-agent env and default ---------- +def test_project_override_beats_env_and_default(monkeypatch): + monkeypatch.setattr(settings, "agent_model_developer", "claude-sonnet-4-6") + _install_registry(monkeypatch, {"developer": "claude-opus-4-8"}) + assert resolve_agent_model("developer", ORCH_PLANE_ID) == "claude-opus-4-8" + # without project_id, falls back to per-agent env + assert resolve_agent_model("developer") == "claude-sonnet-4-6" + + +def test_project_override_only_for_listed_agent(monkeypatch): + _install_registry(monkeypatch, {"developer": "claude-opus-4-8"}) + # reviewer not in agent_models -> falls back to default + assert resolve_agent_model("reviewer", ORCH_PLANE_ID) == "claude-opus-4-8" + monkeypatch.setattr(settings, "agent_model_reviewer", "claude-sonnet-4-6") + assert resolve_agent_model("reviewer", ORCH_PLANE_ID) == "claude-sonnet-4-6" + + +# ---- unknown / empty project id skips level 1 ------------------------------- +def test_unknown_project_id_skips_override(monkeypatch): + _install_registry(monkeypatch, {"developer": "x-model"}) + assert resolve_agent_model("developer", "no-such-uuid") == "claude-opus-4-8" + assert resolve_agent_model("developer", None) == "claude-opus-4-8" + + +# ---- unknown agent skips per-agent env, still gets default ------------------ +def test_unknown_agent_falls_to_default(): + assert 
resolve_agent_model("nonexistent") == "claude-opus-4-8" + + +# ---- frozen ProjectConfig accepts agent_models ------------------------------ +def test_projectconfig_frozen_with_agent_models(): + pc = ProjectConfig( + plane_project_id="x", repo="r", work_item_prefix="P", name="n", + agent_models={"developer": "m"}, + ) + assert pc.agent_models == {"developer": "m"} + # default is an empty dict, not shared/mutable across instances + pc2 = ProjectConfig(plane_project_id="y", repo="r2", + work_item_prefix="P2", name="n2") + assert pc2.agent_models == {} + assert pc2.agent_models is not pc.agent_models + with pytest.raises(Exception): + pc.repo = "changed" # frozen + + +# ---- projects_json parsing of agent_models / agent_efforts ------------------ +def test_parse_projects_json_with_overrides(): + raw = ( + '[{"plane_project_id":"p1","repo":"orchestrator",' + '"work_item_prefix":"ORCH",' + '"agent_models":{"developer":"claude-opus-4-8","reviewer":"claude-sonnet-4-6"},' + '"agent_efforts":{"developer":"xhigh","tester":"low"}}]' + ) + parsed = _parse_projects_json(raw) + assert parsed is not None and len(parsed) == 1 + pc = parsed[0] + assert pc.agent_models == {"developer": "claude-opus-4-8", + "reviewer": "claude-sonnet-4-6"} + assert pc.agent_efforts == {"developer": "xhigh", "tester": "low"} + + +def test_parse_projects_json_omitted_overrides_default_empty(): + raw = ('[{"plane_project_id":"p1","repo":"r","work_item_prefix":"P"}]') + parsed = _parse_projects_json(raw) + assert parsed is not None and len(parsed) == 1 + assert parsed[0].agent_models == {} + assert parsed[0].agent_efforts == {} + + +def test_parse_projects_json_malformed_override_ignored(): + # agent_models is not an object -> dropped to {}, entry still valid + raw = ('[{"plane_project_id":"p1","repo":"r","work_item_prefix":"P",' + '"agent_models":"oops"}]') + parsed = _parse_projects_json(raw) + assert parsed is not None and parsed[0].agent_models == {}