Merge-gate re-test runs under the orchestrator's prod env, where the operator legitimately set ORCH_AGENT_FALLBACK_MODEL and changed ORCH_AGENT_MODEL_DEFAULT / ORCH_AGENT_EFFORT_*. Two ORCH-41-era tests asserted SHIPPED defaults through the env-backed settings singleton and failed 3/3 there, while Gitea CI (clean env) stayed green. Branch ORCH-009 touches neither src/ nor these tests - latent non-hermetic landmine on main, detonated by the prod env change. - test_resolve_agent_effort.py: autouse fixture now mirrors the sibling model-file baseline (pins shipped model/fallback fields) so the flag-assembly tests are env-independent. - test_resolve_agent_model.py: fixture also resets agent_fallback_model; test_fallback_model_disabled_by_default now asserts the CLASS field default (the actual ORCH-074 ADR-001 G4 invariant: shipped default is ""), never-break is_valid_model asserts unchanged byte-for-byte. Clean-env behaviour is byte-equivalent (fixtures pin exactly what an empty env yields). Full suite: 1713 passed (was 2 failed / 1711). Refs: ORCH-009 Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
229 lines
9.3 KiB
Python
229 lines
9.3 KiB
Python
"""ORCH-41: tests for resolve_agent_effort + effort validation + flag assembly.
|
|
|
|
Mirrors test_resolve_agent_model's 4-level priority for the --effort lever, and
|
|
adds:
|
|
- validation: a value outside {low,medium,high,xhigh,max} is dropped -> ""
|
|
- flag assembly: --model / --effort / --fallback-model are present/absent in
|
|
the built command exactly when the resolved value is non-empty.
|
|
"""
|
|
import os
|
|
import tempfile
|
|
|
|
import pytest
|
|
|
|
# Fallback values for the environment variables below. They are applied before
# the ``src.*`` imports that follow (presumably because the settings object
# reads them at import time -- confirm against src.config). ``setdefault``
# leaves any value that is already present in the environment untouched.
_ENV_FALLBACKS = (
    ("ORCH_DB_PATH", os.path.join(tempfile.gettempdir(), "test_orch41_effort.db")),
    ("ORCH_GITEA_TOKEN", "test-token"),
    ("ORCH_PLANE_API_TOKEN", "test-token"),
)
for _env_name, _env_value in _ENV_FALLBACKS:
    os.environ.setdefault(_env_name, _env_value)
|
|
|
|
from src.agents.launcher import (
|
|
resolve_agent_effort, resolve_agent_model, VALID_EFFORTS,
|
|
)
|
|
from src.config import settings
|
|
from src import projects as P
|
|
from src.projects import ProjectConfig, reload_projects
|
|
|
|
# Plane project id given to the single fake "orchestrator" project that
# _install_registry builds; tests pass it to resolve_agent_effort to select
# that project's overrides.
ORCH_PLANE_ID = "8da6aa25-a60e-44d6-a1e2-d8ae59aa7d6a"
|
|
|
|
|
|
# ORCH-081/ORCH-52h: canonical effort level for each agent role
# (developer was upgraded from high to xhigh). Insertion order is the order
# in which the parametrized tests below enumerate the roles.
CANON_EFFORT = dict(
    analyst="high",
    architect="high",
    developer="xhigh",
    reviewer="high",
    tester="medium",
    deployer="medium",
)
|
|
|
|
|
|
@pytest.fixture(autouse=True)
def _clean_settings(monkeypatch):
    """Pin the settings singleton to a known baseline around every test.

    Effort fields are set to the canonical per-role levels from CANON_EFFORT
    (global default "high"). The model and fallback-model fields are pinned as
    well, because the flag-assembly tests read them and the host environment
    (e.g. the prod .env the merge-gate re-test runs under) may legitimately
    set ORCH_AGENT_MODEL_* / ORCH_AGENT_FALLBACK_MODEL. These tests assert
    shipped-default behaviour, not the host configuration. This mirrors the
    baseline used by test_resolve_agent_model.

    The project registry is reloaded with an empty ``projects_json`` before
    the test and reloaded once more afterwards.
    """
    # Build the pins in one ordered table: effort default, per-role efforts,
    # model default, per-role models (blank), then the fallback model.
    pinned = {"agent_effort_default": "high"}
    pinned.update(
        {f"agent_effort_{role}": level for role, level in CANON_EFFORT.items()}
    )
    pinned["agent_model_default"] = "claude-opus-4-8"
    pinned.update({f"agent_model_{role}": "" for role in CANON_EFFORT})
    pinned["agent_fallback_model"] = ""

    for field, value in pinned.items():
        monkeypatch.setattr(settings, field, value)

    # Start every test from an empty project registry.
    monkeypatch.setattr(P.settings, "projects_json", "")
    reload_projects()

    yield

    # NOTE(review): pytest tears fixtures down in reverse setup order, so this
    # reload presumably still sees the patched settings (monkeypatch is undone
    # afterwards) -- confirm that is the intended end state of the registry.
    reload_projects()
|
|
|
|
|
|
def _install_registry(monkeypatch, agent_efforts):
    """Replace the project registry with one fake "orchestrator" project.

    The project is keyed by ORCH_PLANE_ID and carries ``agent_efforts`` as its
    per-agent effort overrides. The module-level list and both lookup dicts in
    ``src.projects`` are patched through ``monkeypatch``, so they are restored
    when the test ends.
    """
    project = ProjectConfig(
        plane_project_id=ORCH_PLANE_ID,
        repo="orchestrator",
        work_item_prefix="ORCH",
        name="orchestrator",
        agent_efforts=agent_efforts,
    )
    monkeypatch.setattr(P, "PROJECTS", [project])
    monkeypatch.setattr(P, "_BY_PLANE_ID", {project.plane_project_id: project})
    monkeypatch.setattr(P, "_BY_REPO", {project.repo: project})
|
|
|
|
|
|
# ---- TC-01: canonical defaults (AC-1 / FR-4) --------------------------------
|
|
def test_default_split():
    """With the baseline settings, roles resolve to their canonical effort split."""
    expected_by_role = {
        "developer": "xhigh",
        "architect": "high",
        "tester": "medium",
        "deployer": "medium",
    }
    for role, level in expected_by_role.items():
        assert resolve_agent_effort(role) == level
|
|
|
|
|
|
@pytest.mark.parametrize("agent,expected", list(CANON_EFFORT.items()))
def test_canonical_effort_all_roles(agent, expected):
    """Every role listed in CANON_EFFORT resolves to its canonical level."""
    resolved = resolve_agent_effort(agent)
    assert resolved == expected
|
|
|
|
|
|
# ---- TC-02: empty env -> per-role floor (variant c, AC-2) -------------------
|
|
@pytest.mark.parametrize("agent,expected", list(CANON_EFFORT.items()))
def test_empty_env_falls_back_to_per_role_floor(monkeypatch, agent, expected):
    """Blank effort settings must still resolve to the canonical per-role level.

    Models the prod bug where ORCH_AGENT_EFFORT_*= is present but empty, so
    every level on the settings instance is ''. The per-role floor (the config
    class-default) must then supply the canonical level, never ''.
    """
    blanked_fields = [
        "agent_effort_default",
        *(f"agent_effort_{role}" for role in CANON_EFFORT),
    ]
    for field in blanked_fields:
        monkeypatch.setattr(settings, field, "")

    resolved = resolve_agent_effort(agent)

    assert resolved == expected
    assert resolved != ""
|
|
|
|
|
|
# ---- unknown agent floor degrades to default (high), never '' ---------------
|
|
def test_empty_env_unknown_agent_floor_is_default(monkeypatch):
    """An unknown role with blank settings resolves to "high", never ''."""
    for field in ("agent_effort_default", "agent_effort_tester"):
        monkeypatch.setattr(settings, field, "")

    # There is no agent_effort_<name> field for this role, so the floor is the
    # agent_effort_default class-default (high): a safe, non-empty value.
    resolved = resolve_agent_effort("nonexistent_role")
    assert resolved == "high"
|
|
|
|
|
|
# ---- level 2: per-agent env beats default -----------------------------------
|
|
def test_per_agent_env(monkeypatch):
    """Level 2: a per-agent effort setting beats the global default."""
    monkeypatch.setattr(settings, "agent_effort_tester", "low")

    resolved = resolve_agent_effort("tester")

    assert resolved == "low"
|
|
|
|
|
|
# ---- level 1: project override wins -----------------------------------------
|
|
def test_project_override(monkeypatch):
    """Level 1: a project's agent_efforts entry wins, but only for that project.

    With the project id supplied, the registry override is returned; without
    it, the per-agent settings value applies.
    """
    monkeypatch.setattr(settings, "agent_effort_developer", "high")
    _install_registry(monkeypatch, {"developer": "xhigh"})

    with_project = resolve_agent_effort("developer", ORCH_PLANE_ID)
    assert with_project == "xhigh"

    without_project = resolve_agent_effort("developer")
    assert without_project == "high"
|
|
|
|
|
|
# ---- validation: invalid value dropped --------------------------------------
|
|
def test_invalid_default_dropped(monkeypatch):
    """An invalid global default ("turbo") is dropped to '' by validation."""
    overrides = {"agent_effort_developer": "", "agent_effort_default": "turbo"}
    for field, value in overrides.items():
        monkeypatch.setattr(settings, field, value)

    resolved = resolve_agent_effort("developer")

    assert resolved == ""
|
|
|
|
|
|
def test_invalid_env_dropped(monkeypatch):
    """An invalid per-agent value ("ultra") is dropped to '' by validation."""
    monkeypatch.setattr(settings, "agent_effort_reviewer", "ultra")

    resolved = resolve_agent_effort("reviewer")

    assert resolved == ""
|
|
|
|
|
|
def test_invalid_project_override_dropped(monkeypatch):
    """An invalid project-level override ("bogus") is dropped to ''."""
    _install_registry(monkeypatch, {"developer": "bogus"})

    resolved = resolve_agent_effort("developer", ORCH_PLANE_ID)

    assert resolved == ""
|
|
|
|
|
|
def test_all_valid_efforts_pass(monkeypatch):
    """Each member of VALID_EFFORTS survives validation when set as the default."""
    # Blank the per-agent value so the global default is what gets resolved.
    monkeypatch.setattr(settings, "agent_effort_developer", "")

    for level in VALID_EFFORTS:
        monkeypatch.setattr(settings, "agent_effort_default", level)
        resolved = resolve_agent_effort("developer")
        assert resolved == level
|
|
|
|
|
|
# ---- TC-03: floor does NOT mask a typo (FR-3 / AC-5) ------------------------
|
|
def test_floor_does_not_mask_typo(monkeypatch):
    """The per-role floor must not paper over an explicit invalid value.

    "turbo" is non-empty, so the floor is NOT applied: the value goes through
    validation and is dropped to '' (never-break ORCH-41), even though a
    developer floor (xhigh) exists.
    """
    overrides = {"agent_effort_default": "", "agent_effort_developer": "turbo"}
    for field, value in overrides.items():
        monkeypatch.setattr(settings, field, value)

    resolved = resolve_agent_effort("developer")

    assert resolved == ""
|
|
|
|
|
|
# ---- TC-04: priority preserved — explicit config beats floor (FR-2) ---------
|
|
def test_explicit_env_beats_floor(monkeypatch):
    """An explicit non-empty per-agent value wins over the xhigh floor.

    An operator may deliberately downgrade developer to "high"; that choice
    must be respected.
    """
    monkeypatch.setattr(settings, "agent_effort_developer", "high")

    resolved = resolve_agent_effort("developer")

    assert resolved == "high"
|
|
|
|
|
|
def test_default_beats_floor(monkeypatch):
    """A non-empty global default outranks the per-role floor.

    The floor sits strictly below the default: with default="max" and an empty
    per-agent value the result is "max", not the xhigh floor.
    """
    overrides = {"agent_effort_developer": "", "agent_effort_default": "max"}
    for field, value in overrides.items():
        monkeypatch.setattr(settings, field, value)

    resolved = resolve_agent_effort("developer")

    assert resolved == "max"
|
|
|
|
|
|
def test_project_override_beats_floor(monkeypatch):
    """A project-level override ("high") wins over the developer floor."""
    monkeypatch.setattr(settings, "agent_effort_developer", "")
    _install_registry(monkeypatch, {"developer": "high"})

    resolved = resolve_agent_effort("developer", ORCH_PLANE_ID)

    assert resolved == "high"
|
|
|
|
|
|
# ---- TC-05: xhigh is a valid effort (FR-5) ----------------------------------
|
|
def test_xhigh_is_valid():
    """"xhigh" is an accepted effort level and is not dropped by validation."""
    assert "xhigh" in VALID_EFFORTS

    # The developer's canonical level is xhigh; it must resolve unchanged.
    resolved = resolve_agent_effort("developer")
    assert resolved == "xhigh"
|
|
|
|
|
|
# ---- flag assembly (mirror of launcher cmd construction) --------------------
|
|
def _build_flags(model, effort, fb):
|
|
model_flag = f"--model {model} " if model else ""
|
|
effort_flag = f"--effort {effort} " if effort else ""
|
|
fb_flag = f"--fallback-model {fb} " if fb else ""
|
|
return f"{model_flag}{effort_flag}{fb_flag}"
|
|
|
|
|
|
# ---- TC-06: flag assembly (AC-3) --------------------------------------------
|
|
def test_flags_present_when_configured(monkeypatch):
    """All three flags appear when model, effort and fallback are non-empty."""
    monkeypatch.setattr(settings, "agent_fallback_model", "claude-sonnet-4-6")

    flags = _build_flags(
        resolve_agent_model("developer"),
        resolve_agent_effort("developer"),
        settings.agent_fallback_model,
    )

    expected_fragments = (
        "--model claude-opus-4-8 ",
        "--effort xhigh ",
        "--fallback-model claude-sonnet-4-6 ",
    )
    for fragment in expected_fragments:
        assert fragment in flags
|
|
|
|
|
|
def test_flags_effort_per_role(monkeypatch):
    """developer -> --effort xhigh; tester -> --effort medium (mirrors _spawn)."""
    developer_flags = _build_flags("", resolve_agent_effort("developer"), "")
    assert "--effort xhigh " in developer_flags

    tester_flags = _build_flags("", resolve_agent_effort("tester"), "")
    assert "--effort medium " in tester_flags
|
|
|
|
|
|
def test_flags_absent_when_effort_empty():
    """When the resolved effort is empty, --effort is omitted entirely.

    Mirrors the `f"--effort {effort} " if effort else ""` branch in _spawn
    (AC-3 negative case). Two cases are checked: every lever empty, and only
    the effort empty while model and fallback are set. The second case shows
    that --effort alone is dropped and the other flags are unaffected.
    """
    # Every lever empty: nothing at all is emitted.
    flags = _build_flags("", "", "")
    assert flags == ""
    assert "--effort" not in flags

    # Only the effort empty: the remaining flags are emitted without --effort.
    flags = _build_flags("claude-opus-4-8", "", "claude-sonnet-4-6")
    assert "--effort" not in flags
    assert flags == "--model claude-opus-4-8 --fallback-model claude-sonnet-4-6 "
|
|
|
|
|
|
def test_flags_absent_when_model_empty(monkeypatch):
    """With no model and no fallback configured, neither flag is emitted.

    The model default, the developer model and the fallback model are all
    blanked, and an empty effort is passed, so the assembled flag string must
    be empty.
    """
    for field in ("agent_model_default", "agent_model_developer", "agent_fallback_model"):
        monkeypatch.setattr(settings, field, "")

    model = resolve_agent_model("developer")
    fb = settings.agent_fallback_model
    flags = _build_flags(model, "", fb)

    assert flags == ""
    assert "--model" not in flags
    # The original repeated this assertion twice; once is sufficient.
    assert "--fallback-model" not in flags
|