fix(effort): per-role floor for --effort resolution + developer→xhigh

resolve_agent_effort returned '' for all agents in prod because empty
ORCH_AGENT_EFFORT_*= env vars clobber pydantic class-defaults, leaving no
non-empty floor to fall back to -> --effort never reached the Claude CLI.

Add a level-4 per-role floor in resolve_agent_effort (src/agents/launcher.py):
_agent_effort_floor reads the declared class-default of agent_effort_<agent>
(model_fields[...].default), which a present-but-empty env cannot override.
Floor applies only when levels 1-3 are empty and BEFORE validation, so a typo
(non-empty) still drops to '' (never-break ORCH-41) and explicit env/override
still wins (priority preserved). config.py: agent_effort_developer high->xhigh
(single source of truth; floor follows automatically).

Refs: ORCH-081

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-06-08 22:40:39 +03:00
committed by stream
parent 62b4d1f7d1
commit 1ada41f272
5 changed files with 156 additions and 26 deletions

View File

@@ -37,12 +37,15 @@ ORCH_AGENT_MODEL_DEVELOPER=
ORCH_AGENT_MODEL_REVIEWER=
ORCH_AGENT_MODEL_TESTER=
ORCH_AGENT_MODEL_DEPLOYER=
# Effort split: thinking agents (analyst/architect/developer/reviewer) -> high;
# mechanical agents (tester/deployer) -> medium.
# Effort split (ORCH-081/ORCH-52h): thinking agents (analyst/architect/reviewer)
# -> high; developer -> xhigh (coding/agentic role, Opus 4.8 canon); mechanical
# agents (tester/deployer) -> medium. NB: an empty ORCH_AGENT_EFFORT_*= no longer
# zeroes the effort — the launcher falls back to a per-role floor (= the config.py
# class-default) so each role still runs at its canonical level (ORCH-081).
ORCH_AGENT_EFFORT_DEFAULT=high
ORCH_AGENT_EFFORT_ANALYST=high
ORCH_AGENT_EFFORT_ARCHITECT=high
ORCH_AGENT_EFFORT_DEVELOPER=high
ORCH_AGENT_EFFORT_DEVELOPER=xhigh
ORCH_AGENT_EFFORT_REVIEWER=high
ORCH_AGENT_EFFORT_TESTER=medium
ORCH_AGENT_EFFORT_DEPLOYER=medium

File diff suppressed because one or more lines are too long

View File

@@ -158,12 +158,50 @@ def resolve_agent_model(agent: str, project_id: str = None) -> str:
return ""
def _agent_effort_floor(agent: str) -> str:
"""ORCH-081 (ORCH-52h): per-role non-empty floor for --effort resolution.
Returns the DECLARED class-default of the ``agent_effort_<agent>`` field on
Settings (e.g. developer -> ``xhigh``, tester/deployer -> ``medium``, the rest
-> ``high``). This is the value pydantic WOULD have used were it not clobbered
by a spurious empty env var (``ORCH_AGENT_EFFORT_<ROLE>=``): the class-default
is fixed in the class body and a present-but-empty env value cannot override it,
so it is a robust floor even when the host ``.env`` zeroes every effort var.
config.py is the single source of truth: upgrading developer to ``xhigh`` there
automatically raises the floor here — no second map to keep in sync (ADR-001).
Unknown agent (a name outside the 6 roles) has no ``agent_effort_<agent>``
field; we degrade to the class-default of ``agent_effort_default`` (``high``),
a safe non-empty floor. Never raises.
"""
fields = type(settings).model_fields
for key in (f"agent_effort_{agent}", "agent_effort_default"):
field = fields.get(key)
if field is not None and field.default:
return field.default
return ""
def resolve_agent_effort(agent: str, project_id: str = None) -> str:
"""ORCH-41: resolve the --effort level for an agent (optionally per-project).
Same priority as resolve_agent_model. The resolved value is validated against
VALID_EFFORTS; an invalid value is logged and dropped (returns "") so a typo
in env/projects_json can never pass a bad flag to the CLI.
Same priority as resolve_agent_model, with one extra level below the global
default (ORCH-081 / ADR-001):
1. project-override (projects_json.agent_efforts[agent])
2. per-agent env (settings.agent_effort_<agent>)
3. global default (settings.agent_effort_default)
4. per-role FLOOR (class-default of agent_effort_<agent>) — NEW
The floor only kicks in when levels 1-3 are all empty (the prod bug: a present
but empty ``ORCH_AGENT_EFFORT_*=`` clobbers every default to ''), guaranteeing
a non-empty target effort for the 6 known roles regardless of host .env state.
The floor is applied BEFORE validation and ONLY to an empty resolve, so it
never masks a typo: an explicit invalid value (e.g. ``turbo``) is non-empty,
skips the floor, and is logged + dropped to "" exactly as in ORCH-41 (the
resolved value is validated against VALID_EFFORTS; an invalid value can never
pass a bad flag to the CLI). Never raises.
"""
value = _resolve_agent_attr(
agent, project_id,
@@ -171,6 +209,11 @@ def resolve_agent_effort(agent: str, project_id: str = None) -> str:
env_attr_prefix="agent_effort_",
default_attr="agent_effort_default",
)
if not value:
# Levels 1-3 all empty (typically a prod .env with empty ORCH_AGENT_EFFORT_*):
# fall through to the per-role floor (class-default). Applied before
# validation but only here, so a typo (non-empty) never reaches this branch.
value = _agent_effort_floor(agent)
if value and value not in VALID_EFFORTS:
logger.warning(
f"Invalid effort '{value}' for agent '{agent}' "

View File

@@ -97,13 +97,15 @@ class Settings(BaseSettings):
agent_model_deployer: str = ""
# ORCH-41: per-agent effort / reasoning level: low|medium|high|xhigh|max.
# Empty -> agent_effort_default. Same resolution order as model. Default split:
# thinking agents (analyst/architect/developer/reviewer) -> high; mechanical
# agents (tester/deployer) -> medium.
# Empty -> agent_effort_default. Same resolution order as model. Default split
# (ORCH-081/ORCH-52h): thinking agents (analyst/architect/reviewer) -> high;
# developer -> xhigh (coding/agentic role, Opus 4.8 canon); mechanical agents
# (tester/deployer) -> medium. These class-defaults are ALSO the per-role floor
# used by resolve_agent_effort when the env is empty (single source of truth).
agent_effort_default: str = "high"
agent_effort_analyst: str = "high"
agent_effort_architect: str = "high"
agent_effort_developer: str = "high"
agent_effort_developer: str = "xhigh"
agent_effort_reviewer: str = "high"
agent_effort_tester: str = "medium"
agent_effort_deployer: str = "medium"

View File

@@ -26,13 +26,22 @@ from src.projects import ProjectConfig, reload_projects
ORCH_PLANE_ID = "8da6aa25-a60e-44d6-a1e2-d8ae59aa7d6a"
# ORCH-081/ORCH-52h: canonical effort per role (developer upgraded high -> xhigh).
CANON_EFFORT = {
"analyst": "high",
"architect": "high",
"developer": "xhigh",
"reviewer": "high",
"tester": "medium",
"deployer": "medium",
}
@pytest.fixture(autouse=True)
def _clean_settings(monkeypatch):
monkeypatch.setattr(settings, "agent_effort_default", "high")
for a in ("analyst", "architect", "developer", "reviewer"):
monkeypatch.setattr(settings, f"agent_effort_{a}", "high")
for a in ("tester", "deployer"):
monkeypatch.setattr(settings, f"agent_effort_{a}", "medium")
for a, e in CANON_EFFORT.items():
monkeypatch.setattr(settings, f"agent_effort_{a}", e)
monkeypatch.setattr(P.settings, "projects_json", "")
reload_projects()
yield
@@ -50,19 +59,40 @@ def _install_registry(monkeypatch, agent_efforts):
monkeypatch.setattr(P, "_BY_REPO", {p.repo: p for p in reg})
# ---- default split ----------------------------------------------------------
# ---- TC-01: canonical defaults (AC-1 / FR-4) --------------------------------
def test_default_split():
assert resolve_agent_effort("developer") == "high"
assert resolve_agent_effort("developer") == "xhigh"
assert resolve_agent_effort("architect") == "high"
assert resolve_agent_effort("tester") == "medium"
assert resolve_agent_effort("deployer") == "medium"
# ---- level 4: nothing -> "" -------------------------------------------------
def test_no_config_returns_empty(monkeypatch):
@pytest.mark.parametrize("agent,expected", list(CANON_EFFORT.items()))
def test_canonical_effort_all_roles(agent, expected):
assert resolve_agent_effort(agent) == expected
# ---- TC-02: empty env -> per-role floor (variant c, AC-2) -------------------
@pytest.mark.parametrize("agent,expected", list(CANON_EFFORT.items()))
def test_empty_env_falls_back_to_per_role_floor(monkeypatch, agent, expected):
"""Models the prod bug: ORCH_AGENT_EFFORT_*= present-but-empty -> every level
resolves to '' on the instance; the per-role floor (config class-default) must
still yield the canonical level (NOT '')."""
monkeypatch.setattr(settings, "agent_effort_default", "")
for a in CANON_EFFORT:
monkeypatch.setattr(settings, f"agent_effort_{a}", "")
result = resolve_agent_effort(agent)
assert result == expected
assert result != ""
# ---- unknown agent floor degrades to default (high), never '' ---------------
def test_empty_env_unknown_agent_floor_is_default(monkeypatch):
monkeypatch.setattr(settings, "agent_effort_default", "")
monkeypatch.setattr(settings, "agent_effort_tester", "")
assert resolve_agent_effort("tester") == ""
# An agent with no agent_effort_<name> field falls back to the
# agent_effort_default class-default (high), a safe non-empty floor.
assert resolve_agent_effort("nonexistent_role") == "high"
# ---- level 2: per-agent env beats default -----------------------------------
@@ -103,6 +133,45 @@ def test_all_valid_efforts_pass(monkeypatch):
assert resolve_agent_effort("developer") == e
# ---- TC-03: floor does NOT mask a typo (FR-3 / AC-5) ------------------------
def test_floor_does_not_mask_typo(monkeypatch):
"""An explicit invalid value is non-empty, so the floor is NOT applied: the
value is validated and dropped to '' (never-break ORCH-41), even though the
developer floor (xhigh) exists."""
monkeypatch.setattr(settings, "agent_effort_default", "")
monkeypatch.setattr(settings, "agent_effort_developer", "turbo")
assert resolve_agent_effort("developer") == ""
# ---- TC-04: priority preserved — explicit config beats floor (FR-2) ---------
def test_explicit_env_beats_floor(monkeypatch):
"""Operator may deliberately downgrade developer to high; the explicit
non-empty env wins over the xhigh floor."""
monkeypatch.setattr(settings, "agent_effort_developer", "high")
assert resolve_agent_effort("developer") == "high"
def test_default_beats_floor(monkeypatch):
"""A non-empty global default wins over the per-role floor (floor is strictly
below default): default=max with empty per-agent -> max, not the xhigh floor."""
monkeypatch.setattr(settings, "agent_effort_developer", "")
monkeypatch.setattr(settings, "agent_effort_default", "max")
assert resolve_agent_effort("developer") == "max"
def test_project_override_beats_floor(monkeypatch):
monkeypatch.setattr(settings, "agent_effort_developer", "")
_install_registry(monkeypatch, {"developer": "high"})
assert resolve_agent_effort("developer", ORCH_PLANE_ID) == "high"
# ---- TC-05: xhigh is a valid effort (FR-5) ----------------------------------
def test_xhigh_is_valid():
assert "xhigh" in VALID_EFFORTS
# developer canonical xhigh resolves (is not dropped by validation)
assert resolve_agent_effort("developer") == "xhigh"
# ---- flag assembly (mirror of launcher cmd construction) --------------------
def _build_flags(model, effort, fb):
model_flag = f"--model {model} " if model else ""
@@ -111,6 +180,7 @@ def _build_flags(model, effort, fb):
return f"{model_flag}{effort_flag}{fb_flag}"
# ---- TC-06: flag assembly (AC-3) --------------------------------------------
def test_flags_present_when_configured(monkeypatch):
monkeypatch.setattr(settings, "agent_fallback_model", "claude-sonnet-4-6")
model = resolve_agent_model("developer")
@@ -118,21 +188,32 @@ def test_flags_present_when_configured(monkeypatch):
fb = settings.agent_fallback_model
flags = _build_flags(model, effort, fb)
assert "--model claude-opus-4-8 " in flags
assert "--effort high " in flags
assert "--effort xhigh " in flags
assert "--fallback-model claude-sonnet-4-6 " in flags
def test_flags_absent_when_empty(monkeypatch):
def test_flags_effort_per_role(monkeypatch):
"""developer -> --effort xhigh; tester -> --effort medium (mirrors _spawn)."""
assert "--effort xhigh " in _build_flags("", resolve_agent_effort("developer"), "")
assert "--effort medium " in _build_flags("", resolve_agent_effort("tester"), "")
def test_flags_absent_when_effort_empty():
"""When the resolved effort is empty, --effort is omitted entirely. Mirrors the
`f"--effort {effort} " if effort else ""` branch in _spawn (AC-3 negative case)."""
flags = _build_flags("", "", "")
assert flags == ""
assert "--effort" not in flags
def test_flags_absent_when_model_empty(monkeypatch):
monkeypatch.setattr(settings, "agent_model_default", "")
monkeypatch.setattr(settings, "agent_model_developer", "")
monkeypatch.setattr(settings, "agent_effort_default", "")
monkeypatch.setattr(settings, "agent_effort_developer", "")
monkeypatch.setattr(settings, "agent_fallback_model", "")
model = resolve_agent_model("developer")
effort = resolve_agent_effort("developer")
fb = settings.agent_fallback_model
flags = _build_flags(model, effort, fb)
flags = _build_flags(model, "", fb)
assert flags == ""
assert "--model" not in flags
assert "--effort" not in flags
assert "--fallback-model" not in flags
assert "--fallback-model" not in flags