fix(observability): merge-gate on deploy, full token input, Plane Done, artifact links

1. BUG 8 (second door): merge webhook no longer fake-completes a task at the
   deploy stage; done is gated by the deployer verdict (check_deploy_status).
   Other stages keep merge->done.
2. Token accounting: parse+persist cache_creation_input_tokens (new
   idempotent agent_runs column). usage_comment / task_summary now show the
   FULL input (input + cache_read + cache_creation) with a cached breakdown.
   cost_usd untouched.
3. deploy->done success now forces the Plane issue to terminal Done state.
4. All agents (architect/developer/reviewer/tester/deployer) attach artifact
   links to their finish comment via gitea_public_url.

Tests added for each fix; pytest 244 passed / 9 failed (off-limits HMAC group).
This commit is contained in:
Dev Agent
2026-06-04 11:17:58 +03:00
parent 2629dffe1b
commit 61e26a8930
9 changed files with 476 additions and 16 deletions

View File

@@ -31,7 +31,8 @@ def parse_usage_from_text(text: str) -> dict | None:
top-level '{' ... '}' that parses and carries usage/total_cost_usd.
Returns a normalised dict
{input_tokens, output_tokens, cache_read_tokens, cost_usd}
{input_tokens, output_tokens, cache_read_tokens, cache_creation_tokens,
cost_usd}
(ints / float, missing fields -> 0 / 0.0), or None if no usable JSON found.
"""
if not text:
@@ -71,6 +72,12 @@ def parse_usage_from_text(text: str) -> dict | None:
"cache_read_tokens": _int(
usage.get("cache_read_input_tokens", usage.get("cache_read_tokens"))
),
# The cache-CREATION slice (writing new cache entries) is part of the
# REAL input and used to be dropped on the floor. Persist it so the
# "X in" figure reflects the full prompt size, not just fresh tokens.
"cache_creation_tokens": _int(
usage.get("cache_creation_input_tokens", usage.get("cache_creation_tokens"))
),
"cost_usd": _float(cost),
}
@@ -150,11 +157,12 @@ def record_usage(run_id: int, usage: dict | None):
try:
conn.execute(
"UPDATE agent_runs SET input_tokens=?, output_tokens=?, "
"cache_read_tokens=?, cost_usd=? WHERE id=?",
"cache_read_tokens=?, cache_creation_tokens=?, cost_usd=? WHERE id=?",
(
usage.get("input_tokens"),
usage.get("output_tokens"),
usage.get("cache_read_tokens"),
usage.get("cache_creation_tokens"),
usage.get("cost_usd"),
run_id,
),
@@ -197,19 +205,132 @@ AGENT_DISPLAY = {
}
def usage_comment(agent: str, usage: dict | None) -> str:
def _input_total(usage: dict) -> int:
"""FULL input = fresh input + cache-read + cache-creation tokens."""
def _i(k):
try:
return int(usage.get(k) or 0)
except (TypeError, ValueError):
return 0
return _i("input_tokens") + _i("cache_read_tokens") + _i("cache_creation_tokens")
def _cached_total(usage: dict) -> int:
"""Cached portion of the input = cache-read + cache-creation tokens."""
def _i(k):
try:
return int(usage.get(k) or 0)
except (TypeError, ValueError):
return 0
return _i("cache_read_tokens") + _i("cache_creation_tokens")
def fmt_in(usage: dict) -> str:
"""Render the input figure as full total with a cached breakdown.
'8.5M in (8.4M cached)' when there is a cache; '45.2k in' when cached==0.
"""
total = _input_total(usage)
cached = _cached_total(usage)
if cached > 0:
return f"{fmt_tokens(total)} in ({fmt_tokens(cached)} cached)"
return f"{fmt_tokens(total)} in"
def usage_comment(
agent: str,
usage: dict | None,
repo: str | None = None,
branch: str | None = None,
work_item_id: str | None = None,
pr_number=None,
) -> str:
"""Build the per-agent finish comment, e.g.
'\U0001f4bb Developer \u0433\u043e\u0442\u043e\u0432 \u00b7 45.2k in / 12.1k out \u00b7 $0.21'.
'\U0001f4bb Developer \u0433\u043e\u0442\u043e\u0432 \u00b7 8.5M in (8.4M cached) / 45.8k out \u00b7 $7.29'.
When repo/branch/work_item_id are supplied, the agent's artifact link(s) are
appended (BUG: only analyst used to link its docs). Missing artifacts are
silently skipped — link building never raises.
"""
usage = usage or {}
name = AGENT_DISPLAY.get(agent, agent.capitalize())
icon = AGENT_ICON.get(agent, "\u2705")
return (
line = (
f"{icon} {name} \u0433\u043e\u0442\u043e\u0432 \u00b7 "
f"{fmt_tokens(usage.get('input_tokens'))} in / "
f"{fmt_in(usage)} / "
f"{fmt_tokens(usage.get('output_tokens'))} out \u00b7 "
f"{fmt_cost(usage.get('cost_usd'))}"
)
links = artifact_links(agent, repo, branch, work_item_id, pr_number)
if links:
line += "\n" + "\n".join(links)
return line
# Per-agent artifact file under docs/work-items/{wid}/ (architect/developer use
# special handling for ADR dirs / PR links, see artifact_links()).
AGENT_ARTIFACT = {
"reviewer": ("Review", "12-review.md"),
"tester": ("Test report", "13-test-report.md"),
"deployer": ("Deploy log", "14-deploy-log.md"),
}
def artifact_links(
agent: str,
repo: str | None,
branch: str | None,
work_item_id: str | None,
pr_number=None,
) -> list[str]:
"""Markdown link(s) to the finishing agent's artifact(s) in Gitea.
Uses gitea_public_url (falls back to gitea_url) for clickable links, mirroring
the analyst doc links. Returns [] (never raises) when there is nothing to
link or the required context is missing. analyst is intentionally NOT handled
here — its richer doc list lives in stage_engine._build_analyst_ready_comment.
"""
try:
from .config import settings
owner = getattr(settings, "gitea_owner", "admin")
base = (
getattr(settings, "gitea_public_url", "") or getattr(settings, "gitea_url", "")
).rstrip("/")
if not base or not repo:
return []
links: list[str] = []
if agent == "developer":
if branch:
links.append(
f"\U0001f4c2 [Branch {branch}]({base}/{owner}/{repo}/src/branch/{branch})"
)
if pr_number:
links.append(
f"\U0001f517 [PR #{pr_number}]({base}/{owner}/{repo}/pulls/{pr_number})"
)
return links
if agent == "architect":
if branch and work_item_id:
adr_dir = (
f"{base}/{owner}/{repo}/src/branch/{branch}/"
f"docs/work-items/{work_item_id}/06-adr"
)
links.append(f"\U0001f4d0 [ADR]({adr_dir})")
return links
spec = AGENT_ARTIFACT.get(agent)
if spec and branch and work_item_id:
label, fname = spec
href = (
f"{base}/{owner}/{repo}/src/branch/{branch}/"
f"docs/work-items/{work_item_id}/{fname}"
)
links.append(f"\U0001f4c4 [{label}]({href})")
return links
except Exception:
return []
AGENT_ICON = {
@@ -225,13 +346,22 @@ AGENT_ICON = {
def task_usage_summary(task_id: int) -> dict:
"""Aggregate agent_runs usage for a task.
Returns {total_in, total_out, total_cost, per_agent: [(agent, in, out, cost), ...]}.
total_in counts the FULL input (input + cache_read + cache_creation), and
total_cached counts the cached portion (cache_read + cache_creation).
COALESCE(...,0) keeps pre-existing rows (NULL cache_creation) from breaking.
Returns {total_in, total_cached, total_out, total_cost,
per_agent: [(agent, in, cached, out, cost), ...]}.
"""
conn = get_db()
try:
rows = conn.execute(
"SELECT agent, "
"COALESCE(SUM(input_tokens),0), "
"COALESCE(SUM(input_tokens),0) "
" + COALESCE(SUM(cache_read_tokens),0) "
" + COALESCE(SUM(cache_creation_tokens),0), "
"COALESCE(SUM(cache_read_tokens),0) "
" + COALESCE(SUM(cache_creation_tokens),0), "
"COALESCE(SUM(output_tokens),0), "
"COALESCE(SUM(cost_usd),0.0) "
"FROM agent_runs WHERE task_id=? GROUP BY agent ORDER BY agent",
@@ -239,12 +369,14 @@ def task_usage_summary(task_id: int) -> dict:
).fetchall()
finally:
conn.close()
per_agent = [(r[0], int(r[1]), int(r[2]), float(r[3])) for r in rows]
per_agent = [(r[0], int(r[1]), int(r[2]), int(r[3]), float(r[4])) for r in rows]
total_in = sum(r[1] for r in per_agent)
total_out = sum(r[2] for r in per_agent)
total_cost = sum(r[3] for r in per_agent)
total_cached = sum(r[2] for r in per_agent)
total_out = sum(r[3] for r in per_agent)
total_cost = sum(r[4] for r in per_agent)
return {
"total_in": total_in,
"total_cached": total_cached,
"total_out": total_out,
"total_cost": total_cost,
"per_agent": per_agent,
@@ -254,15 +386,26 @@ def task_usage_summary(task_id: int) -> dict:
def task_summary_comment(task_id: int) -> str:
"""Build the Deployer end-of-task summary comment (Feature 4, variant B)."""
s = task_usage_summary(task_id)
cached = s.get("total_cached", 0)
head_in = (
f"{fmt_tokens(s['total_in'])} \u0432\u0445\u043e\u0434 ({fmt_tokens(cached)} cached)"
if cached > 0
else f"{fmt_tokens(s['total_in'])} \u0432\u0445\u043e\u0434"
)
lines = [
f"\U0001f4ca \u0418\u0442\u043e\u0433\u043e \u043f\u043e \u0437\u0430\u0434\u0430\u0447\u0435: "
f"{fmt_tokens(s['total_in'])} \u0442\u043e\u043a\u0435\u043d\u043e\u0432 \u0432\u0445\u043e\u0434 / "
f"{head_in} / "
f"{fmt_tokens(s['total_out'])} \u0432\u044b\u0445\u043e\u0434 \u00b7 "
f"{fmt_cost(s['total_cost'])}"
]
for agent, ti, to, cost in s["per_agent"]:
for agent, ti, tc, to, cost in s["per_agent"]:
name = AGENT_DISPLAY.get(agent, agent.capitalize())
in_str = (
f"{fmt_tokens(ti)} in ({fmt_tokens(tc)} cached)"
if tc > 0
else f"{fmt_tokens(ti)} in"
)
lines.append(
f"\u2022 {name}: {fmt_tokens(ti)} in / {fmt_tokens(to)} out \u00b7 {fmt_cost(cost)}"
f"\u2022 {name}: {in_str} / {fmt_tokens(to)} out \u00b7 {fmt_cost(cost)}"
)
return "\n".join(lines)