feat(metrics): per-agent token/cost accounting

Feature 4. claude is now launched with --output-format json; the run-log trailing result JSON is parsed (defensively, never fatal) for usage + total_cost_usd. New idempotent ALTERs add input_tokens/output_tokens/cache_read_tokens/cost_usd to agent_runs; the launcher monitor records usage per run, posts a per-agent finish comment under that agent bot (e.g. Developer gotov · 45.2k in / 12.1k out · $0.21), and the deployer posts an end-of-task summary (SUM over agent_runs GROUP BY agent) on done. New src/usage.py holds parse/format/record/summary helpers; test_usage.py covers parsing a real CLI JSON blob, NULL-on-garbage, recording, formatting, and the per-task aggregate.
2026-06-03 18:18:46 +03:00
parent 38a741d24e
commit 9a702a0216
4 changed files with 502 additions and 0 deletions
--- a/src/agents/launcher.py
+++ b/src/agents/launcher.py
@@ -209,9 +209,15 @@ class AgentLauncher:

        # No git fetch/checkout here: ensure_worktree() already put the worktree on
        # the right branch. The agent simply runs inside its isolated work_path.
+        # Feature 4 (token usage): --output-format json makes claude emit a single
+        # result JSON (with usage + total_cost_usd) at the end of stdout. The log
+        # still captures it; _monitor_agent parses the trailing JSON after the run
+        # to record per-agent tokens/cost. _monitor_agent's failure handling keys
+        # off the process exit_code (not stdout shape), so this is safe.
        cmd = (
            f'cd {work_path} && '
            f'{self.CLAUDE_BIN} --print '
+            f'--output-format json '
            f'{model_flag}'
            f'"$(cat {task_file})" '
            f'--system-prompt "$(cat {system_prompt})" '
@@ -400,6 +406,17 @@ class AgentLauncher:

        notify_agent_finished(run_id, agent, exit_code, task_id=_task_id, duration_s=_duration_s)

+        # Feature 4: parse token usage / cost from the (json) run log and record
+        # it on the agent_runs row. Never fatal — a garbled/missing JSON records
+        # NULLs and logs a warning so a broken run can't crash the monitor.
+        try:
+            from ..usage import parse_usage_from_log, record_usage
+            _usage = parse_usage_from_log(output_path) if output_path else None
+            record_usage(run_id, _usage)
+        except Exception as e:
+            logger.warning(f"run_id={run_id}: usage accounting failed: {e}")
+            _usage = None
+
        # Commit and push any changes — in the per-branch worktree (ORCH-2 / S-4),
        # NOT in the shared /repos/<repo>. The worktree is already on `branch`
        # (ensure_worktree did the checkout), so no checkout is needed here.
@@ -490,6 +507,14 @@ class AgentLauncher:
                from ..notifications import send_telegram
                send_telegram(f"\u26a0\ufe0f {_wid}: Agent {agent} failed (exit_code={exit_code}). Check logs: /app/data/runs/{run_id}.log")

+        # Feature 4: post the per-agent usage comment under that agent's bot, and
+        # — for the deployer finishing the task — the per-task usage summary.
+        if exit_code == 0:
+            try:
+                self._post_usage_comments(run_id, agent, repo, branch, _usage)
+            except Exception as e:
+                logger.warning(f"run_id={run_id}: usage comment failed: {e}")
+
        # Auto-advance stage if agent finished successfully and QG passes
        if exit_code == 0:
            self._try_advance_stage(run_id, agent, repo, branch)
@@ -654,6 +679,32 @@ class AgentLauncher:
            logger.error(f"Auto-advance failed for run_id={run_id}: {e}")


+    def _post_usage_comments(self, run_id, agent, repo, branch, usage):
+        """Feature 4: post the per-agent usage comment (and Deployer summary).
+
+        Looks up the task by (repo, branch); a no-op when there is no task row
+        or it has no work_item_id. Otherwise posts usage_comment(agent, usage)
+        authored by `agent`; when agent == "deployer" it additionally posts
+        task_summary_comment(task_id). `run_id` is accepted but not used here.
+        """
+        from ..usage import usage_comment, task_summary_comment
+        conn = get_db()
+        try:
+            row = conn.execute(
+                "SELECT id, work_item_id FROM tasks WHERE repo=? AND branch=?",
+                (repo, branch),
+            ).fetchone()
+        finally:
+            conn.close()  # release the connection even if the lookup raises
+        if not row or not row[1]:
+            return
+        task_id, work_item_id = row[0], row[1]
+        plane_add_comment(work_item_id, usage_comment(agent, usage), author=agent)
+        if agent == "deployer":
+            plane_add_comment(
+                work_item_id, task_summary_comment(task_id), author="deployer"
+            )
+
    def _ensure_pr(self, repo: str, branch: str, run_id: int):
        import httpx
        owner = settings.gitea_owner