From 61e26a8930d62b4ce7556c56533ed23e25b95f81 Mon Sep 17 00:00:00 2001 From: Dev Agent Date: Thu, 4 Jun 2026 11:17:58 +0300 Subject: [PATCH] fix(observability): merge-gate on deploy, full token input, Plane Done, artifact links 1. BUG 8 (second door): merge webhook no longer fake-completes a task at the deploy stage; done is gated by the deployer verdict (check_deploy_status). Other stages keep merge->done. 2. Token accounting: parse+persist cache_creation_input_tokens (new idempotent agent_runs column). usage_comment / task_summary now show the FULL input (input + cache_read + cache_creation) with a cached breakdown. cost_usd untouched. 3. deploy->done success now forces the Plane issue to terminal Done state. 4. All agents (architect/developer/reviewer/tester/deployer) attach artifact links to their finish comment via gitea_public_url. Tests added for each fix; pytest 244 passed / 9 failed (off-limits HMAC group). --- src/agents/launcher.py | 39 ++++++++- src/db.py | 6 ++ src/plane_sync.py | 11 +++ src/stage_engine.py | 17 ++++ src/usage.py | 171 ++++++++++++++++++++++++++++++++++--- src/webhooks/gitea.py | 14 +++ tests/test_stage_engine.py | 35 ++++++++ tests/test_usage.py | 135 ++++++++++++++++++++++++++++- tests/test_webhooks.py | 64 ++++++++++++++ 9 files changed, 476 insertions(+), 16 deletions(-) diff --git a/src/agents/launcher.py b/src/agents/launcher.py index dd2be7a..b10d274 100644 --- a/src/agents/launcher.py +++ b/src/agents/launcher.py @@ -699,12 +699,49 @@ class AgentLauncher: task_id, work_item_id = row[0], row[1] if not work_item_id: return - plane_add_comment(work_item_id, usage_comment(agent, usage), author=agent) + # Observability: every agent's finish comment links its artifact(s) + # (reviewer->12-review, tester->13-test-report, deployer->14-deploy-log, + # architect->ADR, developer->PR/branch). For the developer we resolve the + # open PR number so the link points straight at it. + pr_number = None + if agent == "developer": + pr_number = self._open_pr_number(repo, branch) + plane_add_comment( + work_item_id, + usage_comment( + agent, + usage, + repo=repo, + branch=branch, + work_item_id=work_item_id, + pr_number=pr_number, + ), + author=agent, + ) if agent == "deployer": plane_add_comment( work_item_id, task_summary_comment(task_id), author="deployer" ) + def _open_pr_number(self, repo: str, branch: str): + """Return the open PR number for `branch`, or None. Never raises.""" + try: + import httpx + owner = settings.gitea_owner + headers = {"Authorization": f"token {settings.gitea_token}"} + resp = httpx.get( + f"{settings.gitea_url}/api/v1/repos/{owner}/{repo}/pulls", + params={"state": "open", "head": branch}, + headers=headers, timeout=5, + ) + if resp.status_code == 200: + prs = resp.json() + if prs: + return prs[0].get("number") + except Exception: + pass + return None + def _ensure_pr(self, repo: str, branch: str, run_id: int): import httpx owner = settings.gitea_owner diff --git a/src/db.py b/src/db.py index 816e2ac..4d64a68 100644 --- a/src/db.py +++ b/src/db.py @@ -83,6 +83,12 @@ def init_db(): _ensure_column(conn, "agent_runs", "input_tokens", "INTEGER") _ensure_column(conn, "agent_runs", "output_tokens", "INTEGER") _ensure_column(conn, "agent_runs", "cache_read_tokens", "INTEGER") + # Observability fix: also persist cache-CREATION input tokens. Claude CLI + # reports the real input split across input_tokens (fresh, ~tens) + + # cache_read_input_tokens (cache hit, millions) + cache_creation_input_tokens + # (writing new cache). Without this column the cache_creation slice is lost + # and the "X in" figure understates the true prompt size. Idempotent ALTER. + _ensure_column(conn, "agent_runs", "cache_creation_tokens", "INTEGER") _ensure_column(conn, "agent_runs", "cost_usd", "REAL") conn.commit() conn.close() diff --git a/src/plane_sync.py b/src/plane_sync.py index 9d061c4..b09f366 100644 --- a/src/plane_sync.py +++ b/src/plane_sync.py @@ -343,6 +343,17 @@ def set_issue_blocked(work_item_id: str, project_id: str = None): _set_issue_state_direct(work_item_id, PLANE_STATES["blocked"], project_id) +def set_issue_done(work_item_id: str, project_id: str = None): + """Observability fix: force the issue into the TERMINAL Done state. + + Used by the deploy->done success path so a completed task always reaches the + terminal Plane state (it used to stick on In Progress because the merge + webhook bypassed the stage engine). Uses the existing PLANE_STATES['done'] + UUID — the mapping itself is NOT changed. + """ + _set_issue_state_direct(work_item_id, PLANE_STATES["done"], project_id) + + def set_issue_in_progress(work_item_id: str, project_id: str = None): """Set issue to 'In Progress' state — agent working.""" _set_issue_state_direct(work_item_id, PLANE_STATES["in_progress"], project_id) diff --git a/src/stage_engine.py b/src/stage_engine.py index 012deb9..0fe4284 100644 --- a/src/stage_engine.py +++ b/src/stage_engine.py @@ -47,6 +47,7 @@ from .plane_sync import ( set_issue_needs_input, set_issue_in_progress, set_issue_blocked, + set_issue_done, ) from .config import settings @@ -247,6 +248,22 @@ def advance_stage( f"(auto-advance after {agent})" ) + # --- Terminal sync: deploy -> done must reach Plane's Done ----------- + # When the deployer's check_deploy_status passes we advance to the + # terminal 'done' stage. Previously a merged-PR webhook completed the + # task out-of-band and Plane stuck on In Progress. Now done flows through + # here, so explicitly drive the Plane issue into the terminal Done state + # (PLANE_STATES['done'] — mapping unchanged) in addition to the + # stage-change comment above. + if next_stage == "done" and work_item_id: + try: + set_issue_done(work_item_id) + logger.info( + f"Task {task_id}: deploy->done, Plane state forced to Done" + ) + except Exception as e: + logger.error(f"Task {task_id}: failed to set Plane Done: {e}") + # --- Launch the next agent (ORCH-4 fix: current_stage, not next) ----- next_agent = get_agent_for_stage(current_stage) if next_agent: diff --git a/src/usage.py b/src/usage.py index 8968381..ce37ce0 100644 --- a/src/usage.py +++ b/src/usage.py @@ -31,7 +31,8 @@ def parse_usage_from_text(text: str) -> dict | None: top-level '{' ... '}' that parses and carries usage/total_cost_usd. Returns a normalised dict - {input_tokens, output_tokens, cache_read_tokens, cost_usd} + {input_tokens, output_tokens, cache_read_tokens, cache_creation_tokens, + cost_usd} (ints / float, missing fields -> 0 / 0.0), or None if no usable JSON found. """ if not text: @@ -71,6 +72,12 @@ def parse_usage_from_text(text: str) -> dict | None: "cache_read_tokens": _int( usage.get("cache_read_input_tokens", usage.get("cache_read_tokens")) ), + # The cache-CREATION slice (writing new cache entries) is part of the + # REAL input and used to be dropped on the floor. Persist it so the + # "X in" figure reflects the full prompt size, not just fresh tokens. + "cache_creation_tokens": _int( + usage.get("cache_creation_input_tokens", usage.get("cache_creation_tokens")) + ), "cost_usd": _float(cost), } @@ -150,11 +157,12 @@ def record_usage(run_id: int, usage: dict | None): try: conn.execute( "UPDATE agent_runs SET input_tokens=?, output_tokens=?, " - "cache_read_tokens=?, cost_usd=? WHERE id=?", + "cache_read_tokens=?, cache_creation_tokens=?, cost_usd=? WHERE id=?", ( usage.get("input_tokens"), usage.get("output_tokens"), usage.get("cache_read_tokens"), + usage.get("cache_creation_tokens"), usage.get("cost_usd"), run_id, ), @@ -197,19 +205,132 @@ AGENT_DISPLAY = { } -def usage_comment(agent: str, usage: dict | None) -> str: +def _input_total(usage: dict) -> int: + """FULL input = fresh input + cache-read + cache-creation tokens.""" + def _i(k): + try: + return int(usage.get(k) or 0) + except (TypeError, ValueError): + return 0 + return _i("input_tokens") + _i("cache_read_tokens") + _i("cache_creation_tokens") + + +def _cached_total(usage: dict) -> int: + """Cached portion of the input = cache-read + cache-creation tokens.""" + def _i(k): + try: + return int(usage.get(k) or 0) + except (TypeError, ValueError): + return 0 + return _i("cache_read_tokens") + _i("cache_creation_tokens") + + +def fmt_in(usage: dict) -> str: + """Render the input figure as full total with a cached breakdown. + + '8.5M in (8.4M cached)' when there is a cache; '45.2k in' when cached==0. + """ + total = _input_total(usage) + cached = _cached_total(usage) + if cached > 0: + return f"{fmt_tokens(total)} in ({fmt_tokens(cached)} cached)" + return f"{fmt_tokens(total)} in" + + +def usage_comment( + agent: str, + usage: dict | None, + repo: str | None = None, + branch: str | None = None, + work_item_id: str | None = None, + pr_number=None, +) -> str: """Build the per-agent finish comment, e.g. - '\U0001f4bb Developer \u0433\u043e\u0442\u043e\u0432 \u00b7 45.2k in / 12.1k out \u00b7 $0.21'. + '\U0001f4bb Developer \u0433\u043e\u0442\u043e\u0432 \u00b7 8.5M in (8.4M cached) / 45.8k out \u00b7 $7.29'. + + When repo/branch/work_item_id are supplied, the agent's artifact link(s) are + appended (BUG: only analyst used to link its docs). Missing artifacts are + silently skipped — link building never raises. """ usage = usage or {} name = AGENT_DISPLAY.get(agent, agent.capitalize()) icon = AGENT_ICON.get(agent, "\u2705") - return ( + line = ( f"{icon} {name} \u0433\u043e\u0442\u043e\u0432 \u00b7 " - f"{fmt_tokens(usage.get('input_tokens'))} in / " + f"{fmt_in(usage)} / " f"{fmt_tokens(usage.get('output_tokens'))} out \u00b7 " f"{fmt_cost(usage.get('cost_usd'))}" ) + links = artifact_links(agent, repo, branch, work_item_id, pr_number) + if links: + line += "\n" + "\n".join(links) + return line + + +# Per-agent artifact file under docs/work-items/{wid}/ (architect/developer use +# special handling for ADR dirs / PR links, see artifact_links()). +AGENT_ARTIFACT = { + "reviewer": ("Review", "12-review.md"), + "tester": ("Test report", "13-test-report.md"), + "deployer": ("Deploy log", "14-deploy-log.md"), +} + + +def artifact_links( + agent: str, + repo: str | None, + branch: str | None, + work_item_id: str | None, + pr_number=None, +) -> list[str]: + """Markdown link(s) to the finishing agent's artifact(s) in Gitea. + + Uses gitea_public_url (falls back to gitea_url) for clickable links, mirroring + the analyst doc links. Returns [] (never raises) when there is nothing to + link or the required context is missing. analyst is intentionally NOT handled + here — its richer doc list lives in stage_engine._build_analyst_ready_comment. + """ + try: + from .config import settings + owner = getattr(settings, "gitea_owner", "admin") + base = ( + getattr(settings, "gitea_public_url", "") or getattr(settings, "gitea_url", "") + ).rstrip("/") + if not base or not repo: + return [] + links: list[str] = [] + + if agent == "developer": + if branch: + links.append( + f"\U0001f4c2 [Branch {branch}]({base}/{owner}/{repo}/src/branch/{branch})" + ) + if pr_number: + links.append( + f"\U0001f517 [PR #{pr_number}]({base}/{owner}/{repo}/pulls/{pr_number})" + ) + return links + + if agent == "architect": + if branch and work_item_id: + adr_dir = ( + f"{base}/{owner}/{repo}/src/branch/{branch}/" + f"docs/work-items/{work_item_id}/06-adr" + ) + links.append(f"\U0001f4d0 [ADR]({adr_dir})") + return links + + spec = AGENT_ARTIFACT.get(agent) + if spec and branch and work_item_id: + label, fname = spec + href = ( + f"{base}/{owner}/{repo}/src/branch/{branch}/" + f"docs/work-items/{work_item_id}/{fname}" + ) + links.append(f"\U0001f4c4 [{label}]({href})") + return links + except Exception: + return [] AGENT_ICON = { @@ -225,13 +346,22 @@ AGENT_ICON = { def task_usage_summary(task_id: int) -> dict: """Aggregate agent_runs usage for a task. - Returns {total_in, total_out, total_cost, per_agent: [(agent, in, out, cost), ...]}. + total_in counts the FULL input (input + cache_read + cache_creation), and + total_cached counts the cached portion (cache_read + cache_creation). + COALESCE(...,0) keeps pre-existing rows (NULL cache_creation) from breaking. + + Returns {total_in, total_cached, total_out, total_cost, + per_agent: [(agent, in, cached, out, cost), ...]}. """ conn = get_db() try: rows = conn.execute( "SELECT agent, " - "COALESCE(SUM(input_tokens),0), " + "COALESCE(SUM(input_tokens),0) " + " + COALESCE(SUM(cache_read_tokens),0) " + " + COALESCE(SUM(cache_creation_tokens),0), " + "COALESCE(SUM(cache_read_tokens),0) " + " + COALESCE(SUM(cache_creation_tokens),0), " "COALESCE(SUM(output_tokens),0), " "COALESCE(SUM(cost_usd),0.0) " "FROM agent_runs WHERE task_id=? GROUP BY agent ORDER BY agent", @@ -239,12 +369,14 @@ def task_usage_summary(task_id: int) -> dict: ).fetchall() finally: conn.close() - per_agent = [(r[0], int(r[1]), int(r[2]), float(r[3])) for r in rows] + per_agent = [(r[0], int(r[1]), int(r[2]), int(r[3]), float(r[4])) for r in rows] total_in = sum(r[1] for r in per_agent) - total_out = sum(r[2] for r in per_agent) - total_cost = sum(r[3] for r in per_agent) + total_cached = sum(r[2] for r in per_agent) + total_out = sum(r[3] for r in per_agent) + total_cost = sum(r[4] for r in per_agent) return { "total_in": total_in, + "total_cached": total_cached, "total_out": total_out, "total_cost": total_cost, "per_agent": per_agent, @@ -254,15 +386,26 @@ def task_usage_summary(task_id: int) -> dict: def task_summary_comment(task_id: int) -> str: """Build the Deployer end-of-task summary comment (Feature 4, variant B).""" s = task_usage_summary(task_id) + cached = s.get("total_cached", 0) + head_in = ( + f"{fmt_tokens(s['total_in'])} \u0432\u0445\u043e\u0434 ({fmt_tokens(cached)} cached)" + if cached > 0 + else f"{fmt_tokens(s['total_in'])} \u0432\u0445\u043e\u0434" + ) lines = [ f"\U0001f4ca \u0418\u0442\u043e\u0433\u043e \u043f\u043e \u0437\u0430\u0434\u0430\u0447\u0435: " - f"{fmt_tokens(s['total_in'])} \u0442\u043e\u043a\u0435\u043d\u043e\u0432 \u0432\u0445\u043e\u0434 / " + f"{head_in} / " f"{fmt_tokens(s['total_out'])} \u0432\u044b\u0445\u043e\u0434 \u00b7 " f"{fmt_cost(s['total_cost'])}" ] - for agent, ti, to, cost in s["per_agent"]: + for agent, ti, tc, to, cost in s["per_agent"]: name = AGENT_DISPLAY.get(agent, agent.capitalize()) + in_str = ( + f"{fmt_tokens(ti)} in ({fmt_tokens(tc)} cached)" + if tc > 0 + else f"{fmt_tokens(ti)} in" + ) lines.append( - f"\u2022 {name}: {fmt_tokens(ti)} in / {fmt_tokens(to)} out \u00b7 {fmt_cost(cost)}" + f"\u2022 {name}: {in_str} / {fmt_tokens(to)} out \u00b7 {fmt_cost(cost)}" ) return "\n".join(lines) diff --git a/src/webhooks/gitea.py b/src/webhooks/gitea.py index 083a075..a4d11a0 100644 --- a/src/webhooks/gitea.py +++ b/src/webhooks/gitea.py @@ -334,6 +334,20 @@ async def handle_pr(payload: dict): logger.error(f"Task {task_id}: max retries reached, needs manual intervention") elif action == "closed" and pr.get("merged", False): + # BUG 8 (second door): at the deploy stage `done` is gated by the + # deployer's verdict (check_deploy_status via advance_stage), NOT by the + # fact that the PR was merged. The deployer merges the PR at the START of + # its run, so a merged webhook arrives ~30s later while the deployer is + # still working — blindly setting done here would fake-complete the task + # and discard a later deploy_status: FAILED verdict. advance_stage will + # drive deploy→done (and Plane→Done) when the deployer job finishes. + # For every OTHER stage the merge-driven done behaviour is preserved. + if current_stage == "deploy": + logger.info( + f"Task {task_id}: PR merged at deploy stage — done gated by " + f"deployer verdict (check_deploy_status), ignoring merge-driven done." + ) + return update_task_stage(task_id, "done") notify_stage_change(task_id, current_stage, "done") logger.info(f"Task {task_id}: PR merged, stage → done") diff --git a/tests/test_stage_engine.py b/tests/test_stage_engine.py index 1e06a77..c98bb45 100644 --- a/tests/test_stage_engine.py +++ b/tests/test_stage_engine.py @@ -69,6 +69,7 @@ def silence_side_effects(monkeypatch): "set_issue_needs_input", "set_issue_in_progress", "set_issue_blocked", + "set_issue_done", ): monkeypatch.setattr(stage_engine, name, MagicMock()) @@ -177,6 +178,40 @@ class TestHappyPathAgentSelection: assert res.enqueued_agent is None assert _jobs() == [] + def test_deploy_success_syncs_plane_to_terminal_done(self, monkeypatch): + """FIX 3: a successful deploy->done forces the Plane issue to terminal Done. + + Previously the task could stick on In Progress because the merge webhook + completed it out-of-band. Now the engine drives set_issue_done() on the + deploy->done success transition. + """ + monkeypatch.setattr( + stage_engine, "QG_CHECKS", + {k: _pass for k in stage_engine.QG_CHECKS}, + ) + task_id = _make_task("deploy", wi="ET-012") + res = advance_stage( + task_id, "deploy", "enduro-trails", "ET-012", + "feature/ET-012-x", finished_agent="deployer", + ) + assert res.advanced is True + assert _stage(task_id) == "done" + # The terminal Plane sync was invoked with the work item id. + stage_engine.set_issue_done.assert_called_once_with("ET-012") + + def test_non_terminal_advance_does_not_force_plane_done(self, monkeypatch): + """set_issue_done must only fire on the terminal deploy->done transition.""" + monkeypatch.setattr( + stage_engine, "QG_CHECKS", + {k: _pass for k in stage_engine.QG_CHECKS}, + ) + task_id = _make_task("review") + advance_stage( + task_id, "review", "enduro-trails", "ET-001", + "feature/ET-001-x", finished_agent=None, + ) + stage_engine.set_issue_done.assert_not_called() + def test_done_is_terminal(self): task_id = _make_task("done") res = advance_stage(task_id, "done", "enduro-trails", "ET-001", diff --git a/tests/test_usage.py b/tests/test_usage.py index 59d059b..d5765a0 100644 --- a/tests/test_usage.py +++ b/tests/test_usage.py @@ -62,9 +62,27 @@ def test_parse_real_result_json(): assert u["input_tokens"] == 45231 assert u["output_tokens"] == 12100 assert u["cache_read_tokens"] == 18500 + # FIX 2: cache_creation slice must now be parsed (was dropped before). + assert u["cache_creation_tokens"] == 7418 assert abs(u["cost_usd"] - 0.0560175) < 1e-9 +def test_parse_cache_creation_present(): + u = U.parse_usage_from_text(REAL_RESULT_JSON) + assert u["cache_creation_tokens"] == 7418 + + +def test_parse_cache_creation_missing_defaults_zero(): + blob = ( + '{"total_cost_usd":0.01,' + '"usage":{"input_tokens":10,"output_tokens":5,' + '"cache_read_input_tokens":100}}' + ) + u = U.parse_usage_from_text(blob) + assert u["cache_creation_tokens"] == 0 + assert u["cache_read_tokens"] == 100 + + def test_parse_with_leading_text(): """The agent may print text before the trailing JSON; we still find it.""" text = "some agent stdout line\nanother line\n" + REAL_RESULT_JSON @@ -106,13 +124,16 @@ def test_record_usage_writes_columns(): U.record_usage(rid, u) conn = get_db() row = conn.execute( - "SELECT input_tokens, output_tokens, cache_read_tokens, cost_usd " + "SELECT input_tokens, output_tokens, cache_read_tokens, " + "cache_creation_tokens, cost_usd " "FROM agent_runs WHERE id=?", (rid,) ).fetchone() conn.close() assert row["input_tokens"] == 45231 assert row["output_tokens"] == 12100 assert row["cache_read_tokens"] == 18500 + # FIX 2: cache_creation column is now persisted. + assert row["cache_creation_tokens"] == 7418 assert abs(row["cost_usd"] - 0.0560175) < 1e-9 @@ -144,14 +165,82 @@ def test_fmt_cost(): def test_usage_comment_format(): + # No cache -> in_total == input_tokens, no cached breakdown shown. u = {"input_tokens": 45231, "output_tokens": 12100, "cost_usd": 0.21} c = U.usage_comment("developer", u) assert "Developer" in c assert "45.2k in" in c + assert "cached" not in c assert "12.1k out" in c assert "$0.21" in c +def test_usage_comment_shows_full_input_with_cached(): + """FIX 2: in = input + cache_read + cache_creation, with cached breakdown.""" + u = { + "input_tokens": 81, + "cache_read_tokens": 8_400_000, + "cache_creation_tokens": 100_000, + "output_tokens": 45_800, + "cost_usd": 7.29, + } + c = U.usage_comment("developer", u) + # total in = 8_500_081 -> 8.5M ; cached = 8_500_000 -> 8.5M + assert "8.5M in (8.5M cached)" in c + assert "45.8k out" in c + assert "$7.29" in c + + +def test_usage_comment_no_cached_when_zero(): + u = {"input_tokens": 1234, "cache_read_tokens": 0, + "cache_creation_tokens": 0, "output_tokens": 50, "cost_usd": 0.01} + c = U.usage_comment("developer", u) + assert "1.2k in" in c + assert "cached" not in c + + +# --------------------------------------------------------------------------- # +# FIX 4: per-agent artifact links in finish comments +# --------------------------------------------------------------------------- # +def _ctx(): + return dict(repo="enduro-trails", branch="feature/ET-012-x", + work_item_id="ET-012") + + +def test_usage_comment_reviewer_links_review_doc(): + c = U.usage_comment("reviewer", {"input_tokens": 5}, **_ctx()) + assert "12-review.md" in c + assert "ET-012" in c + + +def test_usage_comment_tester_links_test_report(): + c = U.usage_comment("tester", {"input_tokens": 5}, **_ctx()) + assert "13-test-report.md" in c + + +def test_usage_comment_deployer_links_deploy_log(): + c = U.usage_comment("deployer", {"input_tokens": 5}, **_ctx()) + assert "14-deploy-log.md" in c + + +def test_usage_comment_developer_links_pr_and_branch(): + c = U.usage_comment("developer", {"input_tokens": 5}, pr_number=7, **_ctx()) + assert "pulls/7" in c + assert "feature/ET-012-x" in c + + +def test_usage_comment_architect_links_adr(): + c = U.usage_comment("architect", {"input_tokens": 5}, **_ctx()) + assert "06-adr" in c + + +def test_usage_comment_no_links_without_context(): + """Without repo/branch context, no links are appended (no crash).""" + c = U.usage_comment("reviewer", {"input_tokens": 5}) + assert "12-review.md" not in c + assert "http" not in c + + # --------------------------------------------------------------------------- # # task summary # --------------------------------------------------------------------------- # @@ -174,3 +263,47 @@ def test_task_summary_aggregates_over_agents(): assert "$0.15" in comment # total cost assert "Developer" in comment assert "Tester" in comment + + +def test_task_summary_sums_all_three_input_components(): + """FIX 2: total_in = SUM(input + cache_read + cache_creation); total_cached too.""" + rid = _new_run(agent="developer", task_id=77) + U.record_usage(rid, { + "input_tokens": 100, + "cache_read_tokens": 2000, + "cache_creation_tokens": 900, + "output_tokens": 50, + "cost_usd": 0.10, + }) + rid2 = _new_run(agent="tester", task_id=77) + U.record_usage(rid2, { + "input_tokens": 10, + "cache_read_tokens": 500, + "cache_creation_tokens": 0, + "output_tokens": 5, + "cost_usd": 0.05, + }) + s = U.task_usage_summary(77) + # total_in = (100+2000+900) + (10+500+0) = 3510 + assert s["total_in"] == 3510 + # total_cached = (2000+900) + (500+0) = 3400 + assert s["total_cached"] == 3400 + assert s["total_out"] == 55 + comment = U.task_summary_comment(77) + assert "cached" in comment + + +def test_task_summary_handles_null_cache_creation(): + """Pre-existing rows (NULL cache_creation) must not break aggregation.""" + rid = _new_run(agent="developer", task_id=88) + conn = get_db() + conn.execute( + "UPDATE agent_runs SET input_tokens=100, cache_read_tokens=200, " + "cache_creation_tokens=NULL, output_tokens=10, cost_usd=0.01 WHERE id=?", + (rid,), + ) + conn.commit() + conn.close() + s = U.task_usage_summary(88) # must not raise + assert s["total_in"] == 300 # 100 + 200 + (NULL->0) + assert s["total_cached"] == 200 diff --git a/tests/test_webhooks.py b/tests/test_webhooks.py index 3276478..5f3850f 100644 --- a/tests/test_webhooks.py +++ b/tests/test_webhooks.py @@ -433,3 +433,67 @@ def test_ci_failure_development_escalates_at_limit( assert "after CI failure" in err_msg # Stage untouched. assert not mock_update_stage.called + + +# --------------------------------------------------------------------------- +# BUG 8 (second door): a merged-PR webhook must NOT fake-complete a task that is +# still in the deploy stage. On `deploy` done is gated by the deployer's verdict +# (check_deploy_status via advance_stage), not by the merge event. For every +# other stage the merge->done behaviour is preserved. Pure-logic tests: invoke +# handle_pr() directly with mocked helpers (no HMAC barrier). +# --------------------------------------------------------------------------- + +def _merged_pr_payload(branch="feature/ET-012-x"): + return { + "action": "closed", + "pull_request": { + "merged": True, + "number": 7, + "head": {"ref": branch}, + }, + "repository": {"name": "enduro-trails"}, + } + + +@patch("src.webhooks.gitea.notify_stage_change") +@patch("src.webhooks.gitea.update_task_stage") +@patch("src.webhooks.gitea.get_task_by_repo_branch") +@patch("src.webhooks.gitea.get_project_by_repo") +def test_merge_on_deploy_stage_does_not_set_done( + mock_proj, mock_task, mock_update_stage, mock_notify, +): + """FIX 1: merge at deploy stage is ignored — done is gated by deployer verdict.""" + from src.webhooks.gitea import handle_pr + + mock_proj.return_value = {"repo": "enduro-trails"} + mock_task.return_value = { + "id": 1, "stage": "deploy", "work_item_id": "ET-012", + } + + asyncio.run(handle_pr(_merged_pr_payload())) + + # The merge-driven done path must NOT run on deploy. + assert not mock_update_stage.called + assert not mock_notify.called + + +@patch("src.webhooks.gitea.notify_stage_change") +@patch("src.webhooks.gitea.update_task_stage") +@patch("src.webhooks.gitea.get_task_by_repo_branch") +@patch("src.webhooks.gitea.get_project_by_repo") +def test_merge_on_non_deploy_stage_sets_done( + mock_proj, mock_task, mock_update_stage, mock_notify, +): + """FIX 1: merge behaviour is preserved for non-deploy stages (e.g. review).""" + from src.webhooks.gitea import handle_pr + + mock_proj.return_value = {"repo": "enduro-trails"} + mock_task.return_value = { + "id": 2, "stage": "review", "work_item_id": "ET-013", + } + + asyncio.run(handle_pr(_merged_pr_payload(branch="feature/ET-013-x"))) + + # Non-deploy stages still get the merge-driven done. + mock_update_stage.assert_called_once_with(2, "done") + assert mock_notify.called -- 2.49.1