fix(observability): merge-gate on deploy, full token input, Plane Done, artifact links
1. BUG 8 (second door): merge webhook no longer fake-completes a task at the deploy stage; done is gated by the deployer verdict (check_deploy_status). Other stages keep merge->done. 2. Token accounting: parse+persist cache_creation_input_tokens (new idempotent agent_runs column). usage_comment / task_summary now show the FULL input (input + cache_read + cache_creation) with a cached breakdown. cost_usd untouched. 3. deploy->done success now forces the Plane issue to terminal Done state. 4. All agents (architect/developer/reviewer/tester/deployer) attach artifact links to their finish comment via gitea_public_url. Tests added for each fix; pytest 244 passed / 9 failed (off-limits HMAC group).
This commit is contained in:
@@ -62,9 +62,27 @@ def test_parse_real_result_json():
|
||||
assert u["input_tokens"] == 45231
|
||||
assert u["output_tokens"] == 12100
|
||||
assert u["cache_read_tokens"] == 18500
|
||||
# FIX 2: cache_creation slice must now be parsed (was dropped before).
|
||||
assert u["cache_creation_tokens"] == 7418
|
||||
assert abs(u["cost_usd"] - 0.0560175) < 1e-9
|
||||
|
||||
|
||||
def test_parse_cache_creation_present():
|
||||
u = U.parse_usage_from_text(REAL_RESULT_JSON)
|
||||
assert u["cache_creation_tokens"] == 7418
|
||||
|
||||
|
||||
def test_parse_cache_creation_missing_defaults_zero():
|
||||
blob = (
|
||||
'{"total_cost_usd":0.01,'
|
||||
'"usage":{"input_tokens":10,"output_tokens":5,'
|
||||
'"cache_read_input_tokens":100}}'
|
||||
)
|
||||
u = U.parse_usage_from_text(blob)
|
||||
assert u["cache_creation_tokens"] == 0
|
||||
assert u["cache_read_tokens"] == 100
|
||||
|
||||
|
||||
def test_parse_with_leading_text():
|
||||
"""The agent may print text before the trailing JSON; we still find it."""
|
||||
text = "some agent stdout line\nanother line\n" + REAL_RESULT_JSON
|
||||
@@ -106,13 +124,16 @@ def test_record_usage_writes_columns():
|
||||
U.record_usage(rid, u)
|
||||
conn = get_db()
|
||||
row = conn.execute(
|
||||
"SELECT input_tokens, output_tokens, cache_read_tokens, cost_usd "
|
||||
"SELECT input_tokens, output_tokens, cache_read_tokens, "
|
||||
"cache_creation_tokens, cost_usd "
|
||||
"FROM agent_runs WHERE id=?", (rid,)
|
||||
).fetchone()
|
||||
conn.close()
|
||||
assert row["input_tokens"] == 45231
|
||||
assert row["output_tokens"] == 12100
|
||||
assert row["cache_read_tokens"] == 18500
|
||||
# FIX 2: cache_creation column is now persisted.
|
||||
assert row["cache_creation_tokens"] == 7418
|
||||
assert abs(row["cost_usd"] - 0.0560175) < 1e-9
|
||||
|
||||
|
||||
@@ -144,14 +165,82 @@ def test_fmt_cost():
|
||||
|
||||
|
||||
def test_usage_comment_format():
|
||||
# No cache -> in_total == input_tokens, no cached breakdown shown.
|
||||
u = {"input_tokens": 45231, "output_tokens": 12100, "cost_usd": 0.21}
|
||||
c = U.usage_comment("developer", u)
|
||||
assert "Developer" in c
|
||||
assert "45.2k in" in c
|
||||
assert "cached" not in c
|
||||
assert "12.1k out" in c
|
||||
assert "$0.21" in c
|
||||
|
||||
|
||||
def test_usage_comment_shows_full_input_with_cached():
|
||||
"""FIX 2: in = input + cache_read + cache_creation, with cached breakdown."""
|
||||
u = {
|
||||
"input_tokens": 81,
|
||||
"cache_read_tokens": 8_400_000,
|
||||
"cache_creation_tokens": 100_000,
|
||||
"output_tokens": 45_800,
|
||||
"cost_usd": 7.29,
|
||||
}
|
||||
c = U.usage_comment("developer", u)
|
||||
# total in = 8_500_081 -> 8.5M ; cached = 8_500_000 -> 8.5M
|
||||
assert "8.5M in (8.5M cached)" in c
|
||||
assert "45.8k out" in c
|
||||
assert "$7.29" in c
|
||||
|
||||
|
||||
def test_usage_comment_no_cached_when_zero():
|
||||
u = {"input_tokens": 1234, "cache_read_tokens": 0,
|
||||
"cache_creation_tokens": 0, "output_tokens": 50, "cost_usd": 0.01}
|
||||
c = U.usage_comment("developer", u)
|
||||
assert "1.2k in" in c
|
||||
assert "cached" not in c
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# FIX 4: per-agent artifact links in finish comments
|
||||
# --------------------------------------------------------------------------- #
|
||||
def _ctx():
|
||||
return dict(repo="enduro-trails", branch="feature/ET-012-x",
|
||||
work_item_id="ET-012")
|
||||
|
||||
|
||||
def test_usage_comment_reviewer_links_review_doc():
|
||||
c = U.usage_comment("reviewer", {"input_tokens": 5}, **_ctx())
|
||||
assert "12-review.md" in c
|
||||
assert "ET-012" in c
|
||||
|
||||
|
||||
def test_usage_comment_tester_links_test_report():
|
||||
c = U.usage_comment("tester", {"input_tokens": 5}, **_ctx())
|
||||
assert "13-test-report.md" in c
|
||||
|
||||
|
||||
def test_usage_comment_deployer_links_deploy_log():
|
||||
c = U.usage_comment("deployer", {"input_tokens": 5}, **_ctx())
|
||||
assert "14-deploy-log.md" in c
|
||||
|
||||
|
||||
def test_usage_comment_developer_links_pr_and_branch():
|
||||
c = U.usage_comment("developer", {"input_tokens": 5}, pr_number=7, **_ctx())
|
||||
assert "pulls/7" in c
|
||||
assert "feature/ET-012-x" in c
|
||||
|
||||
|
||||
def test_usage_comment_architect_links_adr():
|
||||
c = U.usage_comment("architect", {"input_tokens": 5}, **_ctx())
|
||||
assert "06-adr" in c
|
||||
|
||||
|
||||
def test_usage_comment_no_links_without_context():
|
||||
"""Without repo/branch context, no links are appended (no crash)."""
|
||||
c = U.usage_comment("reviewer", {"input_tokens": 5})
|
||||
assert "12-review.md" not in c
|
||||
assert "http" not in c
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# task summary
|
||||
# --------------------------------------------------------------------------- #
|
||||
@@ -174,3 +263,47 @@ def test_task_summary_aggregates_over_agents():
|
||||
assert "$0.15" in comment # total cost
|
||||
assert "Developer" in comment
|
||||
assert "Tester" in comment
|
||||
|
||||
|
||||
def test_task_summary_sums_all_three_input_components():
|
||||
"""FIX 2: total_in = SUM(input + cache_read + cache_creation); total_cached too."""
|
||||
rid = _new_run(agent="developer", task_id=77)
|
||||
U.record_usage(rid, {
|
||||
"input_tokens": 100,
|
||||
"cache_read_tokens": 2000,
|
||||
"cache_creation_tokens": 900,
|
||||
"output_tokens": 50,
|
||||
"cost_usd": 0.10,
|
||||
})
|
||||
rid2 = _new_run(agent="tester", task_id=77)
|
||||
U.record_usage(rid2, {
|
||||
"input_tokens": 10,
|
||||
"cache_read_tokens": 500,
|
||||
"cache_creation_tokens": 0,
|
||||
"output_tokens": 5,
|
||||
"cost_usd": 0.05,
|
||||
})
|
||||
s = U.task_usage_summary(77)
|
||||
# total_in = (100+2000+900) + (10+500+0) = 3510
|
||||
assert s["total_in"] == 3510
|
||||
# total_cached = (2000+900) + (500+0) = 3400
|
||||
assert s["total_cached"] == 3400
|
||||
assert s["total_out"] == 55
|
||||
comment = U.task_summary_comment(77)
|
||||
assert "cached" in comment
|
||||
|
||||
|
||||
def test_task_summary_handles_null_cache_creation():
|
||||
"""Pre-existing rows (NULL cache_creation) must not break aggregation."""
|
||||
rid = _new_run(agent="developer", task_id=88)
|
||||
conn = get_db()
|
||||
conn.execute(
|
||||
"UPDATE agent_runs SET input_tokens=100, cache_read_tokens=200, "
|
||||
"cache_creation_tokens=NULL, output_tokens=10, cost_usd=0.01 WHERE id=?",
|
||||
(rid,),
|
||||
)
|
||||
conn.commit()
|
||||
conn.close()
|
||||
s = U.task_usage_summary(88) # must not raise
|
||||
assert s["total_in"] == 300 # 100 + 200 + (NULL->0)
|
||||
assert s["total_cached"] == 200
|
||||
|
||||
Reference in New Issue
Block a user