1. BUG 8 (second door): merge webhook no longer fake-completes a task at the deploy stage; done is gated by the deployer verdict (check_deploy_status). Other stages keep merge->done. 2. Token accounting: parse+persist cache_creation_input_tokens (new idempotent agent_runs column). usage_comment / task_summary now show the FULL input (input + cache_read + cache_creation) with a cached breakdown. cost_usd untouched. 3. deploy->done success now forces the Plane issue to terminal Done state. 4. All agents (architect/developer/reviewer/tester/deployer) attach artifact links to their finish comment via gitea_public_url. Tests added for each fix; pytest 244 passed / 9 failed (off-limits HMAC group).
310 lines
10 KiB
Python
310 lines
10 KiB
Python
"""Feature 4: token / cost accounting tests.
|
|
|
|
Covers:
|
|
* parse_usage_from_text on a REAL claude --output-format json result blob
|
|
(captured live from CLI 2.1.142), including a leading text line.
|
|
* parse on garbage / missing JSON -> None (never raises).
|
|
* record_usage writes the columns; NULLs when usage is None.
|
|
* fmt_tokens / fmt_cost formatting.
|
|
* usage_comment string format.
|
|
* task_usage_summary / task_summary_comment aggregate over agent_runs.
|
|
|
|
DB is an isolated temp file; no network or subprocess.
|
|
"""
|
|
|
|
import os
|
|
import tempfile
|
|
|
|
os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")
|
|
os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")
|
|
|
|
_test_db = os.path.join(tempfile.gettempdir(), "test_orchestrator_usage.db")
|
|
os.environ["ORCH_DB_PATH"] = _test_db
|
|
|
|
import pytest # noqa: E402
|
|
|
|
from src import db as db_module # noqa: E402
|
|
from src.db import init_db, get_db # noqa: E402
|
|
from src import usage as U # noqa: E402
|
|
|
|
|
|
# Result object emitted by ``claude --output-format json`` (captured from
# CLI 2.1.142).  Kept as a raw string, not a dict, so the parser under test
# is fed exactly the text it would read from an agent's output.
REAL_RESULT_JSON = "".join((
    '{"type":"result","subtype":"success","is_error":false,"duration_ms":1795,',
    '"num_turns":1,"result":"Hi!","session_id":"abc",',
    '"total_cost_usd":0.0560175,',
    '"usage":{"input_tokens":45231,"cache_creation_input_tokens":7418,',
    '"cache_read_input_tokens":18500,"output_tokens":12100,',
    '"service_tier":"standard"},',
    '"modelUsage":{"claude-opus-4-7":{"inputTokens":6,"outputTokens":7}},',
    '"permission_denials":[]}',
))
|
|
|
|
|
|
@pytest.fixture(autouse=True)
def setup_db(monkeypatch):
    """Give every test a freshly initialised database file of its own.

    The file at ``_test_db`` is removed (if present) before ``init_db()``
    runs and again after the test finishes, so no rows carry over from one
    test to the next.
    """

    def _drop_db_file():
        # Tolerate a missing file: the first test of a session has nothing
        # to delete yet.
        if os.path.exists(_test_db):
            os.unlink(_test_db)

    # get_db() reads settings.db_path live; pin it to our isolated DB.
    monkeypatch.setattr(db_module.settings, "db_path", _test_db, raising=False)

    _drop_db_file()
    init_db()
    yield
    _drop_db_file()
|
|
|
|
|
|
# --------------------------------------------------------------------------- #
|
|
# parsing
|
|
# --------------------------------------------------------------------------- #
|
|
def test_parse_real_result_json():
    """Every accounted field is extracted from the captured CLI result."""
    parsed = U.parse_usage_from_text(REAL_RESULT_JSON)
    assert parsed is not None

    # FIX 2: the cache_creation slice must now be parsed (was dropped before),
    # so it is checked alongside the other integer counters.
    expected_counts = (
        ("input_tokens", 45231),
        ("output_tokens", 12100),
        ("cache_read_tokens", 18500),
        ("cache_creation_tokens", 7418),
    )
    for key, want in expected_counts:
        assert parsed[key] == want

    # Cost is a float: compare with a tolerance rather than exact equality.
    cost_delta = parsed["cost_usd"] - 0.0560175
    assert abs(cost_delta) < 1e-9
|
|
|
|
|
|
def test_parse_cache_creation_present():
    """The cache-creation token count is picked up from the usage object."""
    u = U.parse_usage_from_text(REAL_RESULT_JSON)
    # Guard first: without it a parse failure would surface as an opaque
    # "'NoneType' object is not subscriptable" instead of a clear assertion.
    assert u is not None
    assert u["cache_creation_tokens"] == 7418
|
|
|
|
|
|
def test_parse_cache_creation_missing_defaults_zero():
    """A usage object without the cache-creation key parses with that count as 0."""
    # Same structure as a real result, minus "cache_creation_input_tokens".
    blob = (
        '{"total_cost_usd":0.01,'
        '"usage":{"input_tokens":10,"output_tokens":5,'
        '"cache_read_input_tokens":100}}'
    )
    u = U.parse_usage_from_text(blob)
    # Guard first: without it a parse failure would surface as an opaque
    # "'NoneType' object is not subscriptable" instead of a clear assertion.
    assert u is not None
    assert u["cache_creation_tokens"] == 0
    assert u["cache_read_tokens"] == 100
|
|
|
|
|
|
def test_parse_with_leading_text():
    """Plain stdout lines ahead of the trailing JSON do not hide the usage."""
    preamble = "some agent stdout line\nanother line\n"
    parsed = U.parse_usage_from_text(preamble + REAL_RESULT_JSON)

    assert parsed is not None
    observed = (parsed["input_tokens"], parsed["output_tokens"])
    assert observed == (45231, 12100)
|
|
|
|
|
|
def test_parse_garbage_returns_none():
    """Unparseable, empty and ``None`` inputs all yield ``None`` without raising."""
    unusable_inputs = ("not json at all { broken", "", None)
    for candidate in unusable_inputs:
        assert U.parse_usage_from_text(candidate) is None
|
|
|
|
|
|
def test_parse_json_without_usage_returns_none():
    """Valid JSON that carries no usage information is reported as ``None``."""
    parsed = U.parse_usage_from_text('{"hello":"world"}')
    assert parsed is None
|
|
|
|
|
|
def test_parse_from_log_missing_file_returns_none():
    """Pointing the log parser at a non-existent path yields ``None``."""
    missing_path = "/no/such/file.log"
    parsed = U.parse_usage_from_log(missing_path)
    assert parsed is None
|
|
|
|
|
|
# --------------------------------------------------------------------------- #
|
|
# record_usage
|
|
# --------------------------------------------------------------------------- #
|
|
def _new_run(agent="developer", task_id=1):
    """Insert a minimal ``agent_runs`` row and return its row id.

    Only ``task_id`` and ``agent`` are set; every other column keeps
    whatever default the schema gives it.

    Args:
        agent: Value stored in the ``agent`` column.
        task_id: Value stored in the ``task_id`` column.

    Returns:
        The ``lastrowid`` reported by the INSERT cursor.
    """
    conn = get_db()
    try:
        cur = conn.execute(
            "INSERT INTO agent_runs (task_id, agent) VALUES (?, ?)",
            (task_id, agent),
        )
        rid = cur.lastrowid
        conn.commit()
    finally:
        # Close even when the INSERT/commit raises, so a failing test does
        # not leave an open handle on the DB file the fixture will unlink.
        conn.close()
    return rid
|
|
|
|
|
|
def test_record_usage_writes_columns():
    """``record_usage`` persists every parsed field onto the run's row."""
    rid = _new_run()
    u = U.parse_usage_from_text(REAL_RESULT_JSON)
    U.record_usage(rid, u)

    conn = get_db()
    try:
        row = conn.execute(
            "SELECT input_tokens, output_tokens, cache_read_tokens, "
            "cache_creation_tokens, cost_usd "
            "FROM agent_runs WHERE id=?", (rid,)
        ).fetchone()
    finally:
        # Close even if the SELECT raises (e.g. a column is missing), so the
        # failure does not also leave an open handle on the test DB file.
        conn.close()

    # A missing row should fail here, not as a subscript error below.
    assert row is not None
    assert row["input_tokens"] == 45231
    assert row["output_tokens"] == 12100
    assert row["cache_read_tokens"] == 18500
    # FIX 2: cache_creation column is now persisted.
    assert row["cache_creation_tokens"] == 7418
    assert abs(row["cost_usd"] - 0.0560175) < 1e-9
|
|
|
|
|
|
def test_record_usage_none_writes_nulls():
    """Recording ``None`` usage must not raise and leaves the columns NULL."""
    rid = _new_run()
    U.record_usage(rid, None)  # must not raise

    conn = get_db()
    try:
        row = conn.execute(
            "SELECT input_tokens, cost_usd FROM agent_runs WHERE id=?", (rid,)
        ).fetchone()
    finally:
        # Close even if the SELECT raises, so the failure does not also
        # leave an open handle on the test DB file.
        conn.close()

    # A missing row should fail here, not as a subscript error below.
    assert row is not None
    assert row["input_tokens"] is None
    assert row["cost_usd"] is None
|
|
|
|
|
|
# --------------------------------------------------------------------------- #
|
|
# formatting
|
|
# --------------------------------------------------------------------------- #
|
|
def test_fmt_tokens():
    """Token counts render as a plain number, ``k`` or ``M``; ``None`` as ``"0"``."""
    cases = [
        (6, "6"),
        (1234, "1.2k"),
        (45231, "45.2k"),
        (2_500_000, "2.5M"),
        (None, "0"),
    ]
    for count, rendered in cases:
        assert U.fmt_tokens(count) == rendered
|
|
|
|
|
|
def test_fmt_cost():
    """Costs render as dollars with two decimals; ``None`` as ``"$0.00"``."""
    cases = [
        (0.21, "$0.21"),
        (0.0560175, "$0.06"),
        (None, "$0.00"),
    ]
    for amount, rendered in cases:
        assert U.fmt_cost(amount) == rendered
|
|
|
|
|
|
def test_usage_comment_format():
    """Without cache fields the comment shows plain in/out/cost figures."""
    # No cache -> in_total == input_tokens, no cached breakdown shown.
    usage = {"input_tokens": 45231, "output_tokens": 12100, "cost_usd": 0.21}
    comment = U.usage_comment("developer", usage)

    for fragment in ("Developer", "45.2k in"):
        assert fragment in comment
    assert "cached" not in comment
    for fragment in ("12.1k out", "$0.21"):
        assert fragment in comment
|
|
|
|
|
|
def test_usage_comment_shows_full_input_with_cached():
    """FIX 2: the "in" figure is input + cache_read + cache_creation, with a cached breakdown."""
    usage = dict(
        input_tokens=81,
        cache_read_tokens=8_400_000,
        cache_creation_tokens=100_000,
        output_tokens=45_800,
        cost_usd=7.29,
    )
    comment = U.usage_comment("developer", usage)

    # total in = 8_500_081 -> 8.5M ; cached = 8_500_000 -> 8.5M
    expected_fragments = ("8.5M in (8.5M cached)", "45.8k out", "$7.29")
    for fragment in expected_fragments:
        assert fragment in comment
|
|
|
|
|
|
def test_usage_comment_no_cached_when_zero():
    """Explicit zero cache counts must not produce a cached breakdown."""
    usage = {
        "input_tokens": 1234,
        "cache_read_tokens": 0,
        "cache_creation_tokens": 0,
        "output_tokens": 50,
        "cost_usd": 0.01,
    }
    comment = U.usage_comment("developer", usage)

    assert "1.2k in" in comment
    assert "cached" not in comment
|
|
|
|
|
|
# --------------------------------------------------------------------------- #
|
|
# FIX 4: per-agent artifact links in finish comments
|
|
# --------------------------------------------------------------------------- #
|
|
def _ctx():
|
|
return dict(repo="enduro-trails", branch="feature/ET-012-x",
|
|
work_item_id="ET-012")
|
|
|
|
|
|
def test_usage_comment_reviewer_links_review_doc():
    """The reviewer's comment mentions the review document and the work item id."""
    usage = {"input_tokens": 5}
    comment = U.usage_comment("reviewer", usage, **_ctx())

    for fragment in ("12-review.md", "ET-012"):
        assert fragment in comment
|
|
|
|
|
|
def test_usage_comment_tester_links_test_report():
    """The tester's comment mentions the test-report document."""
    usage = {"input_tokens": 5}
    comment = U.usage_comment("tester", usage, **_ctx())
    assert "13-test-report.md" in comment
|
|
|
|
|
|
def test_usage_comment_deployer_links_deploy_log():
    """The deployer's comment mentions the deploy-log document."""
    usage = {"input_tokens": 5}
    comment = U.usage_comment("deployer", usage, **_ctx())
    assert "14-deploy-log.md" in comment
|
|
|
|
|
|
def test_usage_comment_developer_links_pr_and_branch():
    """The developer's comment mentions both the pull request and the branch."""
    usage = {"input_tokens": 5}
    comment = U.usage_comment("developer", usage, pr_number=7, **_ctx())

    for fragment in ("pulls/7", "feature/ET-012-x"):
        assert fragment in comment
|
|
|
|
|
|
def test_usage_comment_architect_links_adr():
    """The architect's comment mentions the ADR artifact."""
    usage = {"input_tokens": 5}
    comment = U.usage_comment("architect", usage, **_ctx())
    assert "06-adr" in comment
|
|
|
|
|
|
def test_usage_comment_no_links_without_context():
    """With no repo/branch context given, the comment carries no links and does not crash."""
    usage = {"input_tokens": 5}
    comment = U.usage_comment("reviewer", usage)

    for forbidden in ("12-review.md", "http"):
        assert forbidden not in comment
|
|
|
|
|
|
# --------------------------------------------------------------------------- #
|
|
# task summary
|
|
# --------------------------------------------------------------------------- #
|
|
def test_task_summary_aggregates_over_agents():
    """Totals and the per-agent list cover every run recorded for a task."""
    # two runs for the same task: developer + tester
    recorded_runs = (
        ("developer", 1000, 200, 0.10),
        ("tester", 500, 100, 0.05),
    )
    for agent_name, tokens_in, tokens_out, cost in recorded_runs:
        run_id = _new_run(agent=agent_name, task_id=42)
        usage = {
            "input_tokens": tokens_in,
            "output_tokens": tokens_out,
            "cache_read_tokens": 0,
            "cost_usd": cost,
        }
        U.record_usage(run_id, usage)

    summary = U.task_usage_summary(42)
    assert summary["total_in"] == 1500
    assert summary["total_out"] == 300
    assert abs(summary["total_cost"] - 0.15) < 1e-9
    # The first element of each per-agent entry is the agent name.
    seen_agents = {name for name, *_ in summary["per_agent"]}
    assert seen_agents == {"developer", "tester"}

    rendered = U.task_summary_comment(42)
    expected_fragments = (
        "1.5k",       # total in
        "$0.15",      # total cost
        "Developer",
        "Tester",
    )
    for fragment in expected_fragments:
        assert fragment in rendered
|
|
|
|
|
|
def test_task_summary_sums_all_three_input_components():
    """FIX 2: ``total_in`` adds input, cache_read and cache_creation; ``total_cached`` adds the two cache parts."""
    runs = (
        ("developer", {
            "input_tokens": 100,
            "cache_read_tokens": 2000,
            "cache_creation_tokens": 900,
            "output_tokens": 50,
            "cost_usd": 0.10,
        }),
        ("tester", {
            "input_tokens": 10,
            "cache_read_tokens": 500,
            "cache_creation_tokens": 0,
            "output_tokens": 5,
            "cost_usd": 0.05,
        }),
    )
    for agent_name, usage in runs:
        run_id = _new_run(agent=agent_name, task_id=77)
        U.record_usage(run_id, usage)

    summary = U.task_usage_summary(77)
    # total_in = (100+2000+900) + (10+500+0) = 3510
    assert summary["total_in"] == 3510
    # total_cached = (2000+900) + (500+0) = 3400
    assert summary["total_cached"] == 3400
    assert summary["total_out"] == 55

    rendered = U.task_summary_comment(77)
    assert "cached" in rendered
|
|
|
|
|
|
def test_task_summary_handles_null_cache_creation():
    """Pre-existing rows (NULL cache_creation) must not break aggregation."""
    rid = _new_run(agent="developer", task_id=88)

    # Write the row by hand so cache_creation_tokens is an explicit NULL,
    # like a row stored before that column was populated.
    conn = get_db()
    try:
        conn.execute(
            "UPDATE agent_runs SET input_tokens=100, cache_read_tokens=200, "
            "cache_creation_tokens=NULL, output_tokens=10, cost_usd=0.01 WHERE id=?",
            (rid,),
        )
        conn.commit()
    finally:
        # Close even if the UPDATE/commit raises, so the failure does not
        # also leave an open handle on the test DB file.
        conn.close()

    s = U.task_usage_summary(88)  # must not raise
    assert s["total_in"] == 300  # 100 + 200 + (NULL->0)
    assert s["total_cached"] == 200
|