"""Feature 4: token / cost accounting tests. Covers: * parse_usage_from_text on a REAL claude --output-format json result blob (captured live from CLI 2.1.142), including a leading text line. * parse on garbage / missing JSON -> None (never raises). * record_usage writes the columns; NULLs when usage is None. * fmt_tokens / fmt_cost formatting. * usage_comment string format. * task_usage_summary / task_summary_comment aggregate over agent_runs. DB is an isolated temp file; no network or subprocess. """ import os import tempfile os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token") os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token") _test_db = os.path.join(tempfile.gettempdir(), "test_orchestrator_usage.db") os.environ["ORCH_DB_PATH"] = _test_db import pytest # noqa: E402 from src import db as db_module # noqa: E402 from src.db import init_db, get_db # noqa: E402 from src import usage as U # noqa: E402 # Real claude --output-format json result object (captured from CLI 2.1.142). REAL_RESULT_JSON = ( '{"type":"result","subtype":"success","is_error":false,"duration_ms":1795,' '"num_turns":1,"result":"Hi!","session_id":"abc",' '"total_cost_usd":0.0560175,' '"usage":{"input_tokens":45231,"cache_creation_input_tokens":7418,' '"cache_read_input_tokens":18500,"output_tokens":12100,' '"service_tier":"standard"},' '"modelUsage":{"claude-opus-4-7":{"inputTokens":6,"outputTokens":7}},' '"permission_denials":[]}' ) @pytest.fixture(autouse=True) def setup_db(monkeypatch): # get_db() reads settings.db_path live; pin it to our isolated DB. monkeypatch.setattr(db_module.settings, "db_path", _test_db, raising=False) if os.path.exists(_test_db): os.unlink(_test_db) init_db() yield if os.path.exists(_test_db): os.unlink(_test_db) # --------------------------------------------------------------------------- # # parsing # --------------------------------------------------------------------------- # def test_parse_real_result_json(): u = U.parse_usage_from_text(REAL_RESULT_JSON) assert u is not None assert u["input_tokens"] == 45231 assert u["output_tokens"] == 12100 assert u["cache_read_tokens"] == 18500 assert abs(u["cost_usd"] - 0.0560175) < 1e-9 def test_parse_with_leading_text(): """The agent may print text before the trailing JSON; we still find it.""" text = "some agent stdout line\nanother line\n" + REAL_RESULT_JSON u = U.parse_usage_from_text(text) assert u is not None assert u["input_tokens"] == 45231 assert u["output_tokens"] == 12100 def test_parse_garbage_returns_none(): assert U.parse_usage_from_text("not json at all { broken") is None assert U.parse_usage_from_text("") is None assert U.parse_usage_from_text(None) is None def test_parse_json_without_usage_returns_none(): assert U.parse_usage_from_text('{"hello":"world"}') is None def test_parse_from_log_missing_file_returns_none(): assert U.parse_usage_from_log("/no/such/file.log") is None # --------------------------------------------------------------------------- # # record_usage # --------------------------------------------------------------------------- # def _new_run(agent="developer", task_id=1): conn = get_db() cur = conn.execute("INSERT INTO agent_runs (task_id, agent) VALUES (?, ?)", (task_id, agent)) rid = cur.lastrowid conn.commit() conn.close() return rid def test_record_usage_writes_columns(): rid = _new_run() u = U.parse_usage_from_text(REAL_RESULT_JSON) U.record_usage(rid, u) conn = get_db() row = conn.execute( "SELECT input_tokens, output_tokens, cache_read_tokens, cost_usd " "FROM agent_runs WHERE id=?", (rid,) ).fetchone() conn.close() assert row["input_tokens"] == 45231 assert row["output_tokens"] == 12100 assert row["cache_read_tokens"] == 18500 assert abs(row["cost_usd"] - 0.0560175) < 1e-9 def test_record_usage_none_writes_nulls(): rid = _new_run() U.record_usage(rid, None) # must not raise conn = get_db() row = conn.execute("SELECT input_tokens, cost_usd FROM agent_runs WHERE id=?", (rid,)).fetchone() conn.close() assert row["input_tokens"] is None assert row["cost_usd"] is None # --------------------------------------------------------------------------- # # formatting # --------------------------------------------------------------------------- # def test_fmt_tokens(): assert U.fmt_tokens(6) == "6" assert U.fmt_tokens(1234) == "1.2k" assert U.fmt_tokens(45231) == "45.2k" assert U.fmt_tokens(2_500_000) == "2.5M" assert U.fmt_tokens(None) == "0" def test_fmt_cost(): assert U.fmt_cost(0.21) == "$0.21" assert U.fmt_cost(0.0560175) == "$0.06" assert U.fmt_cost(None) == "$0.00" def test_usage_comment_format(): u = {"input_tokens": 45231, "output_tokens": 12100, "cost_usd": 0.21} c = U.usage_comment("developer", u) assert "Developer" in c assert "45.2k in" in c assert "12.1k out" in c assert "$0.21" in c # --------------------------------------------------------------------------- # # task summary # --------------------------------------------------------------------------- # def test_task_summary_aggregates_over_agents(): # two runs for the same task: developer + tester for agent, ti, to, cost in [("developer", 1000, 200, 0.10), ("tester", 500, 100, 0.05)]: rid = _new_run(agent=agent, task_id=42) U.record_usage(rid, {"input_tokens": ti, "output_tokens": to, "cache_read_tokens": 0, "cost_usd": cost}) s = U.task_usage_summary(42) assert s["total_in"] == 1500 assert s["total_out"] == 300 assert abs(s["total_cost"] - 0.15) < 1e-9 agents = {a for a, *_ in s["per_agent"]} assert agents == {"developer", "tester"} comment = U.task_summary_comment(42) assert "1.5k" in comment # total in assert "$0.15" in comment # total cost assert "Developer" in comment assert "Tester" in comment