Files
orchestrator/tests/test_telegram_tracker.py
dev-bot 9a0298de9d feat(telegram): live editable task tracker (Variant B+), replace 15-message spam
Replace the ~15 separate Telegram messages per task (agent start/finish, stage
transition, QG-pending, tech noise) with ONE live tracker message edited in
place (editMessageText) on every stage transition. Only attention-worthy events
are still sent as SEPARATE, notifying messages: approve-gate, deploy-fail,
agent-fail, task error.

- db.py: idempotent ALTERs — tasks.tracker_message_id, tasks.title,
  tasks.brd_review_started_at/ended_at, agent_runs.model. Helpers for
  tracker message_id + BRD-review clock.
- usage.py: short_model_name() (strip provider/claude- prefix); parse model
  from result-JSON modelUsage; record_usage persists model.
- notifications.py: render_task_tracker(task_id) (stateless render from
  agent_runs), update_task_tracker (sendMessage->store id->editMessageText with
  fallback to a new message, silent), edit_telegram(). Per-stage line
  in↓/out↑·cost·model, ⏸️ Ревью БРД (human time), 💰 totals, finish block
  (⏱️ wall/agents/yours, 🔗 PR · 📦). notify_* are now tracker-only/log-only
  except the four alerts.
- stage_engine.py: stamp brd_review_ended on analysis->architecture advance.
- webhooks/plane.py: persist task title on creation.
- tests/test_telegram_tracker.py: render, short_model_name, send/edit/fallback,
  separate-vs-silent alert behavior.
2026-06-04 11:42:46 +03:00

343 lines
14 KiB
Python

"""feat/telegram-live-tracker: tests for the live Telegram task tracker.
Covers (per DEV_TASK_TELEGRAM_TRACKER.md):
* short_model_name: provider/claude- prefix trimming.
* render_task_tracker: per-stage line format (in↓/out↑, model, cost, minutes),
the "⏸️ Ревью БРД · твоё время" line, the 💰 totals, and the finish block
(⏱️ three times + 🔗/📦).
* first message -> sendMessage stores message_id; transition -> editMessageText.
* fallback: editMessageText fails -> a NEW message is sent and the id updated.
* which alerts go out SEPARATELY (approve-gate / deploy-fail / agent-fail /
error) vs which do NOT (QG-pending / agent-start / stage-transition).
Isolated temp DB; no network (httpx is patched).
"""
import os
import tempfile
# The environment is populated BEFORE the src.* imports below, which is why
# those imports carry E402 suppressions. Presumably the src settings object
# reads these variables at import time -- confirm against src.db / settings.
# setdefault() keeps any token that is already present in the environment.
os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")
os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")
# Dedicated DB file in the system temp dir. Assigned unconditionally, so it
# overrides any ORCH_DB_PATH inherited from the caller's environment.
_test_db = os.path.join(tempfile.gettempdir(), "test_orchestrator_tracker.db")
os.environ["ORCH_DB_PATH"] = _test_db
from unittest.mock import MagicMock, patch # noqa: E402
import pytest # noqa: E402
import src.db as db_module # noqa: E402
from src.db import init_db, get_db # noqa: E402
# Short aliases used throughout the tests: N = notifications, U = usage.
from src import notifications as N # noqa: E402
from src import usage as U # noqa: E402
@pytest.fixture(autouse=True)
def setup_db(monkeypatch):
    """Run every test against a freshly initialised, private DB file.

    Before the test: point ``db_module.settings.db_path`` at the temp file,
    delete any leftover file and call ``init_db()``. After the test: delete
    the file again.
    """

    def _drop_db_file():
        # A previous test (or an aborted run) may have left the file behind.
        if os.path.exists(_test_db):
            os.unlink(_test_db)

    monkeypatch.setattr(db_module.settings, "db_path", _test_db, raising=False)
    _drop_db_file()
    init_db()
    # NOTE: this fixture does not touch the Telegram helpers. Tests that care
    # about send_telegram / edit_telegram / httpx patch them per case.
    yield
    _drop_db_file()
# --------------------------------------------------------------------------- #
# helpers to build a task + runs in the DB
# --------------------------------------------------------------------------- #
def _mk_task(stage="development", title="\u0422\u0440\u0435\u043a\u0438 \u0441 \u0437\u0443\u043c\u0430 z5",
             wid="ET-012", brd_start=None, brd_end=None):
    """Insert one row into ``tasks`` and return its row id.

    Args:
        stage: value stored in ``tasks.stage``.
        title: value stored in ``tasks.title``.
        wid: value stored in ``tasks.work_item_id``.
        brd_start: value for ``brd_review_started_at`` (``None`` = not set).
        brd_end: value for ``brd_review_ended_at`` (``None`` = not set).

    Returns:
        ``lastrowid`` of the inserted task.

    ``plane_id``, ``repo`` and ``branch`` are fixed test constants.
    """
    conn = get_db()
    try:
        cur = conn.execute(
            "INSERT INTO tasks (plane_id, work_item_id, repo, branch, stage, title, "
            "brd_review_started_at, brd_review_ended_at) "
            "VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
            ("p1", wid, "enduro-trails", "feature/ET-012-x", stage, title,
             brd_start, brd_end),
        )
        tid = cur.lastrowid
        conn.commit()
        return tid
    finally:
        # Close even when the INSERT/commit raises, so a failing test does not
        # leave an open handle on the DB file that the fixture deletes.
        conn.close()
def _mk_run(task_id, agent, started, finished, in_tok, out_tok,
            cache_read=0, cache_creation=0, cost=0.0, model=None, exit_code=0):
    """Insert one row into ``agent_runs`` and return its row id.

    Args:
        task_id: id of the owning ``tasks`` row.
        agent: agent name stored in ``agent_runs.agent``.
        started: value for ``started_at``.
        finished: value for ``finished_at``; tests pass ``None`` for a run
            that is still in progress.
        in_tok: value for ``input_tokens``.
        out_tok: value for ``output_tokens``.
        cache_read: value for ``cache_read_tokens``.
        cache_creation: value for ``cache_creation_tokens``.
        cost: value for ``cost_usd``.
        model: value for ``model`` (``None`` = unknown).
        exit_code: value for ``exit_code``.

    Returns:
        ``lastrowid`` of the inserted run.
    """
    conn = get_db()
    try:
        cur = conn.execute(
            "INSERT INTO agent_runs (task_id, agent, started_at, finished_at, "
            "exit_code, input_tokens, output_tokens, cache_read_tokens, "
            "cache_creation_tokens, cost_usd, model) "
            "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
            (task_id, agent, started, finished, exit_code, in_tok, out_tok,
             cache_read, cache_creation, cost, model),
        )
        rid = cur.lastrowid
        conn.commit()
        return rid
    finally:
        # Close even when the INSERT/commit raises, so a failing test does not
        # leave an open handle on the DB file that the fixture deletes.
        conn.close()
# --------------------------------------------------------------------------- #
# short_model_name
# --------------------------------------------------------------------------- #
def test_short_model_name():
    """short_model_name drops the provider and ``claude-`` prefixes."""
    # (raw model id, expected short name); None and "" both map to "".
    expectations = (
        ("tokenator/claude-opus-4-8", "opus-4-8"),
        ("vibecode/claude-sonnet-4.6", "sonnet-4.6"),
        ("claude-opus-4-8", "opus-4-8"),
        ("opus-4-8", "opus-4-8"),
        (None, ""),
        ("", ""),
    )
    for raw, short in expectations:
        assert U.short_model_name(raw) == short
def test_parse_usage_extracts_model_from_modelusage():
    """parse_usage_from_text reports the model named by the modelUsage key."""
    payload = "".join([
        '{"total_cost_usd":0.01,',
        '"usage":{"input_tokens":10,"output_tokens":5},',
        '"modelUsage":{"claude-opus-4-8":{"inputTokens":10,"outputTokens":5}}}',
    ])
    parsed = U.parse_usage_from_text(payload)
    assert parsed["model"] == "claude-opus-4-8"
# --------------------------------------------------------------------------- #
# render_task_tracker
# --------------------------------------------------------------------------- #
def test_render_in_progress_stage_lines_and_totals():
    """Tracker for a task in the deploy stage: stage lines, BRD line, totals."""
    tid = _mk_task(stage="deploy", brd_start="2026-06-04 10:00:00",
                   brd_end="2026-06-04 10:08:00")

    opus = "tokenator/claude-opus-4-8"
    sonnet = "vibecode/claude-sonnet-4.6"
    # (agent, started, finished, in_tok, out_tok, cache_read, cost, model)
    # First row: a 10-minute analysis, ~1.1M cached input, 39.6k out, $2.38.
    finished_runs = [
        ("analyst", "2026-06-04 09:00:00", "2026-06-04 09:10:00",
         1000, 39600, 1_100_000, 2.38, opus),
        ("architect", "2026-06-04 10:08:00", "2026-06-04 10:17:00",
         500, 34400, 1_500_000, 2.24, opus),
        ("developer", "2026-06-04 10:17:00", "2026-06-04 10:28:00",
         400, 45800, 8_400_000, 7.29, opus),
        ("reviewer", "2026-06-04 10:28:00", "2026-06-04 10:31:00",
         300, 12900, 1_200_000, 1.53, sonnet),
        ("tester", "2026-06-04 10:31:00", "2026-06-04 10:36:00",
         200, 19500, 1_200_000, 1.51, sonnet),
    ]
    for agent, started, finished, in_tok, out_tok, cache_read, cost, model in finished_runs:
        _mk_run(tid, agent, started, finished,
                in_tok=in_tok, out_tok=out_tok, cache_read=cache_read,
                cost=cost, model=model)
    # The deployer has started but not finished -> the active stage line.
    _mk_run(tid, "deployer", "2026-06-04 10:36:00", None,
            in_tok=0, out_tok=0, model=None, exit_code=None)

    text = N.render_task_tracker(tid)

    # In-progress header: tool emoji, work item id, start of the title.
    assert text.startswith("\U0001f6e0\ufe0f ET-012 \u00b7 \u0422\u0440\u0435\u043a\u0438")

    expected_fragments = (
        "\u2705 Analysis",                              # finished stage marker
        "10\u043c",                                     # analysis duration (minutes)
        "39.6k\u2191",                                  # analysis output tokens
        "$2.38",                                        # analysis cost
        "opus-4-8",                                     # shortened model name
        "sonnet-4.6",                                   # reviewer/tester model
        "\u0420\u0435\u0432\u044c\u044e \u0411\u0420\u0414",  # "BRD review" label
        "\u0442\u0432\u043e\u0451 \u0432\u0440\u0435\u043c\u044f",  # "your time"
        "\U0001f504 Deploy",                            # active stage marker
        "\u0438\u0434\u0451\u0442",                     # "in progress"
        "\U0001f4b0",                                   # totals line (money bag)
    )
    for fragment in expected_fragments:
        assert fragment in text

    # While in progress there is no final "Total" time line.
    assert "\u0412\u0441\u0435\u0433\u043e" not in text
def test_render_brd_review_waiting_shows_hourglass():
    """An open BRD review (start set, end unset) renders with an hourglass."""
    tid = _mk_task(stage="analysis", brd_start="2026-06-04 10:00:00",
                   brd_end=None)
    _mk_run(tid, "analyst", "2026-06-04 09:00:00", "2026-06-04 09:10:00",
            in_tok=1000, out_tok=39600, cache_read=1_100_000, cost=2.38,
            model="tokenator/claude-opus-4-8")

    rendered = N.render_task_tracker(tid)

    brd_label = "\u0420\u0435\u0432\u044c\u044e \u0411\u0420\u0414"  # "BRD review"
    hourglass = "\u23f3"  # shown while the review is still pending
    assert brd_label in rendered
    assert hourglass in rendered
def test_render_done_has_times_and_links():
    """A task in stage ``done`` renders the finish block: times and package."""
    tid = _mk_task(stage="done", brd_start="2026-06-04 10:00:00",
                   brd_end="2026-06-04 10:08:00")
    # Pin created_at/updated_at so the wall-clock figure is deterministic.
    conn = get_db()
    try:
        conn.execute(
            "UPDATE tasks SET created_at='2026-06-04 09:00:00', "
            "updated_at='2026-06-04 09:56:00' WHERE id=?", (tid,))
        conn.commit()
    finally:
        # Close even if the UPDATE raises, so the fixture can delete the file.
        conn.close()
    _mk_run(tid, "analyst", "2026-06-04 09:00:00", "2026-06-04 09:10:00",
            in_tok=1000, out_tok=39600, cache_read=1_100_000, cost=2.38,
            model="tokenator/claude-opus-4-8")
    _mk_run(tid, "deployer", "2026-06-04 09:50:00", "2026-06-04 09:56:00",
            in_tok=400, out_tok=22400, cache_read=1_600_000, cost=1.73,
            model="tokenator/claude-opus-4-8")
    with patch("src.notifications.httpx") as _hx:
        # httpx.get answers 200 with an empty JSON list; presumably that is
        # the "no PR found" case, leaving only the package/deployed line.
        _resp = MagicMock(status_code=200)
        _resp.json.return_value = []
        _hx.get.return_value = _resp
        text = N.render_task_tracker(tid)
    # Finished header: party emoji + work item id, and the "DONE" marker.
    assert text.startswith("\U0001f389 ET-012")
    assert "\u0413\u041e\u0422\u041e\u0412\u041e" in text
    # Stopwatch line with three times: total / agents / yours.
    assert "\u23f1\ufe0f" in text
    assert "\u0412\u0441\u0435\u0433\u043e" in text
    assert "\u0430\u0433\u0435\u043d\u0442\u044b" in text
    assert "\u0442\u0432\u043e\u0451" in text
    # Package emoji of the deployed line.
    assert "\U0001f4e6" in text
def test_render_escapes_html_in_title():
    """HTML metacharacters in the task title come out entity-escaped."""
    tid = _mk_task(stage="analysis", title="A <b>& B</b>")
    _mk_run(tid, "analyst", "2026-06-04 09:00:00", "2026-06-04 09:10:00",
            in_tok=10, out_tok=5, cost=0.0)
    rendered = N.render_task_tracker(tid)
    for escaped in ("&lt;b&gt;", "&amp;"):
        assert escaped in rendered
def test_render_omits_model_when_unknown():
    """With no model recorded, the stage line ends at the cost figure."""
    tid = _mk_task(stage="analysis")
    _mk_run(tid, "analyst", "2026-06-04 09:00:00", "2026-06-04 09:10:00",
            in_tok=10, out_tok=5, cost=0.0, model=None)
    rendered = N.render_task_tracker(tid)
    # Pick the finished-Analysis line; nothing may follow the cost on it.
    analysis_rows = [
        row for row in rendered.splitlines()
        if row.startswith("\u2705 Analysis")
    ]
    first_row = analysis_rows[0]
    assert first_row.rstrip().endswith("$0.00")
# --------------------------------------------------------------------------- #
# tracker send / edit / fallback
# --------------------------------------------------------------------------- #
def test_first_call_sends_message_and_stores_id(monkeypatch):
    """No stored tracker id yet: send silently and persist the returned id."""
    from src.db import get_tracker_message_id

    tid = _mk_task(stage="analysis")
    _mk_run(tid, "analyst", "2026-06-04 09:00:00", None, in_tok=0, out_tok=0,
            exit_code=None)
    sent = {}

    def _record_send(text, disable_notification=False):
        # Capture what was sent and hand back a fake Telegram message id.
        sent["text"] = text
        sent["silent"] = disable_notification
        return 555

    def _edit_must_not_run(*a, **k):
        raise AssertionError("should not edit on first call")

    monkeypatch.setattr(N, "send_telegram", _record_send)
    monkeypatch.setattr(N, "edit_telegram", _edit_must_not_run)

    N.update_task_tracker(tid)

    assert get_tracker_message_id(tid) == 555
    assert sent["silent"] is True  # the tracker message is sent silently
def test_second_call_edits_existing_message(monkeypatch):
    """With a stored tracker id, the existing message is edited, not re-sent."""
    from src.db import set_tracker_message_id

    tid = _mk_task(stage="development")
    _mk_run(tid, "analyst", "2026-06-04 09:00:00", "2026-06-04 09:10:00",
            in_tok=10, out_tok=5, cost=0.1)
    set_tracker_message_id(tid, 777)
    edited = {}

    def _record_edit(mid, text):
        # Remember which message id was edited and report success.
        edited.update(mid=mid)
        return True

    def _send_must_not_run(*a, **k):
        raise AssertionError("should not send when edit succeeds")

    monkeypatch.setattr(N, "edit_telegram", _record_edit)
    monkeypatch.setattr(N, "send_telegram", _send_must_not_run)

    N.update_task_tracker(tid)

    assert edited["mid"] == 777
def test_fallback_to_new_message_when_edit_fails(monkeypatch):
    """A failed edit falls back to a new message whose id replaces the old."""
    from src.db import set_tracker_message_id, get_tracker_message_id

    tid = _mk_task(stage="development")
    _mk_run(tid, "analyst", "2026-06-04 09:00:00", "2026-06-04 09:10:00",
            in_tok=10, out_tok=5, cost=0.1)
    set_tracker_message_id(tid, 100)

    def _edit_fails(mid, text):
        return False

    def _send_new(text, disable_notification=False):
        return 200

    monkeypatch.setattr(N, "edit_telegram", _edit_fails)
    monkeypatch.setattr(N, "send_telegram", _send_new)

    N.update_task_tracker(tid)

    # The stored id now points at the replacement message.
    assert get_tracker_message_id(tid) == 200
# --------------------------------------------------------------------------- #
# which alerts are SEPARATE vs tracker-only
# --------------------------------------------------------------------------- #
def test_approve_gate_sends_separate_message_and_starts_brd_clock(monkeypatch):
    """The approve gate pings once (notifying) and stamps the BRD-review start."""
    tid = _mk_task(stage="analysis")
    calls = []

    def _record_send(text, disable_notification=False):
        calls.append((text, disable_notification))
        return 1

    monkeypatch.setattr(N, "send_telegram", _record_send)
    # Stub the tracker refresh so only the separate alert reaches send_telegram.
    monkeypatch.setattr(N, "update_task_tracker", lambda task_id: None)

    N.notify_approve_requested(tid)

    # Exactly one SEPARATE (notifying) send for the approve gate.
    assert len(calls) == 1
    text, silent = calls[0]
    assert silent is False  # notifying
    assert "Approved" in text

    # The BRD clock was started.
    conn = get_db()
    try:
        row = conn.execute(
            "SELECT brd_review_started_at FROM tasks WHERE id=?", (tid,)
        ).fetchone()
    finally:
        # Close even if the SELECT raises, so the fixture can delete the file.
        conn.close()
    assert row[0] is not None
def test_error_sends_separate_message(monkeypatch):
    """A task error produces exactly one notifying message containing ERROR."""
    tid = _mk_task(stage="development")
    calls = []

    def _record_send(text, disable_notification=False):
        calls.append((text, disable_notification))
        return 1

    monkeypatch.setattr(N, "send_telegram", _record_send)

    N.notify_error(tid, "boom")

    assert len(calls) == 1
    assert calls[0][1] is False  # notifying
    assert "ERROR" in calls[0][0]
def test_stage_change_does_not_send_separate_message(monkeypatch):
    """A stage transition refreshes the tracker and sends no separate message."""
    tid = _mk_task(stage="development")
    sent = []
    refreshed = []

    def _record_send(text, disable_notification=False):
        sent.append(text)
        return 1

    def _record_refresh(task_id):
        refreshed.append(task_id)

    monkeypatch.setattr(N, "send_telegram", _record_send)
    # The real tracker refresh may itself send/edit silently; it is stubbed
    # so that any send_telegram call seen here would be a separate alert.
    monkeypatch.setattr(N, "update_task_tracker", _record_refresh)

    N.notify_stage_change(tid, "development", "review")

    assert sent == []  # no separate message
    assert refreshed == [tid]  # tracker refreshed instead
def test_agent_started_does_not_send_separate_message(monkeypatch):
    """An agent start refreshes the tracker and sends no separate message."""
    tid = _mk_task(stage="analysis")
    sent = []
    refreshed = []

    def _record_send(text, disable_notification=False):
        sent.append(text)
        return 1

    def _record_refresh(task_id):
        refreshed.append(task_id)

    monkeypatch.setattr(N, "send_telegram", _record_send)
    monkeypatch.setattr(N, "update_task_tracker", _record_refresh)

    N.notify_agent_started(1, "analyst", tid)

    assert sent == []
    assert refreshed == [tid]
def test_qg_failure_does_not_send_separate_message(monkeypatch):
    """A pending quality gate never triggers a send_telegram call."""
    tid = _mk_task(stage="development")
    sent = []

    def _record_send(text, disable_notification=False):
        sent.append(text)
        return 1

    monkeypatch.setattr(N, "send_telegram", _record_send)

    N.notify_qg_failure(tid, "development", "check_ci_green", "CI state: pending")

    assert sent == []  # QG-pending is log-only, never a separate ping