fix(tracker): no duplicate Telegram messages on not-modified/transient edits

edit_telegram now returns a distinguishable outcome (ok|not_modified|gone| failed) instead of a bare bool. update_task_tracker only sends a NEW message when the original is truly gone; not_modified and transient failures no longer spawn duplicate trackers or orphan the live one. render_task_tracker shows "попытка N" on an actively re-run stage (>=2 agent runs) so the text changes between review<->development cycles. Finished (✅) lines are unchanged. Tests: edit_telegram classification (ok/not_modified/gone/failed via mocked httpx), update_task_tracker (not_modified/failed -> no send, gone -> send+id), render attempt marker.
Merge pull request 'feat(telegram): live editable task tracker (Variant B+)' (#21 ) from feat/telegram-live-tracker into main
2026-06-04 13:20:40 +03:00 · 2026-06-04 11:46:21 +03:00
2 changed files with 285 additions and 22 deletions
--- a/src/notifications.py
+++ b/src/notifications.py
@@ -68,16 +68,43 @@ def send_telegram(text: str, disable_notification: bool = False):
    return None


-def edit_telegram(message_id: int, text: str) -> bool:
-    """Edit an existing Telegram message. Returns True on success, else False.
+# edit_telegram outcome codes -> let update_task_tracker decide what to do:
+#   "ok"           edit applied -> nothing else to do
+#   "not_modified" Telegram says text is identical (400 "message is not
+#                  modified" / "exactly the same") -> success, NO new message
+#   "gone"         original message can't be edited (deleted / too old /
+#                  invalid id) -> caller must fall back to a NEW message
+#   "failed"       transient failure (network / timeout / 5xx / unknown 400)
+#                  -> caller must NOT send a new message (avoid duplicates)
+EDIT_OK = "ok"
+EDIT_NOT_MODIFIED = "not_modified"
+EDIT_GONE = "gone"
+EDIT_FAILED = "failed"

-    Used by the live tracker to refresh the single per-task message in place.
-    Never raises. A False return tells the caller to fall back to a new message
-    (e.g. the message is too old to edit / was deleted / 400).
+# Telegram error descriptions that mean the message is permanently un-editable
+# (it is gone / orphaned) -> fall back to a fresh message.
+_GONE_MARKERS = (
+    "message to edit not found",
+    "message can't be edited",
+    "message_id_invalid",
+)
+# Telegram "nothing changed" -> treat as success, never a duplicate.
+_NOT_MODIFIED_MARKERS = (
+    "message is not modified",
+    "exactly the same",
+)
+
+
+def edit_telegram(message_id: int, text: str) -> str:
+    """Edit an existing Telegram message. Never raises.
+
+    Returns a distinguishable outcome (see EDIT_* constants) so the caller can
+    tell apart "all good" / "nothing changed" / "message gone" / "transient
+    failure" and only fall back to a NEW message when the original is truly gone.
    """
    s = _get_settings()
    if not s.telegram_bot_token or not s.telegram_chat_id:
-        return False
+        return EDIT_FAILED
    try:
        url = f"https://api.telegram.org/bot{s.telegram_bot_token}/editMessageText"
        resp = httpx.post(
@@ -91,9 +118,32 @@ def edit_telegram(message_id: int, text: str) -> bool:
            timeout=5,
        )
        data = resp.json()
-        return bool(data.get("ok"))
-    except Exception:
-        return False
+        if data.get("ok"):
+            return EDIT_OK
+        # ok:false -> inspect the description to classify the 400.
+        desc = str(data.get("description") or "").lower()
+        if any(m in desc for m in _NOT_MODIFIED_MARKERS):
+            # Text is identical between transitions (e.g. repeat review cycle
+            # renders the same line). Nothing to do, NOT a duplicate.
+            logger.debug(
+                f"edit_telegram(mid={message_id}): not modified, skipping"
+            )
+            return EDIT_NOT_MODIFIED
+        if any(m in desc for m in _GONE_MARKERS):
+            logger.warning(
+                f"edit_telegram(mid={message_id}): message gone ({desc!r}), "
+                f"will fall back to a new message"
+            )
+            return EDIT_GONE
+        # Unknown 400 / other non-ok -> transient/unknown, do NOT duplicate.
+        logger.warning(
+            f"edit_telegram(mid={message_id}): edit failed ({desc!r})"
+        )
+        return EDIT_FAILED
+    except Exception as e:
+        # Network / timeout / 5xx -> transient, do NOT duplicate.
+        logger.warning(f"edit_telegram(mid={message_id}): transient error: {e}")
+        return EDIT_FAILED


 def _get_work_item_id(task_id: int) -> str:
@@ -280,11 +330,35 @@ def render_task_tracker(task_id: int) -> str:

    for stage_key, label, agent in _TRACKER_STAGES:
        run = last_done.get(agent)
-        if run is not None:
+        # The stage is "in progress" only when it is the task's current stage AND
+        # there is an unfinished run for its agent (the agent is actually still
+        # working). A finished run with no in-flight run -> show the \u2705 result,
+        # even if the task still sits in that stage (just-finished snapshot).
+        agent_runs = agent_runs_by_agent.get(agent, [])
+        has_inflight = any(ar["finished_at"] is None for ar in agent_runs)
+        is_active_stage = (
+            _STAGE_ACTIVE_AGENT.get(stage) == agent
+            and stage == stage_key
+            and (has_inflight or run is None)
+        )
+        if is_active_stage:
+            # Live "\U0001f504 ... \u0438\u0434\u0451\u0442" line. Count how many times THIS stage's
+            # agent has run for this task; a 2nd+ run means we're re-doing the
+            # stage (e.g. review->development->review), so show "\u043f\u043e\u043f\u044b\u0442\u043a\u0430 N"
+            # to make the text change between cycles and to honestly show Slava
+            # the stage is being re-worked.
+            attempt = len(agent_runs)
+            if attempt >= 2:
+                lines.append(
+                    f"\U0001f504 {label} \u00b7 \u043f\u043e\u043f\u044b\u0442\u043a\u0430 {attempt} "
+                    f"\u2026 \u0438\u0434\u0451\u0442"
+                )
+            else:
+                lines.append(
+                    f"\U0001f504 {label:<13} \u2026   \u00b7 \u0438\u0434\u0451\u0442"
+                )
+        elif run is not None:
            lines.append(_stage_line(label, run))
-        elif _STAGE_ACTIVE_AGENT.get(stage) == agent and stage == stage_key:
-            # This stage is the active one and has no finished run yet.
-            lines.append(f"\U0001f504 {label:<13} \u2026   \u00b7 \u0438\u0434\u0451\u0442")
        # else: not started yet -> not shown.

        # Insert the BRD review line right after Analysis.
@@ -372,19 +446,32 @@ def update_task_tracker(task_id: int):
    """Render + push the live tracker for a task. Never raises.

    First call (no stored tracker_message_id): sendMessage (silent) and store the
-    returned message_id. Subsequent calls: editMessageText the stored message; if
-    the edit fails (too old / deleted / 400), fall back to a NEW message and
-    update the stored id. The tracker is always sent with disable_notification so
-    it never pings — only the dedicated alert helpers ping.
+    returned message_id. Subsequent calls: editMessageText the stored message.
+    A NEW message is sent ONLY when the original is truly gone (deleted / too old
+    / invalid id). On "not modified" (text unchanged) or transient failures
+    (network / timeout / 5xx / unknown 400) we do NOT send a new message — that
+    is exactly what produced duplicate trackers and orphaned (lagging) messages.
+    The tracker is always sent with disable_notification so it never pings —
+    only the dedicated alert helpers ping.
    """
    try:
        from .db import get_tracker_message_id, set_tracker_message_id
        text = render_task_tracker(task_id)
        mid = get_tracker_message_id(task_id)
        if mid is not None:
-            if edit_telegram(mid, text):
+            result = edit_telegram(mid, text)
+            if result in (EDIT_OK, EDIT_NOT_MODIFIED):
+                # Edited in place (or nothing to change) -> done, no duplicate.
                return
-            # Edit failed -> fall back to a fresh message.
+            if result == EDIT_FAILED:
+                # Transient -> don't duplicate; tracker redraws next transition.
+                logger.debug(
+                    f"update_task_tracker({task_id}): edit failed transiently, "
+                    f"keeping message {mid}"
+                )
+                return
+            # result == EDIT_GONE -> the stored message is gone; fall through
+            # to send a fresh one and re-point tracker_message_id at it.
        new_mid = send_telegram(text, disable_notification=True)
        if new_mid is not None:
            set_tracker_message_id(task_id, new_mid)
--- a/tests/test_telegram_tracker.py
+++ b/tests/test_telegram_tracker.py
@@ -249,7 +249,7 @@ def test_second_call_edits_existing_message(monkeypatch):

    edited = {}
    monkeypatch.setattr(N, "edit_telegram",
-                        lambda mid, text: edited.update(mid=mid) or True)
+                        lambda mid, text: edited.update(mid=mid) or N.EDIT_OK)
    monkeypatch.setattr(N, "send_telegram",
                        lambda *a, **k: (_ for _ in ()).throw(AssertionError("should not send when edit succeeds")))

@@ -257,20 +257,196 @@ def test_second_call_edits_existing_message(monkeypatch):
    assert edited["mid"] == 777


-def test_fallback_to_new_message_when_edit_fails(monkeypatch):
+def test_fallback_to_new_message_when_edit_gone(monkeypatch):
+    """edit returns 'gone' (message deleted/too old) -> send NEW + update id."""
    tid = _mk_task(stage="development")
    _mk_run(tid, "analyst", "2026-06-04 09:00:00", "2026-06-04 09:10:00",
            in_tok=10, out_tok=5, cost=0.1)
    from src.db import set_tracker_message_id, get_tracker_message_id
    set_tracker_message_id(tid, 100)

-    monkeypatch.setattr(N, "edit_telegram", lambda mid, text: False)  # edit fails
+    monkeypatch.setattr(N, "edit_telegram", lambda mid, text: N.EDIT_GONE)
    monkeypatch.setattr(N, "send_telegram", lambda text, disable_notification=False: 200)

    N.update_task_tracker(tid)
    assert get_tracker_message_id(tid) == 200  # id updated to the new message


+def test_not_modified_does_not_send_new_message(monkeypatch):
+    """edit returns 'not_modified' -> NO new message, id unchanged (no dupe)."""
+    tid = _mk_task(stage="development")
+    _mk_run(tid, "analyst", "2026-06-04 09:00:00", "2026-06-04 09:10:00",
+            in_tok=10, out_tok=5, cost=0.1)
+    from src.db import set_tracker_message_id, get_tracker_message_id
+    set_tracker_message_id(tid, 100)
+
+    monkeypatch.setattr(N, "edit_telegram", lambda mid, text: N.EDIT_NOT_MODIFIED)
+    monkeypatch.setattr(N, "send_telegram",
+                        lambda *a, **k: (_ for _ in ()).throw(AssertionError("must not send on not_modified")))
+
+    N.update_task_tracker(tid)
+    assert get_tracker_message_id(tid) == 100  # unchanged, no duplicate
+
+
+def test_transient_edit_failure_does_not_send_new_message(monkeypatch):
+    """edit returns 'failed' (network/timeout/5xx) -> NO new message (no dupe)."""
+    tid = _mk_task(stage="development")
+    _mk_run(tid, "analyst", "2026-06-04 09:00:00", "2026-06-04 09:10:00",
+            in_tok=10, out_tok=5, cost=0.1)
+    from src.db import set_tracker_message_id, get_tracker_message_id
+    set_tracker_message_id(tid, 100)
+
+    monkeypatch.setattr(N, "edit_telegram", lambda mid, text: N.EDIT_FAILED)
+    monkeypatch.setattr(N, "send_telegram",
+                        lambda *a, **k: (_ for _ in ()).throw(AssertionError("must not send on transient failure")))
+
+    N.update_task_tracker(tid)
+    assert get_tracker_message_id(tid) == 100  # unchanged, no duplicate
+
+
+# --------------------------------------------------------------------------- #
+# edit_telegram outcome classification (httpx mocked)
+# --------------------------------------------------------------------------- #
+def _edit_resp(ok, description=None):
+    resp = MagicMock()
+    body = {"ok": ok}
+    if description is not None:
+        body["description"] = description
+    resp.json.return_value = body
+    return resp
+
+
+def _patch_tg_creds(monkeypatch):
+    monkeypatch.setattr(N._get_settings(), "telegram_bot_token", "T", raising=False)
+    monkeypatch.setattr(N._get_settings(), "telegram_chat_id", "C", raising=False)
+
+
+def test_edit_telegram_ok(monkeypatch):
+    _patch_tg_creds(monkeypatch)
+    with patch("src.notifications.httpx") as hx:
+        hx.post.return_value = _edit_resp(True)
+        assert N.edit_telegram(1, "x") == N.EDIT_OK
+
+
+def test_edit_telegram_not_modified_is_success(monkeypatch):
+    # 400 "message is not modified" -> success, not gone, no duplicate
+    _patch_tg_creds(monkeypatch)
+    with patch("src.notifications.httpx") as hx:
+        hx.post.return_value = _edit_resp(
+            False, "Bad Request: message is not modified: ...")
+        assert N.edit_telegram(1, "x") == N.EDIT_NOT_MODIFIED
+
+
+def test_edit_telegram_exactly_the_same_is_not_modified(monkeypatch):
+    _patch_tg_creds(monkeypatch)
+    with patch("src.notifications.httpx") as hx:
+        hx.post.return_value = _edit_resp(
+            False, "Bad Request: specified new message content and reply markup "
+                   "are exactly the same")
+        assert N.edit_telegram(1, "x") == N.EDIT_NOT_MODIFIED
+
+
+def test_edit_telegram_message_not_found_is_gone(monkeypatch):
+    _patch_tg_creds(monkeypatch)
+    with patch("src.notifications.httpx") as hx:
+        hx.post.return_value = _edit_resp(
+            False, "Bad Request: message to edit not found")
+        assert N.edit_telegram(1, "x") == N.EDIT_GONE
+
+
+def test_edit_telegram_cant_be_edited_is_gone(monkeypatch):
+    _patch_tg_creds(monkeypatch)
+    with patch("src.notifications.httpx") as hx:
+        hx.post.return_value = _edit_resp(
+            False, "Bad Request: message can't be edited")
+        assert N.edit_telegram(1, "x") == N.EDIT_GONE
+
+
+def test_edit_telegram_unknown_400_is_failed(monkeypatch):
+    # unknown 400 -> failed (NOT gone) -> caller won't duplicate
+    _patch_tg_creds(monkeypatch)
+    with patch("src.notifications.httpx") as hx:
+        hx.post.return_value = _edit_resp(
+            False, "Bad Request: some other unexpected error")
+        assert N.edit_telegram(1, "x") == N.EDIT_FAILED
+
+
+def test_edit_telegram_timeout_is_failed(monkeypatch):
+    _patch_tg_creds(monkeypatch)
+    with patch("src.notifications.httpx") as hx:
+        hx.post.side_effect = Exception("read timeout")
+        assert N.edit_telegram(1, "x") == N.EDIT_FAILED
+
+
+def test_edit_telegram_5xx_is_failed(monkeypatch):
+    # Telegram 5xx still returns ok:false w/o gone/not_modified markers
+    _patch_tg_creds(monkeypatch)
+    with patch("src.notifications.httpx") as hx:
+        hx.post.return_value = _edit_resp(False, "Internal Server Error")
+        assert N.edit_telegram(1, "x") == N.EDIT_FAILED
+
+
+# --------------------------------------------------------------------------- #
+# render: repeated stage attempt shows "попытка N"
+# --------------------------------------------------------------------------- #
+_POPYTKA = "\u043f\u043e\u043f\u044b\u0442\u043a\u0430"  # popytka
+
+
+def test_render_active_stage_shows_attempt_on_second_run():
+    # Two reviewer runs while in review -> active line shows attempt 2.
+    tid = _mk_task(stage="review")
+    _mk_run(tid, "analyst", "2026-06-04 09:00:00", "2026-06-04 09:10:00",
+            in_tok=10, out_tok=5, cost=0.1, model="tokenator/claude-opus-4-8")
+    _mk_run(tid, "developer", "2026-06-04 09:10:00", "2026-06-04 09:20:00",
+            in_tok=10, out_tok=5, cost=0.1, model="tokenator/claude-opus-4-8")
+    # First review run finished (sent back to dev), second review run active.
+    _mk_run(tid, "reviewer", "2026-06-04 09:20:00", "2026-06-04 09:25:00",
+            in_tok=10, out_tok=5, cost=0.1, model="vibecode/claude-sonnet-4.6",
+            exit_code=0)
+    _mk_run(tid, "reviewer", "2026-06-04 09:30:00", None,
+            in_tok=0, out_tok=0, exit_code=None)
+
+    text = N.render_task_tracker(tid)
+    active = [l for l in text.splitlines()
+              if l.startswith("\U0001f504") and "Review" in l][0]
+    assert _POPYTKA in active
+    assert "2" in active
+    assert "\u0438\u0434\u0451\u0442" in active
+
+
+def test_render_active_stage_no_attempt_on_first_run():
+    # Single reviewer run -> active line has NO attempt marker.
+    tid = _mk_task(stage="review")
+    _mk_run(tid, "analyst", "2026-06-04 09:00:00", "2026-06-04 09:10:00",
+            in_tok=10, out_tok=5, cost=0.1, model="tokenator/claude-opus-4-8")
+    _mk_run(tid, "developer", "2026-06-04 09:10:00", "2026-06-04 09:20:00",
+            in_tok=10, out_tok=5, cost=0.1, model="tokenator/claude-opus-4-8")
+    _mk_run(tid, "reviewer", "2026-06-04 09:20:00", None,
+            in_tok=0, out_tok=0, exit_code=None)
+
+    text = N.render_task_tracker(tid)
+    active = [l for l in text.splitlines()
+              if l.startswith("\U0001f504") and "Review" in l][0]
+    assert _POPYTKA not in active
+    assert "\u0438\u0434\u0451\u0442" in active
+
+
+def test_render_finished_lines_unaffected_by_attempt_logic():
+    # Completed (checkmark) lines never carry an attempt marker.
+    tid = _mk_task(stage="review")
+    _mk_run(tid, "analyst", "2026-06-04 09:00:00", "2026-06-04 09:10:00",
+            in_tok=10, out_tok=5, cost=0.1, model="tokenator/claude-opus-4-8")
+    # developer ran twice (retry) but is a FINISHED stage now.
+    _mk_run(tid, "developer", "2026-06-04 09:10:00", "2026-06-04 09:15:00",
+            in_tok=10, out_tok=5, cost=0.1, model="tokenator/claude-opus-4-8")
+    _mk_run(tid, "developer", "2026-06-04 09:16:00", "2026-06-04 09:20:00",
+            in_tok=10, out_tok=5, cost=0.1, model="tokenator/claude-opus-4-8")
+    text = N.render_task_tracker(tid)
+    for l in text.splitlines():
+        if l.startswith("\u2705"):
+            assert _POPYTKA not in l
+
+
 # --------------------------------------------------------------------------- #
 # which alerts are SEPARATE vs tracker-only
 # --------------------------------------------------------------------------- #