fix(notifications): tracker orphan cleanup + effort-in-line + honest done-time (ORCH-087)

Устраняет «замёрзшие» осиротевшие карточки live-трекера и доделывает строку стадии/итоговое время. G1 — зачистка сирот: аддитивный леджер tracker_messages(task_id, message_id, created_at, deleted_at) + хелперы add/get_open/mark_deleted в src/db.py. bump теперь удаляет ВСЕ незакрытые mid задачи (а не только скаляр tasks.tracker_message_id, сохранён как BC-указатель). Новый mid в леджер только при успешном send (BR-6); transient-delete остаётся для ретрая; «already gone»/>48ч закрывается. Корень бага — скалярный учёт, терявший ссылку при гонке/delete-fail+send-ok (ADR-001 G0). G3 — deploy-цикл: ключ confirm_deploy в _LIVE_BRANCH_LABELS (без base-alias). BR-EFF — эффорт в строке: колонка agent_runs.effort (_ensure_column, идемпотентно), стамп фактического resolve_agent_effort в launcher._spawn в момент запуска; рендер `· {model} · {effort}`, пустой → суффикс опускается. BR-G5 — честное время: done-строка `⏱️ Агенты Σ · твоё {review~cap} · общее с ожиданием {wall}` — три независимых подписанных метрики; кап tracker_brd_review_cap_s (ORCH_TRACKER_BRD_REVIEW_CAP_S, дефолт 2ч, маркер ~). Инварианты: STAGE_TRANSITIONS/QG_CHECKS/стадии без изменений; миграции аддитивны/идемпотентны (enduro не трогается); never-raise, disable_notification, plane_issue_link (ORCH-067), disable_web_page_preview (ORCH-080) сохранены; src/reconciler.py не эродирован (ORCH-086 на месте). Тесты: tests/test_notifications_orphans.py (TC-01..05 + never-raise), tests/test_tracker_effort_time.py (TC-06/11..15 + confirm_deploy), tests/test_launcher.py::TestEffortStamp (TC-09/10). Доки: CLAUDE.md (§Нотификации), docs/architecture/README.md (Notifications), CHANGELOG.md. Refs: ORCH-087 Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-09 09:20:20 +03:00
parent 36c7a68722
commit a7b27f2235
11 changed files with 729 additions and 17 deletions
--- a/src/agents/launcher.py
+++ b/src/agents/launcher.py
@@ -473,6 +473,19 @@ class AgentLauncher:
        # (project-override > per-agent env > default), not hardcoded in AGENT_CONFIGS.
        model = resolve_agent_model(agent, project_id)
        effort = resolve_agent_effort(agent, project_id)
+        # ORCH-087 (BR-EFF): stamp the REAL --effort value onto this agent_runs row
+        # in the moment of launch. The CLI does not echo effort in its result JSON,
+        # so this is the only reliable source for the tracker's "· model · effort"
+        # line. Empty resolve (no --effort flag) -> NULL so the suffix is omitted.
+        # Reuses the still-open conn; never blocks the launch.
+        try:
+            conn.execute(
+                "UPDATE agent_runs SET effort=? WHERE id=?",
+                (effort or None, run_id),
+            )
+            conn.commit()
+        except Exception as e:
+            logger.warning(f"effort stamp failed for run_id={run_id}: {e}")
        model_flag = f"--model {model} " if model else ""
        effort_flag = f"--effort {effort} " if effort else ""
        # ORCH-074 (G2): agent_fallback_model is read directly here, bypassing
--- a/src/config.py
+++ b/src/config.py
@@ -485,6 +485,14 @@ class Settings(BaseSettings):
    tracker_live_status_ttl_s: int = 60
    tracker_live_status_timeout_s: int = 3

+    # ORCH-087 (BR-G5, ADR-001 Р-6): cap for the human BRD-review time shown on the
+    # done card ("твоё {review}"). The brd_review clock can stay open for hours on a
+    # desync (In Review -> Backlog), which made "твоё время" report anomalous stalls
+    # (ORCH-087: 392m). Above this cap the value is shown capped with a "~" marker so
+    # an abnormal stall is never presented as real human review time. Env
+    # ORCH_TRACKER_BRD_REVIEW_CAP_S; default 7200s (2h). 0/negative -> no cap.
+    tracker_brd_review_cap_s: int = 7200
+
    # ORCH-069: QG-0 upper title-length limit (entry gate _qg0_errors). The 80-char
    # cap was a hygiene limit, not structural (slug is cut to [:30] independently,
    # DB title TEXT is unbounded). Configurable via env ORCH_QG0_TITLE_MAX; default
--- a/src/db.py
+++ b/src/db.py
@@ -109,6 +109,12 @@ def init_db():
    # can render a short model tag per stage. Parsed from the run-log result JSON
    # (modelUsage key) by the launcher monitor; NULL when unknown. Idempotent ALTER.
    _ensure_column(conn, "agent_runs", "model", "TEXT")
+    # ORCH-087 (BR-EFF): persist the REAL --effort value sent to the Claude CLI per
+    # agent_runs row (low|medium|high|xhigh|max) so the tracker can render the
+    # resolved effort next to the model ("· opus-4-8 · xhigh"). Stamped in
+    # launcher._spawn right after resolve_agent_effort; NULL when no --effort flag
+    # was passed (resolved to "") or for historical rows. Idempotent ALTER.
+    _ensure_column(conn, "agent_runs", "effort", "TEXT")
    # Telegram live tracker: one editable Telegram message per task. We store its
    # message_id so each stage transition can editMessageText the same message
    # instead of spamming a new one. Idempotent ALTER (safe on the live prod DB).
@@ -141,6 +147,27 @@ def init_db():
        CREATE INDEX IF NOT EXISTS idx_job_deps_task    ON job_deps(task_id);
        CREATE INDEX IF NOT EXISTS idx_job_deps_depends ON job_deps(depends_on_task_id);
    """)
+    # ORCH-087 (BR-G1, ADR-001 Р-1): authoritative ledger of EVERY tracker card
+    # (Telegram message_id) ever created for a task. The scalar
+    # tasks.tracker_message_id only ever knew the LAST mid, so any lost reference
+    # (delete-fail+send-ok, race, restart) orphaned older cards forever. This
+    # ledger lets every bump delete ALL still-open mids (deleted_at IS NULL), not
+    # just the last one. tasks.tracker_message_id is KEPT (current-card pointer,
+    # full BC). Purely ADDITIVE (CREATE TABLE/INDEX IF NOT EXISTS) -> idempotent,
+    # restart-safe on the live shared prod DB (enduro-trails data untouched). The
+    # logical FK on tasks.id is intentional (no REFERENCES, mirrors job_deps) so
+    # the migration cannot fail on a pre-existing DB. See 08-data-requirements.md.
+    conn.executescript("""
+        CREATE TABLE IF NOT EXISTS tracker_messages (
+            task_id     INTEGER NOT NULL,
+            message_id  INTEGER NOT NULL,
+            created_at  TEXT DEFAULT (datetime('now')),
+            deleted_at  TEXT,
+            PRIMARY KEY (task_id, message_id)
+        );
+        CREATE INDEX IF NOT EXISTS idx_tracker_messages_open
+            ON tracker_messages(task_id) WHERE deleted_at IS NULL;
+    """)
    conn.commit()
    conn.close()

@@ -301,6 +328,68 @@ def set_tracker_message_id(task_id: int, message_id: int) -> None:
        conn.close()


+# ---------------------------------------------------------------------------
+# ORCH-087 (BR-G1): tracker_messages ledger — full accounting of every card mid
+# ---------------------------------------------------------------------------
+
+def add_tracker_message(task_id: int, message_id: int) -> None:
+    """ORCH-087: record a freshly-created tracker card mid in the ledger.
+
+    Called ONLY after a successful send_telegram (new_mid is not None). INSERT OR
+    IGNORE keeps it idempotent: a repeat mid (race / restart replay) does not
+    duplicate the row or resurrect a deleted_at stamp.
+    """
+    conn = get_db()
+    try:
+        conn.execute(
+            "INSERT OR IGNORE INTO tracker_messages (task_id, message_id) "
+            "VALUES (?, ?)",
+            (task_id, message_id),
+        )
+        conn.commit()
+    finally:
+        conn.close()
+
+
+def get_open_tracker_messages(task_id: int) -> list[int]:
+    """ORCH-087: all still-open (deleted_at IS NULL) card mids for a task.
+
+    These are the cards the next bump must clean up. Ordered oldest-first so the
+    oldest orphans are deleted first. Never includes the rows already marked
+    deleted.
+    """
+    conn = get_db()
+    try:
+        rows = conn.execute(
+            "SELECT message_id FROM tracker_messages "
+            "WHERE task_id=? AND deleted_at IS NULL ORDER BY message_id ASC",
+            (task_id,),
+        ).fetchall()
+    finally:
+        conn.close()
+    return [r[0] for r in rows]
+
+
+def mark_tracker_message_deleted(task_id: int, message_id: int) -> None:
+    """ORCH-087: stamp deleted_at on a card mid that is confirmed gone.
+
+    Called for mids that delete_telegram reported as gone (deleted now OR already
+    gone / >48h per _DELETE_GONE_MARKERS) so they drop out of
+    get_open_tracker_messages. Transient-delete mids are left untouched (NULL) for
+    a retry on the next bump.
+    """
+    conn = get_db()
+    try:
+        conn.execute(
+            "UPDATE tracker_messages SET deleted_at=datetime('now') "
+            "WHERE task_id=? AND message_id=? AND deleted_at IS NULL",
+            (task_id, message_id),
+        )
+        conn.commit()
+    finally:
+        conn.close()
+
+
 def mark_brd_review_started(task_id: int) -> None:
    """Stamp when BRD review (the human approve gate) started, if not already set.

--- a/src/notifications.py
+++ b/src/notifications.py
@@ -290,6 +290,46 @@ def _duration_seconds(started, finished):
    return max(int((b - a).total_seconds()), 0)


+def _capped_review_str(review_seconds) -> str:
+    """ORCH-087 (BR-G5): human BRD-review duration, capped to drop anomalous stalls.
+
+    Returns '0м' when there was no review window. When the review exceeds
+    ``tracker_brd_review_cap_s`` (default 2h; <=0 disables the cap) the capped value
+    is shown with a leading '~' to signal the real value was longer — an open
+    brd_review clock from a desync (In Review -> Backlog) rather than genuine human
+    time (ORCH-087: 392m). Never raises.
+    """
+    try:
+        if not review_seconds:
+            return "0м"
+        secs = int(review_seconds)
+        try:
+            cap = int(getattr(_get_settings(), "tracker_brd_review_cap_s", 0) or 0)
+        except Exception:
+            cap = 0
+        if cap > 0 and secs > cap:
+            return f"~{_fmt_minutes(cap)}"
+        return _fmt_minutes(secs)
+    except Exception:
+        return _fmt_minutes(review_seconds) if review_seconds else "0м"
+
+
+def _run_effort(run) -> str:
+    """ORCH-087 (BR-EFF): the effort tag for a stage line. Never raises -> ''.
+
+    Returns the stamped agent_runs.effort (the REAL --effort sent at launch). NULL
+    / empty (historical row predating the column, or a launch with no --effort
+    flag) -> '' so the caller omits the effort suffix (the documented default,
+    AC-E.4). New runs are stamped in launcher._spawn, so going forward every stage
+    line carries its resolved effort (developer xhigh, tester/deployer medium, …).
+    """
+    try:
+        effort = _row_get(run, "effort")
+        return str(effort) if effort else ""
+    except Exception:
+        return ""
+
+
 def render_task_tracker(task_id: int) -> str:
    """Build the full live-tracker text for a task from the DB (stateless render).

@@ -321,7 +361,8 @@ def render_task_tracker(task_id: int) -> str:
            return f"task-{task_id}"
        runs = conn.execute(
            "SELECT agent, started_at, finished_at, exit_code, input_tokens, "
-            "output_tokens, cache_read_tokens, cache_creation_tokens, cost_usd, model "
+            "output_tokens, cache_read_tokens, cache_creation_tokens, cost_usd, "
+            "model, effort "
            "FROM agent_runs WHERE task_id=? ORDER BY id ASC",
            (task_id,),
        ).fetchall()
@@ -413,9 +454,15 @@ def render_task_tracker(task_id: int) -> str:
        dur = _fmt_minutes(_duration_seconds(run["started_at"], run["finished_at"]))
        model = short_model_name(run["model"])
        model_suffix = f" \u00b7 {model}" if model else ""
+        # ORCH-087 (BR-EFF): render the resolved --effort next to the model
+        # ("\u00b7 opus-4-8 \u00b7 xhigh"). Stamped at launch in agent_runs.effort; empty /
+        # missing -> suffix omitted (like the model suffix). Historical rows with
+        # NULL effort fall back to the config-resolved effort for the agent.
+        effort = _run_effort(run)
+        effort_suffix = f" \u00b7 {effort}" if effort else ""
        return (
            f"\u2705 {label:<13} {dur} \u00b7 "
-            f"{in_tok}\u2193/{out_tok}\u2191 \u00b7 {cost}{model_suffix}"
+            f"{in_tok}\u2193/{out_tok}\u2191 \u00b7 {cost}{model_suffix}{effort_suffix}"
        )

    # BRD review line: between Analysis and Architecture, only once Analysis has
@@ -490,11 +537,17 @@ def render_task_tracker(task_id: int) -> str:
    if done:
        wall = _duration_seconds(task["created_at"], task["updated_at"])
        wall_str = _fmt_minutes(wall) if wall is not None else "?"
-        review_str = _fmt_minutes(review_seconds) if review_seconds else "0м"
+        review_str = _capped_review_str(review_seconds)
+        # ORCH-087 (BR-G5): three INDEPENDENT, explicitly-labelled metrics. None is
+        # presented as the sum of the others \u2014 queue/wait pauses are not logged, so
+        # wall != agents + review; the old "\u0412\u0441\u0435\u0433\u043e {wall}" read like a (wrong) sum.
+        #   \u0410\u0433\u0435\u043d\u0442\u044b = sum(agent_runs) (precise main metric, T-1)
+        #   \u0442\u0432\u043e\u0451   = human BRD-review, capped to drop anomalous stalls (T-2)
+        #   \u043e\u0431\u0449\u0435\u0435 \u0441 \u043e\u0436\u0438\u0434\u0430\u043d\u0438\u0435\u043c = wall-clock incl. queue/wait, NOT work time (T-3)
        lines.append(
-            f"\u23f1\ufe0f \u0412\u0441\u0435\u0433\u043e {wall_str} \u00b7 "
-            f"\u0430\u0433\u0435\u043d\u0442\u044b {_fmt_minutes(agent_seconds)} \u00b7 "
-            f"\u0442\u0432\u043e\u0451 {review_str}"
+            f"\u23f1\ufe0f \u0410\u0433\u0435\u043d\u0442\u044b {_fmt_minutes(agent_seconds)} \u00b7 "
+            f"\u0442\u0432\u043e\u0451 {review_str} \u00b7 "
+            f"\u043e\u0431\u0449\u0435\u0435 \u0441 \u043e\u0436\u0438\u0434\u0430\u043d\u0438\u0435\u043c {wall_str}"
        )
        link = _done_link(task_id, task["work_item_id"])
        if link:
@@ -568,21 +621,53 @@ def update_task_tracker(task_id: int):
    only the dedicated alert helpers ping.
    """
    try:
-        from .db import get_tracker_message_id, set_tracker_message_id
+        from .db import (
+            get_tracker_message_id, set_tracker_message_id,
+            get_open_tracker_messages, add_tracker_message,
+            mark_tracker_message_deleted,
+        )
        text = render_task_tracker(task_id)
        mode = (_get_settings().tracker_mode or "edit").strip().lower()
        mid = get_tracker_message_id(task_id)

        if mode == "bump":
            # bump: one card, always at the bottom (delete + send + repoint).
+            # ORCH-087 (BR-G1): clean up ALL still-open cards of this task, not
+            # only the last (scalar) mid. The ledger is the authoritative set of
+            # every card ever created; any reference lost by the scalar (race /
+            # delete-fail+send-ok / restart) is still tracked here and reaped now.
+            open_mids = set()
+            try:
+                open_mids.update(get_open_tracker_messages(task_id))
+            except Exception as e:
+                logger.warning(f"update_task_tracker({task_id}): ledger read failed: {e}")
            if mid is not None:
+                # Scalar pointer is part of the live set (e.g. a card sent before
+                # the ledger existed); union avoids missing it.
+                open_mids.add(mid)
+            for old_mid in open_mids:
                # best-effort; result does NOT gate the send (BR-6).
-                delete_telegram(mid)
+                if delete_telegram(old_mid):
+                    # gone (deleted now OR already gone / >48h) -> drop from ledger.
+                    try:
+                        mark_tracker_message_deleted(task_id, old_mid)
+                    except Exception as e:
+                        logger.warning(
+                            f"update_task_tracker({task_id}): mark-deleted failed: {e}"
+                        )
+                # transient False -> leave open in the ledger for a retry next bump.
            new_mid = send_telegram(text, disable_notification=True)
            if new_mid is not None:
+                # R-3 / BR-6: only record the new card on a successful send.
+                try:
+                    add_tracker_message(task_id, new_mid)
+                except Exception as e:
+                    logger.warning(
+                        f"update_task_tracker({task_id}): ledger insert failed: {e}"
+                    )
                set_tracker_message_id(task_id, new_mid)
-            # send returned None (no creds / transient) -> leave mid untouched;
-            # no duplicate within this call, redraws on the next transition.
+            # send returned None (no creds / transient) -> leave mid/ledger
+            # untouched; no duplicate within this call, redraws next transition.
            return

        # mode == "edit" (DEFAULT): existing behaviour, unchanged.
@@ -874,6 +959,11 @@ _LIVE_BRANCH_LABELS = {
    "blocked": "Blocked",
    "rejected": "Rejected",
    "cancelled": "Cancelled",
+    # ORCH-087 (G3, ADR-001 Р-4): close the deploy cycle on the card. The
+    # confirm_deploy logical key already exists in plane_sync (ORCH-059); drawn as
+    # a real, dedicated status (no base-alias) when its UUID is live in Plane so the
+    # card can show Awaiting Deploy → Deploying → Confirm Deploy → Monitoring → Done.
+    "confirm_deploy": "⏳ Confirm Deploy — подтвердите прод-деплой",
    "deploying": "Deploying",
    "monitoring": "Monitoring after Deploy",
 }