fix(notifications): tracker orphan cleanup + effort-in-line + honest done-time (ORCH-087)

Устраняет «замёрзшие» осиротевшие карточки live-трекера и доделывает строку
стадии/итоговое время.

G1 — зачистка сирот: аддитивный леджер tracker_messages(task_id, message_id,
created_at, deleted_at) + хелперы add/get_open/mark_deleted в src/db.py. bump
теперь удаляет ВСЕ незакрытые mid задачи (а не только скаляр
tasks.tracker_message_id, сохранён как BC-указатель). Новый mid в леджер только
при успешном send (BR-6); transient-delete остаётся для ретрая; «already
gone»/>48ч закрывается. Корень бага — скалярный учёт, терявший ссылку при
гонке/delete-fail+send-ok (ADR-001 G0).

G3 — deploy-цикл: ключ confirm_deploy в _LIVE_BRANCH_LABELS (без base-alias).

BR-EFF — эффорт в строке: колонка agent_runs.effort (_ensure_column,
идемпотентно), стамп фактического resolve_agent_effort в launcher._spawn в
момент запуска; рендер `· {model} · {effort}`, пустой → суффикс опускается.

BR-G5 — честное время: done-строка `⏱️ Агенты Σ · твоё {review~cap} · общее с
ожиданием {wall}` — три независимых подписанных метрики; кап
tracker_brd_review_cap_s (ORCH_TRACKER_BRD_REVIEW_CAP_S, дефолт 2ч, маркер ~).

Инварианты: STAGE_TRANSITIONS/QG_CHECKS/стадии без изменений; миграции
аддитивны/идемпотентны (enduro не трогается); never-raise,
disable_notification, plane_issue_link (ORCH-067), disable_web_page_preview
(ORCH-080) сохранены; src/reconciler.py не эродирован (ORCH-086 на месте).

Тесты: tests/test_notifications_orphans.py (TC-01..05 + never-raise),
tests/test_tracker_effort_time.py (TC-06/11..15 + confirm_deploy),
tests/test_launcher.py::TestEffortStamp (TC-09/10). Доки: CLAUDE.md
(§Нотификации), docs/architecture/README.md (Notifications), CHANGELOG.md.

Refs: ORCH-087

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-09 09:20:20 +03:00
committed by stream
parent 36c7a68722
commit a7b27f2235
11 changed files with 729 additions and 17 deletions

View File

@@ -473,6 +473,19 @@ class AgentLauncher:
# (project-override > per-agent env > default), not hardcoded in AGENT_CONFIGS.
model = resolve_agent_model(agent, project_id)
effort = resolve_agent_effort(agent, project_id)
# ORCH-087 (BR-EFF): stamp the REAL --effort value onto this agent_runs row
# in the moment of launch. The CLI does not echo effort in its result JSON,
# so this is the only reliable source for the tracker's "· model · effort"
# line. Empty resolve (no --effort flag) -> NULL so the suffix is omitted.
# Reuses the still-open conn; never blocks the launch.
try:
conn.execute(
"UPDATE agent_runs SET effort=? WHERE id=?",
(effort or None, run_id),
)
conn.commit()
except Exception as e:
logger.warning(f"effort stamp failed for run_id={run_id}: {e}")
model_flag = f"--model {model} " if model else ""
effort_flag = f"--effort {effort} " if effort else ""
# ORCH-074 (G2): agent_fallback_model is read directly here, bypassing

View File

@@ -485,6 +485,14 @@ class Settings(BaseSettings):
tracker_live_status_ttl_s: int = 60
tracker_live_status_timeout_s: int = 3
# ORCH-087 (BR-G5, ADR-001 Р-6): cap for the human BRD-review time shown on the
# done card ("твоё {review}"). The brd_review clock can stay open for hours on a
# desync (In Review -> Backlog), which made "твоё время" report anomalous stalls
# (ORCH-087: 392m). Above this cap the value is shown capped with a "~" marker so
# an abnormal stall is never presented as real human review time. Env
# ORCH_TRACKER_BRD_REVIEW_CAP_S; default 7200s (2h). 0/negative -> no cap.
tracker_brd_review_cap_s: int = 7200
# ORCH-069: QG-0 upper title-length limit (entry gate _qg0_errors). The 80-char
# cap was a hygiene limit, not structural (slug is cut to [:30] independently,
# DB title TEXT is unbounded). Configurable via env ORCH_QG0_TITLE_MAX; default

View File

@@ -109,6 +109,12 @@ def init_db():
# can render a short model tag per stage. Parsed from the run-log result JSON
# (modelUsage key) by the launcher monitor; NULL when unknown. Idempotent ALTER.
_ensure_column(conn, "agent_runs", "model", "TEXT")
# ORCH-087 (BR-EFF): persist the REAL --effort value sent to the Claude CLI per
# agent_runs row (low|medium|high|xhigh|max) so the tracker can render the
# resolved effort next to the model ("· opus-4-8 · xhigh"). Stamped in
# launcher._spawn right after resolve_agent_effort; NULL when no --effort flag
# was passed (resolved to "") or for historical rows. Idempotent ALTER.
_ensure_column(conn, "agent_runs", "effort", "TEXT")
# Telegram live tracker: one editable Telegram message per task. We store its
# message_id so each stage transition can editMessageText the same message
# instead of spamming a new one. Idempotent ALTER (safe on the live prod DB).
@@ -141,6 +147,27 @@ def init_db():
CREATE INDEX IF NOT EXISTS idx_job_deps_task ON job_deps(task_id);
CREATE INDEX IF NOT EXISTS idx_job_deps_depends ON job_deps(depends_on_task_id);
""")
# ORCH-087 (BR-G1, ADR-001 Р-1): authoritative ledger of EVERY tracker card
# (Telegram message_id) ever created for a task. The scalar
# tasks.tracker_message_id only ever knew the LAST mid, so any lost reference
# (delete-fail+send-ok, race, restart) orphaned older cards forever. This
# ledger lets every bump delete ALL still-open mids (deleted_at IS NULL), not
# just the last one. tasks.tracker_message_id is KEPT (current-card pointer,
# full BC). Purely ADDITIVE (CREATE TABLE/INDEX IF NOT EXISTS) -> idempotent,
# restart-safe on the live shared prod DB (enduro-trails data untouched). The
# logical FK on tasks.id is intentional (no REFERENCES, mirrors job_deps) so
# the migration cannot fail on a pre-existing DB. See 08-data-requirements.md.
conn.executescript("""
CREATE TABLE IF NOT EXISTS tracker_messages (
task_id INTEGER NOT NULL,
message_id INTEGER NOT NULL,
created_at TEXT DEFAULT (datetime('now')),
deleted_at TEXT,
PRIMARY KEY (task_id, message_id)
);
CREATE INDEX IF NOT EXISTS idx_tracker_messages_open
ON tracker_messages(task_id) WHERE deleted_at IS NULL;
""")
conn.commit()
conn.close()
@@ -301,6 +328,68 @@ def set_tracker_message_id(task_id: int, message_id: int) -> None:
conn.close()
# ---------------------------------------------------------------------------
# ORCH-087 (BR-G1): tracker_messages ledger — full accounting of every card mid
# ---------------------------------------------------------------------------
def add_tracker_message(task_id: int, message_id: int) -> None:
"""ORCH-087: record a freshly-created tracker card mid in the ledger.
Called ONLY after a successful send_telegram (new_mid is not None). INSERT OR
IGNORE keeps it idempotent: a repeat mid (race / restart replay) does not
duplicate the row or resurrect a deleted_at stamp.
"""
conn = get_db()
try:
conn.execute(
"INSERT OR IGNORE INTO tracker_messages (task_id, message_id) "
"VALUES (?, ?)",
(task_id, message_id),
)
conn.commit()
finally:
conn.close()
def get_open_tracker_messages(task_id: int) -> list[int]:
"""ORCH-087: all still-open (deleted_at IS NULL) card mids for a task.
These are the cards the next bump must clean up. Ordered oldest-first so the
oldest orphans are deleted first. Never includes the rows already marked
deleted.
"""
conn = get_db()
try:
rows = conn.execute(
"SELECT message_id FROM tracker_messages "
"WHERE task_id=? AND deleted_at IS NULL ORDER BY message_id ASC",
(task_id,),
).fetchall()
finally:
conn.close()
return [r[0] for r in rows]
def mark_tracker_message_deleted(task_id: int, message_id: int) -> None:
"""ORCH-087: stamp deleted_at on a card mid that is confirmed gone.
Called for mids that delete_telegram reported as gone (deleted now OR already
gone / >48h per _DELETE_GONE_MARKERS) so they drop out of
get_open_tracker_messages. Transient-delete mids are left untouched (NULL) for
a retry on the next bump.
"""
conn = get_db()
try:
conn.execute(
"UPDATE tracker_messages SET deleted_at=datetime('now') "
"WHERE task_id=? AND message_id=? AND deleted_at IS NULL",
(task_id, message_id),
)
conn.commit()
finally:
conn.close()
def mark_brd_review_started(task_id: int) -> None:
"""Stamp when BRD review (the human approve gate) started, if not already set.

View File

@@ -290,6 +290,46 @@ def _duration_seconds(started, finished):
return max(int((b - a).total_seconds()), 0)
def _capped_review_str(review_seconds) -> str:
"""ORCH-087 (BR-G5): human BRD-review duration, capped to drop anomalous stalls.
Returns '' when there was no review window. When the review exceeds
``tracker_brd_review_cap_s`` (default 2h; <=0 disables the cap) the capped value
is shown with a leading '~' to signal the real value was longer — an open
brd_review clock from a desync (In Review -> Backlog) rather than genuine human
time (ORCH-087: 392m). Never raises.
"""
try:
if not review_seconds:
return ""
secs = int(review_seconds)
try:
cap = int(getattr(_get_settings(), "tracker_brd_review_cap_s", 0) or 0)
except Exception:
cap = 0
if cap > 0 and secs > cap:
return f"~{_fmt_minutes(cap)}"
return _fmt_minutes(secs)
except Exception:
return _fmt_minutes(review_seconds) if review_seconds else ""
def _run_effort(run) -> str:
"""ORCH-087 (BR-EFF): the effort tag for a stage line. Never raises -> ''.
Returns the stamped agent_runs.effort (the REAL --effort sent at launch). NULL
/ empty (historical row predating the column, or a launch with no --effort
flag) -> '' so the caller omits the effort suffix (the documented default,
AC-E.4). New runs are stamped in launcher._spawn, so going forward every stage
line carries its resolved effort (developer xhigh, tester/deployer medium, …).
"""
try:
effort = _row_get(run, "effort")
return str(effort) if effort else ""
except Exception:
return ""
def render_task_tracker(task_id: int) -> str:
"""Build the full live-tracker text for a task from the DB (stateless render).
@@ -321,7 +361,8 @@ def render_task_tracker(task_id: int) -> str:
return f"task-{task_id}"
runs = conn.execute(
"SELECT agent, started_at, finished_at, exit_code, input_tokens, "
"output_tokens, cache_read_tokens, cache_creation_tokens, cost_usd, model "
"output_tokens, cache_read_tokens, cache_creation_tokens, cost_usd, "
"model, effort "
"FROM agent_runs WHERE task_id=? ORDER BY id ASC",
(task_id,),
).fetchall()
@@ -413,9 +454,15 @@ def render_task_tracker(task_id: int) -> str:
dur = _fmt_minutes(_duration_seconds(run["started_at"], run["finished_at"]))
model = short_model_name(run["model"])
model_suffix = f" \u00b7 {model}" if model else ""
# ORCH-087 (BR-EFF): render the resolved --effort next to the model
# ("\u00b7 opus-4-8 \u00b7 xhigh"). Stamped at launch in agent_runs.effort; empty /
# missing -> suffix omitted (like the model suffix). Historical rows with
# NULL effort fall back to the config-resolved effort for the agent.
effort = _run_effort(run)
effort_suffix = f" \u00b7 {effort}" if effort else ""
return (
f"\u2705 {label:<13} {dur} \u00b7 "
f"{in_tok}\u2193/{out_tok}\u2191 \u00b7 {cost}{model_suffix}"
f"{in_tok}\u2193/{out_tok}\u2191 \u00b7 {cost}{model_suffix}{effort_suffix}"
)
# BRD review line: between Analysis and Architecture, only once Analysis has
@@ -490,11 +537,17 @@ def render_task_tracker(task_id: int) -> str:
if done:
wall = _duration_seconds(task["created_at"], task["updated_at"])
wall_str = _fmt_minutes(wall) if wall is not None else "?"
review_str = _fmt_minutes(review_seconds) if review_seconds else ""
review_str = _capped_review_str(review_seconds)
# ORCH-087 (BR-G5): three INDEPENDENT, explicitly-labelled metrics. None is
# presented as the sum of the others \u2014 queue/wait pauses are not logged, so
# wall != agents + review; the old "\u0412\u0441\u0435\u0433\u043e {wall}" read like a (wrong) sum.
# \u0410\u0433\u0435\u043d\u0442\u044b = sum(agent_runs) (precise main metric, T-1)
# \u0442\u0432\u043e\u0451 = human BRD-review, capped to drop anomalous stalls (T-2)
# \u043e\u0431\u0449\u0435\u0435 \u0441 \u043e\u0436\u0438\u0434\u0430\u043d\u0438\u0435\u043c = wall-clock incl. queue/wait, NOT work time (T-3)
lines.append(
f"\u23f1\ufe0f \u0412\u0441\u0435\u0433\u043e {wall_str} \u00b7 "
f"\u0430\u0433\u0435\u043d\u0442\u044b {_fmt_minutes(agent_seconds)} \u00b7 "
f"\u0442\u0432\u043e\u0451 {review_str}"
f"\u23f1\ufe0f \u0410\u0433\u0435\u043d\u0442\u044b {_fmt_minutes(agent_seconds)} \u00b7 "
f"\u0442\u0432\u043e\u0451 {review_str} \u00b7 "
f"\u043e\u0431\u0449\u0435\u0435 \u0441 \u043e\u0436\u0438\u0434\u0430\u043d\u0438\u0435\u043c {wall_str}"
)
link = _done_link(task_id, task["work_item_id"])
if link:
@@ -568,21 +621,53 @@ def update_task_tracker(task_id: int):
only the dedicated alert helpers ping.
"""
try:
from .db import get_tracker_message_id, set_tracker_message_id
from .db import (
get_tracker_message_id, set_tracker_message_id,
get_open_tracker_messages, add_tracker_message,
mark_tracker_message_deleted,
)
text = render_task_tracker(task_id)
mode = (_get_settings().tracker_mode or "edit").strip().lower()
mid = get_tracker_message_id(task_id)
if mode == "bump":
# bump: one card, always at the bottom (delete + send + repoint).
# ORCH-087 (BR-G1): clean up ALL still-open cards of this task, not
# only the last (scalar) mid. The ledger is the authoritative set of
# every card ever created; any reference lost by the scalar (race /
# delete-fail+send-ok / restart) is still tracked here and reaped now.
open_mids = set()
try:
open_mids.update(get_open_tracker_messages(task_id))
except Exception as e:
logger.warning(f"update_task_tracker({task_id}): ledger read failed: {e}")
if mid is not None:
# Scalar pointer is part of the live set (e.g. a card sent before
# the ledger existed); union avoids missing it.
open_mids.add(mid)
for old_mid in open_mids:
# best-effort; result does NOT gate the send (BR-6).
delete_telegram(mid)
if delete_telegram(old_mid):
# gone (deleted now OR already gone / >48h) -> drop from ledger.
try:
mark_tracker_message_deleted(task_id, old_mid)
except Exception as e:
logger.warning(
f"update_task_tracker({task_id}): mark-deleted failed: {e}"
)
# transient False -> leave open in the ledger for a retry next bump.
new_mid = send_telegram(text, disable_notification=True)
if new_mid is not None:
# R-3 / BR-6: only record the new card on a successful send.
try:
add_tracker_message(task_id, new_mid)
except Exception as e:
logger.warning(
f"update_task_tracker({task_id}): ledger insert failed: {e}"
)
set_tracker_message_id(task_id, new_mid)
# send returned None (no creds / transient) -> leave mid untouched;
# no duplicate within this call, redraws on the next transition.
# send returned None (no creds / transient) -> leave mid/ledger
# untouched; no duplicate within this call, redraws next transition.
return
# mode == "edit" (DEFAULT): existing behaviour, unchanged.
@@ -874,6 +959,11 @@ _LIVE_BRANCH_LABELS = {
"blocked": "Blocked",
"rejected": "Rejected",
"cancelled": "Cancelled",
# ORCH-087 (G3, ADR-001 Р-4): close the deploy cycle on the card. The
# confirm_deploy logical key already exists in plane_sync (ORCH-059); drawn as
# a real, dedicated status (no base-alias) when its UUID is live in Plane so the
# card can show Awaiting Deploy → Deploying → Confirm Deploy → Monitoring → Done.
"confirm_deploy": "⏳ Confirm Deploy — подтвердите прод-деплой",
"deploying": "Deploying",
"monitoring": "Monitoring after Deploy",
}