fix(notifications): tracker card — status-map completeness, rollback reflection, stage-metric summation (ORCH-091)

Three verified live-card defects in src/notifications.py (ORCH-067/087), all additive and indication-only (STAGE_TRANSITIONS / QG_CHECKS / check_* / transport / DB schema untouched; never-raise; revert = git revert):

- Деф.1 (D1): _STAGE_STATUS_LABEL covered 8 of 10 STAGE_TRANSITIONS keys — deploy-staging and cancelled (ORCH-090) fell back to the misleading "To Analyse". Added deploy-staging→"Deploying (staging)", cancelled→"Cancelled"; replaced the runtime fallback for an UNMAPPED stage with a neutral capitalized label (_neutral_stage_label). 'created' stays an explicit "To Analyse"; broken/None input degrades safely. Map completeness is asserted programmatically from STAGE_TRANSITIONS.keys() (single source of truth), not a static list.
- Деф.2 (D2): the stage-row loop drew ✅ for any stage with a finished agent run regardless of position — after a rollback the card showed the absurd "✅ Внедрение + 🔄 Разработка". Added read-only _pipeline_pos from the STAGE_TRANSITIONS order and a suppression gate (✅ only when current_pos >= _pipeline_pos(stage_key)); deploy-staging→deploy normalization applied ONLY to the current position; is_active_stage untouched.
- Деф.3 (D3): _stage_line took only the LAST run (ORCH-069: developer 3 runs Σ $3.98 rendered ~$0.00). It now aggregates ALL of the agent's runs with the same per-run formulas as the task totals → strict convergence with SUM(agent_runs) by task_id; model/effort/attempt come from the last run.

Tests: test_tracker_status_line.py (ORCH-091 TC-01..TC-03 + updated tc06); new test_tracker_rollback_metrics.py (TC-05..TC-08). Full suite green (1370).
Docs: CHANGELOG + internals.md (architecture README already updated by architect).

Refs: ORCH-091
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-09 21:58:26 +03:00
parent c0f2d917bf
commit 328ae78da3
6 changed files with 467 additions and 21 deletions
--- a/src/notifications.py
+++ b/src/notifications.py
@@ -254,6 +254,28 @@ _STAGE_ACTIVE_AGENT = {
    "deploy": "deployer",
 }

+# ORCH-091 (D2): pipeline order is read (read-only) from the single source of
+# truth src/stages.py::STAGE_TRANSITIONS — NOT from _TRACKER_STAGES (which lacks
+# deploy-staging/cancelled and is not authoritative about ordering, NFR-3). Used
+# to suppress the "✅ <stage>" line for a stage positioned AFTER the task's
+# current stage (a rollback, e.g. deploy-staging -> development), which otherwise
+# rendered the absurd "✅ Внедрение … + 🔄 Разработка".
+from .stages import STAGE_TRANSITIONS  # noqa: E402
+
+_PIPELINE_ORDER = list(STAGE_TRANSITIONS.keys())
+
+
+def _pipeline_pos(stage) -> int:
+    """Index of ``stage`` in the pipeline order; unknown -> "far future".
+
+    Never raises. An unknown/broken stage maps past the end so it is never
+    spuriously suppressed (degrades to the pre-ORCH-091 behaviour: ✅ kept).
+    """
+    try:
+        return _PIPELINE_ORDER.index(stage)
+    except (ValueError, TypeError):
+        return len(_PIPELINE_ORDER)
+

 def _fmt_minutes(seconds) -> str:
    """Render a duration in whole minutes: 0..59s -> '<1м', else '<n>м'."""
@@ -442,23 +464,42 @@ def render_task_tracker(task_id: int) -> str:
        except Exception:
            pass

-    def _stage_line(label, run):
-        usage = {
-            "input_tokens": run["input_tokens"],
-            "cache_read_tokens": run["cache_read_tokens"],
-            "cache_creation_tokens": run["cache_creation_tokens"],
-        }
-        in_tok = fmt_tokens(_input_total(usage))
-        out_tok = fmt_tokens(run["output_tokens"])
-        cost = fmt_cost(run["cost_usd"])
-        dur = _fmt_minutes(_duration_seconds(run["started_at"], run["finished_at"]))
-        model = short_model_name(run["model"])
+    def _stage_line(label, stage_runs):
+        # ORCH-091 (D3): aggregate ALL of the stage agent's runs (retries
+        # included) with the SAME per-run formulas as the task totals block
+        # (:388-404) -> the stage line converges with SUM(agent_runs) instead of
+        # showing only the last run (which understated a multi-attempt stage:
+        # ORCH-069 developer \u03a3 $3.98 rendered as ~$0.00). Each agent maps to
+        # exactly one _TRACKER_STAGES row, so \u03a3(stage lines) \u2261 task totals.
+        in_sum = 0
+        out_sum = 0
+        cost_sum = 0.0
+        dur_sum = 0
+        for run in stage_runs:
+            usage = {
+                "input_tokens": run["input_tokens"],
+                "cache_read_tokens": run["cache_read_tokens"],
+                "cache_creation_tokens": run["cache_creation_tokens"],
+            }
+            in_sum += _input_total(usage)
+            out_sum += int(run["output_tokens"] or 0)
+            cost_sum += float(run["cost_usd"] or 0.0)
+            d = _duration_seconds(run["started_at"], run["finished_at"])
+            if d is not None:
+                dur_sum += d
+        in_tok = fmt_tokens(in_sum)
+        out_tok = fmt_tokens(out_sum)
+        cost = fmt_cost(cost_sum)
+        dur = _fmt_minutes(dur_sum)
+        # Model/effort/"\u043f\u043e\u043f\u044b\u0442\u043a\u0430 N" come from the LAST run (agent_runs are id ASC).
+        last = stage_runs[-1] if stage_runs else None
+        model = short_model_name(last["model"]) if last is not None else ""
        model_suffix = f" \u00b7 {model}" if model else ""
        # ORCH-087 (BR-EFF): render the resolved --effort next to the model
        # ("\u00b7 opus-4-8 \u00b7 xhigh"). Stamped at launch in agent_runs.effort; empty /
        # missing -> suffix omitted (like the model suffix). Historical rows with
        # NULL effort fall back to the config-resolved effort for the agent.
-        effort = _run_effort(run)
+        effort = _run_effort(last) if last is not None else ""
        effort_suffix = f" \u00b7 {effort}" if effort else ""
        return (
            f"\u2705 {label:<13} {dur} \u00b7 "
@@ -471,6 +512,14 @@ def render_task_tracker(task_id: int) -> str:
    brd_ended = task["brd_review_ended_at"]
    review_seconds = _duration_seconds(brd_started, brd_ended)

+    # ORCH-091 (D2): the task's current position in the pipeline, used to suppress
+    # \u2705-lines for stages POSITIONED AFTER it (a rollback). The deploy-staging ->
+    # deploy normalization is applied ONLY here (not to is_active_stage): the
+    # collapsed "\u0412\u043d\u0435\u0434\u0440\u0435\u043d\u0438\u0435" row carries stage_key="deploy" (pos 7); on
+    # stage='deploy-staging' (pos 6) the row would otherwise be wrongly suppressed.
+    effective_stage = "deploy" if stage == "deploy-staging" else stage
+    current_pos = _pipeline_pos(effective_stage)
+
    for stage_key, label, agent in _TRACKER_STAGES:
        run = last_done.get(agent)
        # The stage is "in progress" only when it is the task's current stage AND
@@ -500,9 +549,14 @@ def render_task_tracker(task_id: int) -> str:
                lines.append(
                    f"\U0001f504 {label:<13} \u2026   \u00b7 \u0438\u0434\u0451\u0442"
                )
-        elif run is not None:
-            lines.append(_stage_line(label, run))
-        # else: not started yet -> not shown.
+        elif run is not None and current_pos >= _pipeline_pos(stage_key):
+            # ORCH-091 (D2): show ✅ only for stages AT or BEFORE the current
+            # position. A finished run on a stage POSITIONED AFTER the current one
+            # (rollback, e.g. deploy-staging->development) is suppressed — its runs
+            # still count in the task totals (intended rollback semantics). Pass the
+            # FULL run list so the line aggregates all attempts (D3).
+            lines.append(_stage_line(label, agent_runs))
+        # else: not started yet, or rolled back past -> not shown.

        # Insert the BRD review line right after Analysis.
        if stage_key == "analysis" and brd_started:
@@ -944,8 +998,16 @@ _STAGE_STATUS_LABEL = {
    "development": "Development",
    "review": "Code-Review",
    "testing": "Testing",
+    # ORCH-091 (D1): deploy-staging was missing -> the card froze on "To Analyse".
+    # Plain-style active label (like Analysis/Testing, no ⏸️ pause marker); the
+    # "(staging)" suffix keeps it distinct from the prod-overlay "Deploying"
+    # (_LIVE_BRANCH_LABELS['deploying']) and from the deploy stage's pause label.
+    "deploy-staging": "Deploying (staging)",
    "deploy": "⏸️ Awaiting Deploy — ожидание Confirm Deploy",
    "done": "Done",
+    # ORCH-091 (D1): offline base for the ORCH-090 system-terminal. Matches the
+    # overlay label _LIVE_BRANCH_LABELS['cancelled'] -> no precedence conflict.
+    "cancelled": "Cancelled",
 }
 _DEFAULT_STATUS_LABEL = "To Analyse"
 _IN_REVIEW_LABEL = (
@@ -987,6 +1049,25 @@ def _row_get(row, key, default=None):
            return default


+def _neutral_stage_label(stage) -> str:
+    """ORCH-091 (D1): neutral fallback for a stage NOT in _STAGE_STATUS_LABEL.
+
+    A genuinely unknown / future / broken stage gets a capitalized stage name
+    (e.g. "hotfix-review" -> "Hotfix Review") instead of the misleading
+    "To Analyse" (which read as a false "first status"). Empty input or a
+    failing str() -> the safe _DEFAULT_STATUS_LABEL; None is stringified and
+    renders as "None". Never raises. NOTE: the curated map stays the source of
+    human-meaningful labels; this only backstops unmapped stages (FR-3 / AC-3).
+    """
+    try:
+        s = str(stage).strip()
+        if not s:
+            return _DEFAULT_STATUS_LABEL
+        return s.replace("-", " ").title()
+    except Exception:
+        return _DEFAULT_STATUS_LABEL
+
+
 def plane_status_label(task_row) -> str:
    """ORCH-067 (Р-1, layer 1): current Plane status label for the card header.

@@ -1006,7 +1087,13 @@ def plane_status_label(task_row) -> str:
            ended = _row_get(task_row, "brd_review_ended_at")
            if started and not ended:
                return _IN_REVIEW_LABEL
-        return _STAGE_STATUS_LABEL.get(stage, _DEFAULT_STATUS_LABEL)
+        # ORCH-091 (D1/FR-3): a mapped stage keeps its curated label; an UNMAPPED
+        # (future/unknown) stage degrades to a neutral capitalized label, NOT the
+        # misleading "To Analyse". 'created' stays an explicit key -> "To Analyse".
+        label = _STAGE_STATUS_LABEL.get(stage)
+        if label:
+            return label
+        return _neutral_stage_label(stage)
    except Exception:
        return _DEFAULT_STATUS_LABEL