fix(notifications): tracker card — status-map completeness, rollback reflection, stage-metric summation (ORCH-091)

Three verified live-card defects in src/notifications.py (ORCH-067/087),
all additive and indication-only (STAGE_TRANSITIONS / QG_CHECKS / check_* /
transport / DB schema untouched; never-raise; revert = git revert):

- Деф.1 (D1): _STAGE_STATUS_LABEL covered 8 of 10 STAGE_TRANSITIONS keys —
  deploy-staging and cancelled (ORCH-090) fell back to the misleading
  "To Analyse". Added deploy-staging→"Deploying (staging)",
  cancelled→"Cancelled"; replaced the runtime fallback for an UNMAPPED stage
  with a neutral capitalized label (_neutral_stage_label). created stays an
  explicit "To Analyse"; broken/None input degrades safely. Map completeness
  is asserted programmatically from STAGE_TRANSITIONS.keys() (single source of
  truth), not a static list.
- Деф.2 (D2): the stage-row loop drew ✅ for any stage with a finished agent
  run regardless of position — after a rollback the card showed the absurd
  "✅ Внедрение + 🔄 Разработка". Added read-only _pipeline_pos from the
  STAGE_TRANSITIONS order and a suppression gate (✅ only when
  current_pos >= _pipeline_pos(stage_key)); deploy-staging→deploy normalization
  applied ONLY to the current position; is_active_stage untouched.
- Деф.3 (D3): _stage_line took only the LAST run (ORCH-069: the developer's
  3 runs, Σ $3.98, were rendered as ~$0.00). It now aggregates ALL of the
  agent's runs with the same per-run formulas as the task totals → strict
  convergence with SUM(agent_runs) by task_id; model/effort/attempt come from
  the last run.

Tests: test_tracker_status_line.py (ORCH-091 TC-01..TC-03 + updated tc06);
new test_tracker_rollback_metrics.py (TC-05..TC-08). Full suite green (1370).
Docs: CHANGELOG + internals.md (architecture README already updated by architect).

Refs: ORCH-091
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-09 21:58:26 +03:00
committed by orchestrator-deployer
parent c0f2d917bf
commit 328ae78da3
6 changed files with 467 additions and 21 deletions

View File

@@ -254,6 +254,28 @@ _STAGE_ACTIVE_AGENT = {
"deploy": "deployer",
}
# ORCH-091 (D2): pipeline order is read (read-only) from the single source of
# truth src/stages.py::STAGE_TRANSITIONS — NOT from _TRACKER_STAGES (which lacks
# deploy-staging/cancelled and is not authoritative about ordering, NFR-3). Used
# to suppress the "✅ <stage>" line for a stage positioned AFTER the task's
# current stage (a rollback, e.g. deploy-staging -> development), which otherwise
# rendered the absurd "✅ Внедрение … + 🔄 Разработка".
from .stages import STAGE_TRANSITIONS # noqa: E402
# Stage names in the order STAGE_TRANSITIONS declares them; iterating the
# mapping yields its keys, so no explicit .keys() call is needed.
_PIPELINE_ORDER = list(STAGE_TRANSITIONS)
def _pipeline_pos(stage) -> int:
    """Return the position of ``stage`` within ``_PIPELINE_ORDER``.

    A stage that is absent from the order (or cannot be looked up) is
    reported as ``len(_PIPELINE_ORDER)``, i.e. one past the last known
    stage. When such a value is the task's *current* stage, no tracker row
    is positioned after it, so no finished-stage line is hidden (the
    pre-ORCH-091 behaviour). Never raises.
    """
    try:
        position = _PIPELINE_ORDER.index(stage)
    except (ValueError, TypeError):
        # Unknown / broken stage: place it after every known stage.
        position = len(_PIPELINE_ORDER)
    return position
def _fmt_minutes(seconds) -> str:
"""Render a duration in whole minutes: 0..59s -> '<1м', else '<n>м'."""
@@ -442,23 +464,42 @@ def render_task_tracker(task_id: int) -> str:
except Exception:
pass
def _stage_line(label, run):
usage = {
"input_tokens": run["input_tokens"],
"cache_read_tokens": run["cache_read_tokens"],
"cache_creation_tokens": run["cache_creation_tokens"],
}
in_tok = fmt_tokens(_input_total(usage))
out_tok = fmt_tokens(run["output_tokens"])
cost = fmt_cost(run["cost_usd"])
dur = _fmt_minutes(_duration_seconds(run["started_at"], run["finished_at"]))
model = short_model_name(run["model"])
def _stage_line(label, stage_runs):
# ORCH-091 (D3): aggregate ALL of the stage agent's runs (retries
# included) with the SAME per-run formulas as the task totals block
# (:388-404) -> the stage line converges with SUM(agent_runs) instead of
# showing only the last run (which understated a multi-attempt stage:
# ORCH-069 developer \u03a3 $3.98 rendered as ~$0.00). Each agent maps to
# exactly one _TRACKER_STAGES row, so \u03a3(stage lines) \u2261 task totals.
in_sum = 0
out_sum = 0
cost_sum = 0.0
dur_sum = 0
for run in stage_runs:
usage = {
"input_tokens": run["input_tokens"],
"cache_read_tokens": run["cache_read_tokens"],
"cache_creation_tokens": run["cache_creation_tokens"],
}
in_sum += _input_total(usage)
out_sum += int(run["output_tokens"] or 0)
cost_sum += float(run["cost_usd"] or 0.0)
d = _duration_seconds(run["started_at"], run["finished_at"])
if d is not None:
dur_sum += d
in_tok = fmt_tokens(in_sum)
out_tok = fmt_tokens(out_sum)
cost = fmt_cost(cost_sum)
dur = _fmt_minutes(dur_sum)
# Model/effort/"\u043f\u043e\u043f\u044b\u0442\u043a\u0430 N" come from the LAST run (agent_runs are id ASC).
last = stage_runs[-1] if stage_runs else None
model = short_model_name(last["model"]) if last is not None else ""
model_suffix = f" \u00b7 {model}" if model else ""
# ORCH-087 (BR-EFF): render the resolved --effort next to the model
# ("\u00b7 opus-4-8 \u00b7 xhigh"). Stamped at launch in agent_runs.effort; empty /
# missing -> suffix omitted (like the model suffix). Historical rows with
# NULL effort fall back to the config-resolved effort for the agent.
effort = _run_effort(run)
effort = _run_effort(last) if last is not None else ""
effort_suffix = f" \u00b7 {effort}" if effort else ""
return (
f"\u2705 {label:<13} {dur} \u00b7 "
@@ -471,6 +512,14 @@ def render_task_tracker(task_id: int) -> str:
brd_ended = task["brd_review_ended_at"]
review_seconds = _duration_seconds(brd_started, brd_ended)
# ORCH-091 (D2): the task's current position in the pipeline, used to suppress
# \u2705-lines for stages POSITIONED AFTER it (a rollback). The deploy-staging ->
# deploy normalization is applied ONLY here (not to is_active_stage): the
# collapsed "\u0412\u043d\u0435\u0434\u0440\u0435\u043d\u0438\u0435" row carries stage_key="deploy" (pos 7); on
# stage='deploy-staging' (pos 6) the row would otherwise be wrongly suppressed.
effective_stage = "deploy" if stage == "deploy-staging" else stage
current_pos = _pipeline_pos(effective_stage)
for stage_key, label, agent in _TRACKER_STAGES:
run = last_done.get(agent)
# The stage is "in progress" only when it is the task's current stage AND
@@ -500,9 +549,14 @@ def render_task_tracker(task_id: int) -> str:
lines.append(
f"\U0001f504 {label:<13} \u2026 \u00b7 \u0438\u0434\u0451\u0442"
)
elif run is not None:
lines.append(_stage_line(label, run))
# else: not started yet -> not shown.
elif run is not None and current_pos >= _pipeline_pos(stage_key):
# ORCH-091 (D2): show ✅ only for stages AT or BEFORE the current
# position. A finished run on a stage POSITIONED AFTER the current one
# (rollback, e.g. deploy-staging->development) is suppressed — its runs
# still count in the task totals (intended rollback semantics). Pass the
# FULL run list so the line aggregates all attempts (D3).
lines.append(_stage_line(label, agent_runs))
# else: not started yet, or rolled back past -> not shown.
# Insert the BRD review line right after Analysis.
if stage_key == "analysis" and brd_started:
@@ -944,8 +998,16 @@ _STAGE_STATUS_LABEL = {
"development": "Development",
"review": "Code-Review",
"testing": "Testing",
# ORCH-091 (D1): deploy-staging was missing -> the card froze on "To Analyse".
# Plain-style active label (like Analysis/Testing, no ⏸️ pause marker); the
# "(staging)" suffix keeps it distinct from the prod-overlay "Deploying"
# (_LIVE_BRANCH_LABELS['deploying']) and from the deploy stage's pause label.
"deploy-staging": "Deploying (staging)",
"deploy": "⏸️ Awaiting Deploy — ожидание Confirm Deploy",
"done": "Done",
# ORCH-091 (D1): offline base for the ORCH-090 system-terminal. Matches the
# overlay label _LIVE_BRANCH_LABELS['cancelled'] -> no precedence conflict.
"cancelled": "Cancelled",
}
_DEFAULT_STATUS_LABEL = "To Analyse"
_IN_REVIEW_LABEL = (
@@ -987,6 +1049,25 @@ def _row_get(row, key, default=None):
return default
def _neutral_stage_label(stage) -> str:
    """ORCH-091 (D1): neutral fallback for a stage NOT in _STAGE_STATUS_LABEL.

    An unmapped stage name is turned into a capitalized, space-separated
    label (for example ``"hot-fix"`` -> ``"Hot Fix"``) instead of the
    misleading "To Analyse". The curated map remains the source of
    human-meaningful labels; this is only the safety net for stages it does
    not list (FR-3 / AC-3).

    Args:
        stage: Stage identifier, normally a string. ``None`` and other
            objects are tolerated.

    Returns:
        The title-cased stage name, or ``_DEFAULT_STATUS_LABEL`` when
        ``stage`` is ``None``, empty / whitespace-only, or cannot be
        converted to text.

    Never raises.
    """
    # str(None) would be the literal "None", which would surface on the card
    # as a bogus status label -- treat a missing stage like an empty one.
    if stage is None:
        return _DEFAULT_STATUS_LABEL
    try:
        text = str(stage).strip()
        if not text:
            return _DEFAULT_STATUS_LABEL
        # Hyphens separate words in stage keys; title() capitalizes each word.
        return text.replace("-", " ").title()
    except Exception:
        # Deliberate best-effort: a broken __str__ must not break rendering.
        return _DEFAULT_STATUS_LABEL
def plane_status_label(task_row) -> str:
"""ORCH-067 (Р-1, layer 1): current Plane status label for the card header.
@@ -1006,7 +1087,13 @@ def plane_status_label(task_row) -> str:
ended = _row_get(task_row, "brd_review_ended_at")
if started and not ended:
return _IN_REVIEW_LABEL
return _STAGE_STATUS_LABEL.get(stage, _DEFAULT_STATUS_LABEL)
# ORCH-091 (D1/FR-3): a mapped stage keeps its curated label; an UNMAPPED
# (future/unknown) stage degrades to a neutral capitalized label, NOT the
# misleading "To Analyse". 'created' stays an explicit key -> "To Analyse".
label = _STAGE_STATUS_LABEL.get(stage)
if label:
return label
return _neutral_stage_label(stage)
except Exception:
return _DEFAULT_STATUS_LABEL