Files
orchestrator/tests/test_tracker_status_line.py
claude-bot 328ae78da3 fix(notifications): tracker card — status-map completeness, rollback reflection, stage-metric summation (ORCH-091)
Three verified live-card defects in src/notifications.py (ORCH-067/087),
all additive and indication-only (STAGE_TRANSITIONS / QG_CHECKS / check_* /
transport / DB schema untouched; never-raise; revert = git revert):

- Деф.1 (D1): _STAGE_STATUS_LABEL covered 8 of 10 STAGE_TRANSITIONS keys —
  deploy-staging and cancelled (ORCH-090) fell back to the misleading
  "To Analyse". Added deploy-staging→"Deploying (staging)",
  cancelled→"Cancelled"; replaced the runtime fallback for an UNMAPPED stage
  with a neutral capitalized label (_neutral_stage_label). created stays an
  explicit "To Analyse"; broken/None input degrades safely. Map completeness
  is asserted programmatically from STAGE_TRANSITIONS.keys() (single source of
  truth), not a static list.
- Деф.2 (D2): the stage-row loop drew ✅ for any stage with a finished agent
  run regardless of position — after a rollback the card showed the absurd
  "✅ Внедрение + 🔄 Разработка". Added read-only _pipeline_pos from the
  STAGE_TRANSITIONS order and a suppression gate (✅ only when
  current_pos >= _pipeline_pos(stage_key)); deploy-staging→deploy normalization
  applied ONLY to the current position; is_active_stage untouched.
- Деф.3 (D3): _stage_line took only the LAST run (ORCH-069: developer 3 runs
  Σ $3.98 rendered ~$0.00). It now aggregates ALL of the agent's runs with the
  same per-run formulas as the task totals → strict convergence with
  SUM(agent_runs) by task_id; model/effort/attempt come from the last run.

Tests: test_tracker_status_line.py (ORCH-091 TC-01..TC-03 + updated tc06);
new test_tracker_rollback_metrics.py (TC-05..TC-08). Full suite green (1370).
Docs: CHANGELOG + internals.md (architecture README already updated by architect).

Refs: ORCH-091
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-09 22:08:52 +03:00

285 lines
13 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""ORCH-067 — Group B: the Plane-status line on the live card (AC-5..AC-9).
The card now carries an explicit '📍 <Plane status>' line under the header that
follows the ORCH-066 status model. The OFFLINE core (stage->status + In Review
from the brd-clock + Awaiting Deploy) is pure/deterministic and never touches the
network; a best-effort LIVE overlay draws the branch statuses that are
indistinguishable offline (Needs Input / Blocked / …). Everything degrades to the
stage default and NEVER raises (AC-9). Network is isolated: the live-state read
(`_live_state_uuid_cached`) and `get_project_states` are patched per case; the DB
is a temp SQLite.
Test ids TC-05..TC-09 from 04-test-plan.yaml.
"""
import os
import tempfile
# Test environment is prepared BEFORE the src.* imports below (hence the E402
# suppressions on them). NOTE(review): presumably the settings object reads
# these variables at import time — confirm against src.db / src.notifications.
# The tokens use setdefault, so a value already present in the environment wins;
# the DB path is assigned unconditionally and always points at the temp file.
os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")
os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")
# Per-file SQLite database located in the system temp directory.
_test_db = os.path.join(tempfile.gettempdir(), "test_orchestrator_status_line.db")
os.environ["ORCH_DB_PATH"] = _test_db
from types import SimpleNamespace # noqa: E402
import pytest # noqa: E402
import src.db as db_module # noqa: E402
import src.projects as projects_mod # noqa: E402
from src.db import init_db, get_db # noqa: E402
from src import notifications as N # noqa: E402
import src.plane_sync as plane_sync # noqa: E402
# Plane project id returned by the stubbed get_project_by_repo (see setup_db)
# for repo == "orchestrator".
_ORCH_PROJECT_ID = "8da6aa25-a60e-44d6-a1e2-d8ae59aa7d6a"
@pytest.fixture(autouse=True)
def setup_db(monkeypatch):
    """Prepare an isolated environment for every test in this module.

    Before the test: points the DB settings at the temp SQLite file, removes any
    stale file, initialises the schema, switches the live status overlay off,
    clears the per-issue live-state cache and pins repo->project resolution.
    After the test: removes the temp DB file again.
    """

    def _remove_db_file():
        # Delete the temp database if it is present on disk.
        if os.path.exists(_test_db):
            os.unlink(_test_db)

    def _fake_project_by_repo(repo):
        # Only 'orchestrator' resolves to a project; any other repo is unknown.
        if repo == "orchestrator":
            return SimpleNamespace(plane_project_id=_ORCH_PROJECT_ID)
        return None

    monkeypatch.setattr(db_module.settings, "db_path", _test_db, raising=False)
    _remove_db_file()
    init_db()

    # The offline-core tests run with the live overlay OFF; cases that need it
    # switch it back on themselves. The per-issue cache starts empty each time.
    monkeypatch.setattr(
        N._get_settings(), "tracker_live_status", False, raising=False
    )
    N._LIVE_STATE_CACHE.clear()

    # Pin repo->project resolution so cross-file ORCH_PROJECTS_JSON reloads
    # cannot strip 'orchestrator' and silently disable the live overlay.
    monkeypatch.setattr(projects_mod, "get_project_by_repo", _fake_project_by_repo)

    yield

    _remove_db_file()
def _mk_task(stage="development", wid="ORCH-067", repo="orchestrator",
             plane_issue_id="issue-uuid-1", brd_started=None, brd_ended=None,
             title="status line"):
    """Insert one row into ``tasks`` and return its row id.

    Args:
        stage: value stored in the ``stage`` column.
        wid: value stored in ``work_item_id``.
        repo: value stored in ``repo``.
        plane_issue_id: value stored in ``plane_issue_id``.
        brd_started: value stored in ``brd_review_started_at`` (None = unset).
        brd_ended: value stored in ``brd_review_ended_at`` (None = unset).
        title: value stored in ``title``.

    Returns:
        The ``lastrowid`` of the INSERT.

    ``plane_id`` and ``branch`` are fixed test constants. The connection is
    closed in a ``finally`` so a failing INSERT/commit does not leak the handle
    (and keep the temp DB file locked for the fixture's cleanup).
    """
    conn = get_db()
    try:
        cur = conn.execute(
            "INSERT INTO tasks (plane_id, work_item_id, repo, branch, stage, title, "
            "plane_issue_id, brd_review_started_at, brd_review_ended_at) "
            "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
            ("p1", wid, repo, "feature/ORCH-067-x", stage, title, plane_issue_id,
             brd_started, brd_ended),
        )
        tid = cur.lastrowid
        conn.commit()
        return tid
    finally:
        conn.close()
def _status_line(text):
"""Extract the single '📍 ...' status line from rendered card text."""
for ln in text.splitlines():
if ln.startswith("\U0001f4cd"):
return ln
return None
# --------------------------------------------------------------------------- #
# TC-05 / AC-5 — render carries an explicit Plane-status line
# --------------------------------------------------------------------------- #
def test_tc05_render_has_status_line():
    """TC-05 / AC-5: the rendered card contains the stage's '📍 ...' status line."""
    rendered = N.render_task_tracker(_mk_task(stage="development"))
    status = _status_line(rendered)
    assert status is not None  # a '📍 ...' line is present
    assert status == "\U0001f4cd Development"  # stage -> Plane status
# --------------------------------------------------------------------------- #
# TC-06 / AC-6 — stage -> Plane status mapping (ТЗ §2.2), parametrized
# --------------------------------------------------------------------------- #
@pytest.mark.parametrize(
    ("stage", "expected"),
    [
        ("created", "To Analyse"),
        ("analysis", "Analysis"),
        ("architecture", "Architecture"),
        ("development", "Development"),
        ("review", "Code-Review"),
        ("testing", "Testing"),
        ("deploy", "⏸️ Awaiting Deploy — ожидание Confirm Deploy"),
        ("done", "Done"),
    ],
)
def test_tc06_stage_to_plane_status(stage, expected):
    """TC-06 / AC-6: each listed stage maps to its expected Plane status label."""
    # plane_status_label is exercised directly on a row-like dict (no DB, no card).
    row = {"stage": stage}
    assert N.plane_status_label(row) == expected
def test_tc06_unknown_stage_degrades_to_neutral():
    """ORCH-091 (AC-3): an unknown stage yields a neutral capitalized label.

    The label must not be the misleading "To Analyse". A row without any stage
    key still resolves to "To Analyse" (the real first status) and never errors.
    """
    weird_row = {"stage": "weird-stage"}
    assert N.plane_status_label(weird_row) == "Weird Stage"
    assert N.plane_status_label(weird_row) != "To Analyse"

    empty_row = {}
    assert N.plane_status_label(empty_row) == "To Analyse"
# --------------------------------------------------------------------------- #
# TC-07 / AC-7 — In Review from the brd-clock, OFFLINE (no network)
# --------------------------------------------------------------------------- #
def test_tc07_in_review_from_brd_clock(monkeypatch):
    """TC-07 / AC-7: 'In Review' is derived offline from the BRD clock.

    A task in ``analysis`` whose BRD review has started but not ended must
    render the In Review status line without any live-state read.
    """
    # Guard: any live-state read fails the test, proving the path stays offline.
    def _boom(*a, **k):
        raise AssertionError("In Review must be resolved OFFLINE (no network)")

    monkeypatch.setattr(N, "_live_state_uuid_cached", _boom)
    tid = _mk_task(stage="analysis", brd_started="2026-06-08 10:00:00",
                   brd_ended=None)
    text = N.render_task_tracker(tid)
    assert _status_line(text) == "\U0001f4cd " + N._IN_REVIEW_LABEL
    # The human-gate 'Подтверждение BRD' line with ⏸️/⏳ is still rendered.
    assert N._BRD_LABEL in text
    # ⏳ (U+23F3) still-waiting marker. Written as an escape so the glyph cannot
    # be lost again: an empty-string literal here would make the check vacuous,
    # because "" is a substring of every string.
    assert "\u23f3" in text
def test_tc07b_in_review_clears_once_brd_ended():
    """With the BRD review ended, analysis shows the plain 'Analysis' status again."""
    task_id = _mk_task(
        stage="analysis",
        brd_started="2026-06-08 10:00:00",
        brd_ended="2026-06-08 10:30:00",
    )
    rendered = N.render_task_tracker(task_id)
    assert _status_line(rendered) == "\U0001f4cd Analysis"
# --------------------------------------------------------------------------- #
# TC-08 / AC-8 — Awaiting Deploy (offline) + Needs Input (live overlay)
# --------------------------------------------------------------------------- #
def test_tc08_awaiting_deploy_offline():
    """TC-08 / AC-8: stage=deploy renders '⏸️ Awaiting Deploy' with the overlay off."""
    rendered = N.render_task_tracker(_mk_task(stage="deploy"))
    status = _status_line(rendered)
    assert status == "\U0001f4cd ⏸️ Awaiting Deploy — ожидание Confirm Deploy"
def test_tc08_needs_input_via_live_overlay(monkeypatch):
    """TC-08 / AC-8: Needs Input is drawn by the live overlay, not the offline core.

    The live-state read and the project state map are both stubbed so that the
    issue's live state uuid matches the 'needs_input' entry.
    """
    def _live_uuid(issue_id, project_id):
        return "uuid-needs-input"

    def _project_states(project_id):
        return {"needs_input": "uuid-needs-input"}

    # Switch the overlay on for this case (the fixture turns it off by default).
    monkeypatch.setattr(
        N._get_settings(), "tracker_live_status", True, raising=False
    )
    monkeypatch.setattr(N, "_live_state_uuid_cached", _live_uuid)
    monkeypatch.setattr(plane_sync, "get_project_states", _project_states)

    # repo='orchestrator' resolves to a project id via the fixture's stub, so
    # the overlay has a project to look the states up in.
    task_id = _mk_task(stage="development", repo="orchestrator")
    status = _status_line(N.render_task_tracker(task_id))
    assert status == "\U0001f4cd ❓ Needs Input — нужны уточнения"
def test_tc08b_overlay_no_match_keeps_offline_base(monkeypatch):
    """A live state that matches no branch status leaves the offline stage label."""
    def _live_uuid(issue_id, project_id):
        return "uuid-in-progress"

    def _project_states(project_id):
        return {
            "in_progress": "uuid-in-progress",
            "needs_input": "uuid-needs-input",
        }

    monkeypatch.setattr(
        N._get_settings(), "tracker_live_status", True, raising=False
    )
    monkeypatch.setattr(N, "_live_state_uuid_cached", _live_uuid)
    monkeypatch.setattr(plane_sync, "get_project_states", _project_states)

    task_id = _mk_task(stage="development", repo="orchestrator")
    rendered = N.render_task_tracker(task_id)
    assert _status_line(rendered) == "\U0001f4cd Development"
# --------------------------------------------------------------------------- #
# TC-09 / AC-9, AC-16 — render never raises on broken/unreachable status data
# --------------------------------------------------------------------------- #
def test_tc09_render_survives_overlay_exception(monkeypatch):
    """TC-09 / AC-9: an exception inside the live overlay never escapes render.

    With the live-state read raising, the card must still render and show the
    plain stage status.
    """
    def _plane_down(*args, **kwargs):
        raise RuntimeError("plane down")

    monkeypatch.setattr(
        N._get_settings(), "tracker_live_status", True, raising=False
    )
    monkeypatch.setattr(N, "_live_state_uuid_cached", _plane_down)

    task_id = _mk_task(stage="development", repo="orchestrator")
    rendered = N.render_task_tracker(task_id)  # must not raise
    assert _status_line(rendered) == "\U0001f4cd Development"
def test_tc09b_card_status_label_never_raises(monkeypatch):
    """_card_status_label returns a usable default when plane_status_label raises."""
    def _explode(*args, **kwargs):
        raise RuntimeError("boom")

    monkeypatch.setattr(N, "plane_status_label", _explode)
    row = {"stage": "development"}
    assert N._card_status_label(row) == "To Analyse"
def test_tc09c_plane_status_label_never_raises():
    """Garbage rows (None, a key-less object) give the safe default, no exception."""
    for garbage_row in (None, object()):
        assert N.plane_status_label(garbage_row) == "To Analyse"
# =========================================================================== #
# ORCH-091 — Group 1 (D1): completeness of the status map, staging label,
# neutral fallback. plane_status_label is pure/offline -> assert directly.
# =========================================================================== #
from src.stages import STAGE_TRANSITIONS # noqa: E402
# --------------------------------------------------------------------------- #
# ORCH-091 TC-01 / AC-1 — completeness of the map vs STAGE_TRANSITIONS
# --------------------------------------------------------------------------- #
@pytest.mark.parametrize(
    "stage",
    [key for key in STAGE_TRANSITIONS if key != "created"],
)
def test_orch091_tc01_every_stage_has_meaningful_label(stage):
    """AC-1: every STAGE_TRANSITIONS key except 'created' has a real label.

    The label must be non-empty, must differ from the default 'To Analyse', and
    the stage must be present in the curated map itself. The stage list comes
    from STAGE_TRANSITIONS rather than a hardcoded list, so a new engine stage
    without a curated label fails here.
    """
    row = {"stage": stage}
    label = N.plane_status_label(row)

    assert label, f"stage {stage!r} produced an empty label"

    still_default = label == N._DEFAULT_STATUS_LABEL
    assert not still_default, (
        f"stage {stage!r} still falls back to 'To Analyse'"
    )

    # A neutral auto-generated label is not enough: the curated map must carry the key.
    curated = N._STAGE_STATUS_LABEL
    assert stage in curated, (
        f"stage {stage!r} missing a curated label in _STAGE_STATUS_LABEL"
    )
def test_orch091_tc01_created_stays_to_analyse():
    """'created' keeps its explicit 'To Analyse' status."""
    created_row = {"stage": "created"}
    assert N.plane_status_label(created_row) == "To Analyse"
# --------------------------------------------------------------------------- #
# ORCH-091 TC-02 / AC-2 — staging label is meaningful and distinct
# --------------------------------------------------------------------------- #
def test_orch091_tc02_deploy_staging_label():
    """AC-2: 'deploy-staging' gets its own staging label.

    It must equal the curated text, mention staging, and differ from both
    'To Analyse' and the label of the plain 'deploy' stage.
    """
    labels = {
        stage: N.plane_status_label({"stage": stage})
        for stage in ("deploy-staging", "deploy")
    }
    staging_label = labels["deploy-staging"]

    assert staging_label == "Deploying (staging)"
    assert staging_label != "To Analyse"
    assert staging_label != labels["deploy"]
    assert "staging" in staging_label.lower()
# --------------------------------------------------------------------------- #
# ORCH-091 TC-03 / AC-3 — neutral fallback for a truly unknown stage
# --------------------------------------------------------------------------- #
def test_orch091_tc03_unknown_stage_neutral_not_to_analyse():
    """AC-3: an unknown stage gets a neutral, non-empty label; bad input is safe.

    None as the row, a None stage and an empty stage all resolve to the safe
    default 'To Analyse' without raising.
    """
    bogus_row = {"stage": "__bogus__"}
    assert N.plane_status_label(bogus_row) != "To Analyse"
    assert N.plane_status_label(bogus_row)  # non-empty

    # Broken input never raises and degrades to the safe default.
    for broken_row in (None, {"stage": None}, {"stage": ""}):
        assert N.plane_status_label(broken_row) == "To Analyse"
def test_orch091_tc03_cancelled_offline_label():
    """ORCH-090 terminal stage: 'cancelled' has its own offline label."""
    cancelled_row = {"stage": "cancelled"}
    assert N.plane_status_label(cancelled_row) == "Cancelled"