Files
orchestrator/tests/test_status_comment_dedup_regression.py
claude-bot 0663da6e4c
All checks were successful
CI / test (push) Successful in 11s
CI / test (pull_request) Successful in 12s
feat(plane): unified status-comment format with duration line (ORCH-016)
Все агенты (analyst..deployer) теперь пишут финальный коммент через единый
хелпер usage.build_status_comment(...) — заголовок «{icon} {Role} — {описание}»,
опциональная строка Verdict/Status из YAML-frontmatter, строка
«Длительность: 4m 12s» (явный duration_s от launcher, fallback из agent_runs
для аналитика), HTML-блок Документы, тех-хвост <sub>tokens · cost</sub>.

- Новые публичные функции в src/usage.py: build_status_comment, fmt_duration,
  get_agent_duration. usage_comment(...) → тонкая deprecated-обёртка (legacy
  тесты в tests/test_usage.py продолжают работать). artifact_links(...)
  переписан на HTML <li><a>…</a></li> (breaking change для внутреннего API,
  но единственный внешний клиент — _post_usage_comments — мигрирован).
- Новый модуль src/frontmatter.py: defensive YAML reader, никогда не raise.
- stage_engine._build_analyst_ready_comment(...) теперь тонкая обёртка над
  build_status_comment(agent="analyst", ...); task_id пробрасывается из
  _handle_analysis_approved_flow для DB-фоллбэка длительности (AC-14).
- launcher._post_usage_comments(...) принимает duration_s, резолвит stage из
  tasks для deployer и worktree_root для AC-8 graceful skipping.

Тесты (16 файлов, 56 новых тестовых функций, покрывают TC-01..TC-25):
fmt_duration table, build_status_comment по всем агентам, DB-фоллбэк,
authorship под per-agent ботами, дедуп-инвариант, regression на
status-only verdict аналитика и финальный notify_done, snapshot
QG_CHECKS + STAGE_TRANSITIONS.

Документация: docs/architecture/README.md (раздел Plane Sync),
CHANGELOG.md (Unreleased Added/Changed).

Refs: ORCH-016

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-05 12:39:06 +00:00

125 lines
4.7 KiB
Python

"""ORCH-016 / TC-17 + AC-7: status-comment de-dup contract.
The «one comment per agent per stage» guarantee is enforced upstream of
build_status_comment by:
- the webhook event-dedup table (events.delivery_id PARTIAL UNIQUE, ORCH-5 /
src.db.insert_event_dedup),
- the job queue claim-once contract (src.db.claim_next_job, ORCH-1).
The ORCH-016 PR introduces a new comment FORMAT but must not weaken these
guarantees. This regression test:
1. exercises insert_event_dedup directly to confirm the same delivery_id is
accepted exactly once (sanity for the dedup primitive),
2. exercises build_status_comment to confirm it is a PURE function (same
inputs -> same output), so a retried call from a poorly-isolated test or a
misbehaving caller doesn't silently produce two different comment bodies.
"""
import os
import tempfile
# Fallback dummy tokens: setdefault only applies them when the variables are
# not already present in the environment.
os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")
os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")
# Scratch database file inside the system temp directory. ORCH_DB_PATH is
# assigned unconditionally (not setdefault), so any inherited value is replaced.
_test_db = os.path.join(tempfile.gettempdir(), "test_orch016_dedup_regression.db")
os.environ["ORCH_DB_PATH"] = _test_db
# The imports below deliberately come after the environment setup (hence the
# E402 suppressions) -- presumably because `src` reads these variables at
# import time; TODO confirm against src settings.
import pytest # noqa: E402
from src import db as db_module # noqa: E402
from src.db import init_db, insert_event_dedup # noqa: E402
@pytest.fixture(autouse=True)
def setup_db(monkeypatch):
    """Give every test in this module a freshly initialised scratch database.

    Points ``db_module.settings.db_path`` at the scratch file, deletes any
    leftover file, runs ``init_db()``, and deletes the file again once the
    test has finished.
    """

    def _drop_db_file():
        # Remove the scratch DB file when one is present; no-op otherwise.
        if os.path.exists(_test_db):
            os.unlink(_test_db)

    monkeypatch.setattr(db_module.settings, "db_path", _test_db, raising=False)
    _drop_db_file()
    init_db()
    yield
    _drop_db_file()
# ---------------------------------------------------------------------------
# Primitive: event-dedup still rejects a re-delivered webhook.
# ---------------------------------------------------------------------------
def test_tc17_event_dedup_inserts_once_for_same_delivery_id():
    """A re-delivered webhook (same delivery_id) is stored only once.

    The first insert reports True (row created); repeating it with the same
    delivery_id reports False (rejected). Every status-comment trigger
    depends on this primitive.
    """
    delivery = ("plane", "issue.updated", "{}", "delivery-XYZ")

    first_attempt = insert_event_dedup(*delivery)
    assert first_attempt is True

    second_attempt = insert_event_dedup(*delivery)
    assert second_attempt is False
def test_tc17_event_dedup_distinguishes_delivery_ids():
    """Webhooks carrying different delivery IDs are each accepted independently."""
    for delivery_id in ("delivery-A", "delivery-B"):
        accepted = insert_event_dedup("plane", "issue.updated", "{}", delivery_id)
        assert accepted is True
# ---------------------------------------------------------------------------
# Format: build_status_comment is deterministic. A double-fire from buggy code
# still produces an IDENTICAL body -- so the upstream dedup primitive can
# safely treat the second call as no-op without comparing prose.
# ---------------------------------------------------------------------------
def test_tc17_build_status_comment_is_pure(tmp_path):
    """Repeated builds with identical inputs yield identical comment bodies."""
    from src import usage as U

    # Minimal worktree holding a review document with a YAML front-matter verdict.
    worktree = tmp_path / "wt"
    item_dir = worktree / "docs" / "work-items" / "ET-016"
    item_dir.mkdir(parents=True)
    review_doc = item_dir / "12-review.md"
    review_doc.write_text("---\nverdict: APPROVE\n---\n")

    kwargs = {
        "repo": "enduro-trails",
        "branch": "feature/ET-016-x",
        "work_item_id": "ET-016",
        "duration_s": 120,
        "worktree_root": str(worktree),
        "usage": {"input_tokens": 100, "output_tokens": 50, "cost_usd": 0.05},
    }

    # Three consecutive builds from the very same keyword arguments.
    first = U.build_status_comment("reviewer", **kwargs)
    second = U.build_status_comment("reviewer", **kwargs)
    third = U.build_status_comment("reviewer", **kwargs)

    assert first == second == third
def _table_row_counts(tables=("tasks", "agent_runs", "jobs")):
    """Return the current row count of each table in ``tables``, in order.

    A fresh connection is opened via ``get_db()`` and is always closed, even
    when one of the COUNT queries raises.
    """
    from contextlib import closing

    from src.db import get_db

    with closing(get_db()) as conn:
        # Table names are fixed identifiers from this module (never external
        # input); SQL identifiers cannot be bound as query parameters.
        return [
            conn.execute(f"SELECT COUNT(*) FROM {table}").fetchone()[0]
            for table in tables
        ]


def test_tc17_build_status_comment_no_db_side_effects(tmp_path):
    """A status-comment build must NOT write to the DB.

    Otherwise a webhook-dedup hit would still touch state via the comment
    builder. Verified by snapshotting the row counts of `tasks`,
    `agent_runs` and `jobs` before and after a build and requiring them to
    be equal.
    """
    from src import usage as U

    counts_before = _table_row_counts()

    U.build_status_comment(
        "developer", repo="enduro-trails", branch="b",
        work_item_id="ET-016", pr_number=1, duration_s=10,
        usage={"input_tokens": 1, "output_tokens": 1, "cost_usd": 0.01},
    )

    counts_after = _table_row_counts()
    assert counts_before == counts_after