125 lines
4.7 KiB
Python
125 lines
4.7 KiB
Python
"""ORCH-016 / TC-17 + AC-7: status-comment de-dup contract.
|
|
|
|
The «one comment per agent per stage» guarantee is enforced upstream of
build_status_comment by:
  - the webhook event-dedup table (events.delivery_id PARTIAL UNIQUE, ORCH-5 /
    src.db.insert_event_dedup),
  - the job queue claim-once contract (src.db.claim_next_job, ORCH-1).

The ORCH-016 PR introduces a new comment FORMAT but must not weaken these
guarantees. This regression test:
  1. exercises insert_event_dedup directly to confirm the same delivery_id is
     accepted exactly once (sanity for the dedup primitive),
  2. exercises build_status_comment to confirm it is a PURE function (same
     inputs -> same output), so a retried call from a poorly-isolated test or a
     misbehaving caller doesn't silently produce two different comment bodies.
|
|
"""
|
|
|
|
import os
|
|
import tempfile
|
|
|
|
# Database file for this test module, placed in the system temp directory.
_test_db = os.path.join(tempfile.gettempdir(), "test_orch016_dedup_regression.db")

# Fallback credentials: setdefault leaves any value that is already present
# in the environment untouched.
os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")
os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")

# Unlike the tokens, the DB path is always overridden. This setup runs ahead
# of the `src` imports (which carry `noqa: E402`), presumably because `src`
# reads these variables at import time -- confirm against src settings.
os.environ["ORCH_DB_PATH"] = _test_db
|
|
|
|
import pytest # noqa: E402
|
|
|
|
from src import db as db_module # noqa: E402
|
|
from src.db import init_db, insert_event_dedup # noqa: E402
|
|
|
|
|
|
@pytest.fixture(autouse=True)
def setup_db(monkeypatch):
    """Give every test in this module a freshly initialised database file.

    Points ``db_module.settings.db_path`` at ``_test_db``, removes any file
    already at that path, and calls ``init_db()`` before the test body runs.
    The file is removed again after the test.
    """

    def _discard_db_file():
        # Delete the database file only when it is actually present.
        if os.path.exists(_test_db):
            os.unlink(_test_db)

    # raising=False: do not fail if `settings` has no `db_path` attribute yet.
    monkeypatch.setattr(db_module.settings, "db_path", _test_db, raising=False)

    _discard_db_file()
    init_db()

    yield

    _discard_db_file()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Primitive: event-dedup still rejects a re-delivered webhook.
|
|
# ---------------------------------------------------------------------------
|
|
def test_tc17_event_dedup_inserts_once_for_same_delivery_id():
    """A re-delivered webhook (same delivery_id) is accepted only once.

    The first ``insert_event_dedup`` call must return True; repeating the
    call with identical arguments must return False. This is the primitive
    every status-comment trigger relies on.
    """
    webhook = ("plane", "issue.updated", "{}", "delivery-XYZ")

    accepted = insert_event_dedup(*webhook)
    assert accepted is True

    accepted_again = insert_event_dedup(*webhook)
    assert accepted_again is False
|
|
|
|
|
|
def test_tc17_event_dedup_distinguishes_delivery_ids():
    """Webhooks carrying different delivery IDs are each accepted."""
    for delivery_id in ("delivery-A", "delivery-B"):
        # Same source, event type and payload; only the delivery id varies.
        assert insert_event_dedup("plane", "issue.updated", "{}", delivery_id) is True
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Format: build_status_comment is deterministic. A double-fire from buggy code
|
|
# still produces an IDENTICAL body -- so the upstream dedup primitive can
|
|
# safely treat the second call as no-op without comparing prose.
|
|
# ---------------------------------------------------------------------------
|
|
def test_tc17_build_status_comment_is_pure(tmp_path):
    """Three calls with identical inputs must return equal comment bodies."""
    from src import usage as U

    # Minimal worktree layout: a review file for ET-016 with an APPROVE verdict.
    worktree = tmp_path / "wt"
    item_dir = worktree / "docs" / "work-items" / "ET-016"
    item_dir.mkdir(parents=True)
    review_file = item_dir / "12-review.md"
    review_file.write_text("---\nverdict: APPROVE\n---\n")

    kwargs = {
        "repo": "enduro-trails",
        "branch": "feature/ET-016-x",
        "work_item_id": "ET-016",
        "duration_s": 120,
        "worktree_root": str(worktree),
        "usage": {"input_tokens": 100, "output_tokens": 50, "cost_usd": 0.05},
    }

    # Call the builder three times in a row with the very same keyword set.
    bodies = [U.build_status_comment("reviewer", **kwargs) for _ in range(3)]
    first, second, third = bodies

    assert first == second == third
|
|
|
|
|
|
def test_tc17_build_status_comment_no_db_side_effects(tmp_path):
    """A status-comment build must NOT write to the DB.

    Otherwise a webhook-dedup hit would still touch state via the comment
    builder. We check by counting rows in `tasks`/`agent_runs`/`jobs` before
    and after the call and requiring the counts to be unchanged.
    """
    from contextlib import closing

    from src import usage as U
    from src.db import get_db

    count_queries = (
        "SELECT COUNT(*) FROM tasks",
        "SELECT COUNT(*) FROM agent_runs",
        "SELECT COUNT(*) FROM jobs",
    )

    def _row_counts():
        """Return the row count of each watched table, in `count_queries` order."""
        # closing() releases the connection even when a query raises, so a
        # failing snapshot cannot leave a connection open on the DB file.
        with closing(get_db()) as conn:
            return [conn.execute(query).fetchone()[0] for query in count_queries]

    counts_before = _row_counts()

    U.build_status_comment(
        "developer", repo="enduro-trails", branch="b",
        work_item_id="ET-016", pr_number=1, duration_s=10,
        usage={"input_tokens": 1, "output_tokens": 1, "cost_usd": 0.01},
    )

    counts_after = _row_counts()

    assert counts_before == counts_after
|