# orchestrator/tests/test_status_comment_dedup_regression.py

"""ORCH-016 / TC-17 + AC-7: status-comment de-dup contract.

The «one comment per agent per stage» guarantee is enforced upstream of
build_status_comment by:
  - the webhook event-dedup table (events.delivery_id PARTIAL UNIQUE, ORCH-5 /
    src.db.insert_event_dedup),
  - the job queue claim-once contract (src.db.claim_next_job, ORCH-1).

The ORCH-016 PR introduces a new comment FORMAT but must not weaken these
guarantees. This regression test:
  1. exercises insert_event_dedup directly to confirm the same delivery_id is
     accepted exactly once (sanity for the dedup primitive),
  2. exercises build_status_comment to confirm it is a PURE function (same
     inputs -> same output), so a retried call from a poorly-isolated test or a
     misbehaving caller doesn't silently produce two different comment bodies.
"""

import os
import tempfile

# Environment is prepared here, before the `src` imports further down (which is
# why those imports carry E402 suppressions). Presumably `src` reads these
# variables at import time -- TODO confirm against the settings module.
# setdefault() leaves any token value already present in the environment alone.
os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")
os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")

# Database file dedicated to this test module, located in the system temp
# directory. Unlike the tokens above, ORCH_DB_PATH is overwritten
# unconditionally so the tests always use this file.
_test_db = os.path.join(tempfile.gettempdir(), "test_orch016_dedup_regression.db")
os.environ["ORCH_DB_PATH"] = _test_db

import pytest  # noqa: E402

from src import db as db_module  # noqa: E402
from src.db import init_db, insert_event_dedup  # noqa: E402


@pytest.fixture(autouse=True)
def setup_db(monkeypatch):
    """Give every test in this module a freshly initialised database file.

    Before the test: points ``db_module.settings.db_path`` at the module's
    temp DB file, deletes any leftover file, and runs ``init_db()``.
    After the test: deletes the file again.
    """

    def _discard_db_file():
        # Guarded delete: the file may legitimately be absent.
        if os.path.exists(_test_db):
            os.unlink(_test_db)

    # raising=False: do not fail if the settings object lacks a db_path attribute.
    monkeypatch.setattr(db_module.settings, "db_path", _test_db, raising=False)
    _discard_db_file()
    init_db()
    yield
    _discard_db_file()


# ---------------------------------------------------------------------------
# Primitive: event-dedup still rejects a re-delivered webhook.
# ---------------------------------------------------------------------------
def test_tc17_event_dedup_inserts_once_for_same_delivery_id():
    """A repeated delivery_id is accepted only on its first arrival.

    The first insert must report True (new row) and an identical second
    insert must report False (rejected). Status-comment triggers rely on
    this primitive.
    """
    delivery = ("plane", "issue.updated", "{}", "delivery-XYZ")

    first_attempt = insert_event_dedup(*delivery)
    assert first_attempt is True

    second_attempt = insert_event_dedup(*delivery)
    assert second_attempt is False


def test_tc17_event_dedup_distinguishes_delivery_ids():
    """Webhooks with different delivery IDs are each accepted independently."""
    for delivery_id in ("delivery-A", "delivery-B"):
        accepted = insert_event_dedup("plane", "issue.updated", "{}", delivery_id)
        assert accepted is True


# ---------------------------------------------------------------------------
# Format: build_status_comment is deterministic. A double-fire from buggy code
# still produces an IDENTICAL body -- so the upstream dedup primitive can
# safely treat the second call as no-op without comparing prose.
# ---------------------------------------------------------------------------
def test_tc17_build_status_comment_is_pure(tmp_path):
    """Repeated calls with identical inputs yield identical comment bodies."""
    from src import usage as U

    # Minimal worktree containing a single review artefact with an APPROVE verdict.
    worktree = tmp_path / "wt"
    item_dir = worktree / "docs" / "work-items" / "ET-016"
    item_dir.mkdir(parents=True)
    (item_dir / "12-review.md").write_text("---\nverdict: APPROVE\n---\n")

    kwargs = {
        "repo": "enduro-trails",
        "branch": "feature/ET-016-x",
        "work_item_id": "ET-016",
        "duration_s": 120,
        "worktree_root": str(worktree),
        "usage": {"input_tokens": 100, "output_tokens": 50, "cost_usd": 0.05},
    }

    # Three back-to-back builds from the very same keyword arguments.
    first, second, third = [
        U.build_status_comment("reviewer", **kwargs) for _ in range(3)
    ]

    assert first == second == third


def test_tc17_build_status_comment_no_db_side_effects(tmp_path):
    """A status-comment build must NOT write to the DB.

    Otherwise a webhook-dedup hit would still touch state via the comment
    builder. The check compares row counts of `tasks`, `agent_runs` and
    `jobs` taken before and after the call, so it detects inserted or
    deleted rows (in-place updates do not change a row count).
    """
    from src import usage as U
    from src.db import get_db

    def _row_counts():
        """Return the row count of each watched table, in a fixed order."""
        conn = get_db()
        try:
            return [
                conn.execute(query).fetchone()[0]
                for query in (
                    "SELECT COUNT(*) FROM tasks",
                    "SELECT COUNT(*) FROM agent_runs",
                    "SELECT COUNT(*) FROM jobs",
                )
            ]
        finally:
            # Always release the connection, even if a query raises, so a
            # failure here cannot leave the DB file held open for later tests.
            conn.close()

    counts_before = _row_counts()

    U.build_status_comment(
        "developer", repo="enduro-trails", branch="b",
        work_item_id="ET-016", pr_number=1, duration_s=10,
        usage={"input_tokens": 1, "output_tokens": 1, "cost_usd": 0.01},
    )

    counts_after = _row_counts()
    assert counts_before == counts_after