orchestrator/tests/test_tracker_html_escape.py

"""ORCH-095 — HTML-safety of dynamic data fields in render_task_tracker.

The live card text is sent/edited with parse_mode=HTML. It is assembled from
two kinds of slots:

  * category M (intentional markup, NEVER escaped): the clickable issue number
    (plane_issue_link -> <a href>), the ⏳-waiting links (link_for), the done
    line (_done_link), and the already-escaped title (esc_title);
  * category D (data, escaped EXACTLY once at the render boundary): durations
    (_fmt_minutes / _capped_review_str), the status label, model, effort, and
    the token/cost metrics.

The bug (ORCH-093 incident): _fmt_minutes returns the literal "<1м" for a
sub-minute stage; interpolated raw into HTML text Telegram parsed "<1м" as an
opening tag -> 400 can't parse entities -> EDIT_FAILED -> the card froze. ADR-001
closes the whole class by escaping every D-slot at the boundary (helper N._esc)
while keeping the M-slots intact (so the number stays clickable, no double-escape).

These tests assert: sub-minute durations are safe (TC-01/02/03), all data fields
escape special chars without double-escaping (TC-04/05/06), markup survives
(TC-07/08), the edit payload is parse-safe and the anti-duplicate invariant
(ORCH-087) holds (TC-09/10), and the render path never raises (TC-11).

Network is isolated (no live overlay HTTP); the DB is a temp SQLite.
"""

import os
import tempfile

os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")
os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")

_test_db = os.path.join(tempfile.gettempdir(), "test_orchestrator_html_escape.db")
os.environ["ORCH_DB_PATH"] = _test_db

from types import SimpleNamespace  # noqa: E402
from unittest.mock import MagicMock  # noqa: E402

import pytest  # noqa: E402

import src.db as db_module  # noqa: E402
import src.projects as projects_mod  # noqa: E402
from src.db import init_db, get_db  # noqa: E402
from src import notifications as N  # noqa: E402

# orchestrator repo -> default project registry uuid (src/projects.py).
_ORCH_PROJECT_ID = "8da6aa25-a60e-44d6-a1e2-d8ae59aa7d6a"


@pytest.fixture(autouse=True)
def setup_db(monkeypatch):
    monkeypatch.setattr(db_module.settings, "db_path", _test_db, raising=False)
    if os.path.exists(_test_db):
        os.unlink(_test_db)
    init_db()
    # Keep the render path fully offline (no live overlay HTTP).
    monkeypatch.setattr(N._get_settings(), "tracker_live_status", False,
                        raising=False)
    monkeypatch.setattr(
        projects_mod, "get_project_by_repo",
        lambda repo: (SimpleNamespace(plane_project_id=_ORCH_PROJECT_ID)
                      if repo == "orchestrator" else None),
    )
    yield
    if os.path.exists(_test_db):
        os.unlink(_test_db)


def _set(monkeypatch, **kw):
    s = N._get_settings()
    for k, v in kw.items():
        monkeypatch.setattr(s, k, v, raising=False)


def _mk_task(wid="ORCH-095", repo="orchestrator", title="card",
             plane_issue_id="issue-uuid-1", stage="development",
             brd_start=None, brd_end=None):
    conn = get_db()
    cur = conn.execute(
        "INSERT INTO tasks (plane_id, work_item_id, repo, branch, stage, title, "
        "plane_issue_id, brd_review_started_at, brd_review_ended_at) "
        "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
        ("p1", wid, repo, "feature/ORCH-095-x", stage, title, plane_issue_id,
         brd_start, brd_end),
    )
    tid = cur.lastrowid
    conn.commit()
    conn.close()
    return tid


def _mk_run(task_id, agent, started, finished, in_tok=100, out_tok=50,
            cache_read=0, cache_creation=0, cost=0.0, model=None,
            effort=None, exit_code=0):
    conn = get_db()
    cur = conn.execute(
        "INSERT INTO agent_runs (task_id, agent, started_at, finished_at, "
        "exit_code, input_tokens, output_tokens, cache_read_tokens, "
        "cache_creation_tokens, cost_usd, model, effort) "
        "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
        (task_id, agent, started, finished, exit_code, in_tok, out_tok,
         cache_read, cache_creation, cost, model, effort),
    )
    rid = cur.lastrowid
    conn.commit()
    conn.close()
    return rid


# A stage that lasted 30s (< 60s) -> _fmt_minutes -> "<1м".
_SUB_MIN_START = "2026-06-04 09:00:00"
_SUB_MIN_END = "2026-06-04 09:00:30"


# --------------------------------------------------------------------------- #
# TC-01 — sub-minute duration is HTML-safe at the render boundary
# --------------------------------------------------------------------------- #
def test_tc01_sub_minute_duration_escaped_at_boundary():
    # ADR-001 D2: _fmt_minutes keeps returning the literal "<1м" (source
    # unchanged); safety comes from _esc at the boundary -> "&lt;1м".
    assert N._fmt_minutes(30) == "<1м"
    escaped = N._esc(N._fmt_minutes(30))
    assert escaped == "&lt;1м"
    assert "<1" not in escaped          # no raw opening-tag-looking substring


# --------------------------------------------------------------------------- #
# TC-02 — _fmt_minutes boundary inputs: never-raise + boundary-safe
# --------------------------------------------------------------------------- #
@pytest.mark.parametrize("value", [0, None, "abc", 59, 60, 61, 100000, -5, 30])
def test_tc02_fmt_minutes_never_raise_and_safe(value):
    out = N._fmt_minutes(value)           # must not raise
    assert isinstance(out, str)
    safe = N._esc(out)
    # After boundary escaping no raw '<' (or '>'/'&') survives in any branch.
    assert "<" not in safe
    assert ">" not in safe


# --------------------------------------------------------------------------- #
# TC-03 — render_task_tracker for a sub-minute stage: no raw '<1м'
# --------------------------------------------------------------------------- #
def test_tc03_render_sub_minute_stage_is_safe():
    tid = _mk_task(stage="development")
    # Analysis stage lasted 30s; analysis sits before development -> ✅ line shown.
    _mk_run(tid, "analyst", _SUB_MIN_START, _SUB_MIN_END, model="claude-opus-4-8")

    text = N.render_task_tracker(tid)
    assert "<1м" not in text             # the bug: raw literal must be gone
    assert "&lt;1м" in text              # rendered safely instead
    # And no double escaping leaked in.
    assert "&amp;lt;" not in text


# --------------------------------------------------------------------------- #
# TC-04 — title with '<', '>', '&' escaped, no raw tags, no double-escape
# --------------------------------------------------------------------------- #
def test_tc04_title_special_chars_escaped_no_double():
    tid = _mk_task(title="A <b>x</b> & <1", stage="development")
    _mk_run(tid, "analyst", _SUB_MIN_START, _SUB_MIN_END)

    text = N.render_task_tracker(tid)
    # Data special chars present only escaped...
    assert "&lt;b&gt;" in text
    assert "&amp;" in text
    # ...never as raw markup from the title.
    assert "<b>" not in text
    assert "</b>" not in text
    # No double escaping anywhere.
    assert "&amp;lt;" not in text
    assert "&amp;amp;" not in text


# --------------------------------------------------------------------------- #
# TC-05 — status label + model + effort are escaped (defence-in-depth)
# --------------------------------------------------------------------------- #
def test_tc05_status_label_escaped(monkeypatch):
    monkeypatch.setattr(N, "_card_status_label", lambda *a, **k: "<danger>")
    tid = _mk_task(stage="development")
    text = N.render_task_tracker(tid)
    assert "&lt;danger&gt;" in text
    assert "<danger>" not in text


def test_tc05_model_escaped(monkeypatch):
    # The model name is a D-slot: even if a '<' ever reached it, it is escaped.
    import src.usage as U
    monkeypatch.setattr(U, "short_model_name", lambda m: "<m>" if m else "")
    tid = _mk_task(stage="development")
    _mk_run(tid, "analyst", _SUB_MIN_START, _SUB_MIN_END, model="whatever")
    text = N.render_task_tracker(tid)
    assert "&lt;m&gt;" in text
    assert "<m>" not in text


def test_tc05_effort_escaped():
    tid = _mk_task(stage="development")
    _mk_run(tid, "analyst", _SUB_MIN_START, _SUB_MIN_END,
            model="claude-opus-4-8", effort="<e>")
    text = N.render_task_tracker(tid)
    assert "&lt;e&gt;" in text
    assert "<e>" not in text


# --------------------------------------------------------------------------- #
# TC-06 — token / cost metrics are HTML-safe ('$' + digits)
# --------------------------------------------------------------------------- #
def test_tc06_token_cost_metrics_safe():
    tid = _mk_task(stage="development")
    _mk_run(tid, "analyst", _SUB_MIN_START, _SUB_MIN_END,
            in_tok=1_100_000, out_tok=39_600, cost=2.38,
            model="claude-opus-4-8")
    text = N.render_task_tracker(tid)
    # The 💰 totals line renders with a '$' cost and no stray angle brackets
    # coming from the metric data.
    assert "$" in text
    assert "&amp;" not in text or "&amp;lt;" not in text  # no double-escape
    # No raw opening-tag substring produced by the metrics.
    assert "<$" not in text


# --------------------------------------------------------------------------- #
# TC-07 — markup regression: clickable issue number stays a valid <a> tag
# --------------------------------------------------------------------------- #
def test_tc07_issue_number_stays_clickable(monkeypatch):
    _set(monkeypatch, plane_web_url="https://plane.example.org",
         plane_api_url="http://localhost:8091", plane_workspace_slug="acme")
    tid = _mk_task(plane_issue_id="abcd-issue-uuid", stage="development")
    _mk_run(tid, "analyst", _SUB_MIN_START, _SUB_MIN_END)

    text = N.render_task_tracker(tid)
    expected_url = (
        f"https://plane.example.org/acme/projects/{_ORCH_PROJECT_ID}"
        f"/issues/abcd-issue-uuid/"
    )
    # The anchor markup is NOT escaped (M-slot) -> still clickable & valid.
    assert f'<a href="{expected_url}">ORCH-095</a>' in text
    assert text.count("<a href=") == text.count("</a>")
    # href/label are not double-escaped.
    assert "&lt;a href=" not in text
    assert "&amp;lt;" not in text


# --------------------------------------------------------------------------- #
# TC-08 — markup regression: _done_link renders valid '🔗 PR #n · 📦 Внедрено'
# --------------------------------------------------------------------------- #
def test_tc08_done_link_markup_preserved(monkeypatch):
    tid = _mk_task(stage="done")
    _mk_run(tid, "deployer", _SUB_MIN_START, _SUB_MIN_END)

    # Mock the Gitea PR lookup inside _done_link.
    resp = MagicMock()
    resp.status_code = 200
    resp.json = lambda: [{"number": 105}]
    monkeypatch.setattr(N.httpx, "get", lambda *a, **k: resp)

    text = N.render_task_tracker(tid)
    assert "\U0001f517 PR #105" in text          # 🔗 PR #105
    assert "\U0001f4e6" in text                   # 📦
    # The done line is an M-slot -> not escaped.
    assert "&lt;" not in text.split("\n")[-1]


# --------------------------------------------------------------------------- #
# TC-09 — integration: edit payload for a sub-minute card is parse-safe
# --------------------------------------------------------------------------- #
def test_tc09_edit_payload_is_parse_safe(monkeypatch):
    from src.db import set_tracker_message_id
    _set(monkeypatch, tracker_mode="edit",
         telegram_bot_token="bot-token", telegram_chat_id="chat-1")
    tid = _mk_task(stage="development")
    _mk_run(tid, "analyst", _SUB_MIN_START, _SUB_MIN_END, model="claude-opus-4-8")
    set_tracker_message_id(tid, 18854)

    captured = {}

    def _fake_post(url, json=None, timeout=None, **kw):
        captured["url"] = url
        captured["json"] = json
        return SimpleNamespace(status_code=200, json=lambda: {"ok": True})

    monkeypatch.setattr(N.httpx, "post", _fake_post)

    N.update_task_tracker(tid)

    assert "editMessageText" in captured["url"]
    payload_text = captured["json"]["text"]
    assert captured["json"]["parse_mode"] == "HTML"
    # The crux of ORCH-095: no raw '<1м' reaches Telegram -> no 'can't parse
    # entities' -> the card does not freeze.
    assert "<1м" not in payload_text
    assert "&lt;1м" in payload_text


# --------------------------------------------------------------------------- #
# TC-10 — stuck card resumes; anti-duplicate (ORCH-087) preserved
# --------------------------------------------------------------------------- #
def test_tc10_valid_render_edits_in_place_no_new_card(monkeypatch):
    from src.db import set_tracker_message_id, get_tracker_message_id
    _set(monkeypatch, tracker_mode="edit")
    tid = _mk_task(stage="development")
    _mk_run(tid, "analyst", _SUB_MIN_START, _SUB_MIN_END, model="claude-opus-4-8")
    set_tracker_message_id(tid, 18854)

    # After the fix the render is valid -> edit succeeds in place (EDIT_OK).
    monkeypatch.setattr(N, "edit_telegram", lambda mid, text: N.EDIT_OK)
    send_mock = MagicMock(return_value=999)
    monkeypatch.setattr(N, "send_telegram", send_mock)

    N.update_task_tracker(tid)

    send_mock.assert_not_called()                 # edited in place, no new card
    assert get_tracker_message_id(tid) == 18854   # pointer unchanged


def test_tc10_transient_fail_does_not_duplicate(monkeypatch):
    # ORCH-087 invariant: a transient edit failure must NOT spawn a new card.
    from src.db import set_tracker_message_id, get_tracker_message_id
    _set(monkeypatch, tracker_mode="edit")
    tid = _mk_task(stage="development")
    _mk_run(tid, "analyst", _SUB_MIN_START, _SUB_MIN_END, model="claude-opus-4-8")
    set_tracker_message_id(tid, 18854)

    monkeypatch.setattr(N, "edit_telegram", lambda mid, text: N.EDIT_FAILED)
    send_mock = MagicMock(return_value=999)
    monkeypatch.setattr(N, "send_telegram", send_mock)

    N.update_task_tracker(tid)

    send_mock.assert_not_called()                 # no duplicate on transient fail
    assert get_tracker_message_id(tid) == 18854


# --------------------------------------------------------------------------- #
# TC-11 — never-raise on broken inputs
# --------------------------------------------------------------------------- #
def test_tc11_never_raise_missing_task():
    # No such task -> minimal fallback string, no exception.
    assert N.render_task_tracker(999999) == "task-999999"


def test_tc11_never_raise_none_title_and_bad_timestamps():
    tid = _mk_task(title=None, stage="development")
    # Unparseable timestamps -> _duration_seconds degrades to None, no raise.
    _mk_run(tid, "analyst", "not-a-ts", "also-bad", model="claude-opus-4-8")
    text = N.render_task_tracker(tid)        # must not raise
    assert isinstance(text, str)
    assert "ORCH-095" in text                # falls back to work_item_id


def test_tc11_esc_never_raises():
    class _Boom:
        def __str__(self):
            raise RuntimeError("boom")

    # _esc degrades to '' rather than propagating an exception (FR-5).
    assert N._esc(_Boom()) == ""
    assert N._esc(None) == "None"