Files
orchestrator/tests/test_tracker_html_escape.py
claude-bot b38cc16041 fix(notifications): escape all card data fields at the render boundary (ORCH-095)
render_task_tracker sends/edits the live card with parse_mode=HTML. _fmt_minutes
returns the literal "<1м" for a sub-minute stage; interpolated raw into HTML text
Telegram parsed "<1м" as an opening tag -> editMessageText 400 can't parse
entities -> edit_telegram EDIT_FAILED -> update_task_tracker early return
(anti-duplicate ORCH-087) -> the card froze (incident ORCH-093, message_id 18854).

Close the whole "unescaped data in HTML text" class per ADR-001: a module-local
_esc(x)=html.escape(str(x)) (never-raise) wraps every DATA slot (durations, status
label, model, effort, token/cost metrics) exactly once at the render boundary in
render_task_tracker/_stage_line. Source functions stay HTML-agnostic (_fmt_minutes
still returns "<1м"; escape on the boundary renders it visually identical as
&lt;1м, so the visible format is unchanged). Intentional MARKUP slots (num_html /
link_for / _done_link / already-escaped esc_title) are NOT escaped, so the issue
number stays a clickable <a> tag and nothing is double-escaped.

A previously-frozen card auto-recovers on the next stage transition (a new safe
render edits in place, 200) — no new code, no touch to edit_telegram /
update_task_tracker / the orphan ledger, so the ORCH-087 anti-duplicate invariant
is preserved (a transient edit failure still does not spawn a new card).

STAGE_TRANSITIONS / QG_CHECKS / check_* / notification transport / DB schema are
untouched. New tests/test_tracker_html_escape.py (TC-01..TC-11); full suite green.

Refs: ORCH-095

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-10 00:17:26 +03:00

359 lines
15 KiB
Python

"""ORCH-095 — HTML-safety of dynamic data fields in render_task_tracker.
The live card text is sent/edited with parse_mode=HTML. It is assembled from
two kinds of slots:
* category M (intentional markup, NEVER escaped): the clickable issue number
(plane_issue_link -> <a href>), the ⏳-waiting links (link_for), the done
line (_done_link), and the already-escaped title (esc_title);
* category D (data, escaped EXACTLY once at the render boundary): durations
(_fmt_minutes / _capped_review_str), the status label, model, effort, and
the token/cost metrics.
The bug (ORCH-093 incident): _fmt_minutes returns the literal "<1м" for a
sub-minute stage; interpolated raw into HTML text Telegram parsed "<1м" as an
opening tag -> 400 can't parse entities -> EDIT_FAILED -> the card froze. ADR-001
closes the whole class by escaping every D-slot at the boundary (helper N._esc)
while keeping the M-slots intact (so the number stays clickable, no double-escape).
These tests assert: sub-minute durations are safe (TC-01/02/03), all data fields
escape special chars without double-escaping (TC-04/05/06), markup survives
(TC-07/08), the edit payload is parse-safe and the anti-duplicate invariant
(ORCH-087) holds (TC-09/10), and the render path never raises (TC-11).
Network is isolated (no live overlay HTTP); the DB is a temp SQLite.
"""
import os
import tempfile
os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")
os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")
_test_db = os.path.join(tempfile.gettempdir(), "test_orchestrator_html_escape.db")
os.environ["ORCH_DB_PATH"] = _test_db
from types import SimpleNamespace # noqa: E402
from unittest.mock import MagicMock # noqa: E402
import pytest # noqa: E402
import src.db as db_module # noqa: E402
import src.projects as projects_mod # noqa: E402
from src.db import init_db, get_db # noqa: E402
from src import notifications as N # noqa: E402
# orchestrator repo -> default project registry uuid (src/projects.py).
_ORCH_PROJECT_ID = "8da6aa25-a60e-44d6-a1e2-d8ae59aa7d6a"
@pytest.fixture(autouse=True)
def setup_db(monkeypatch):
monkeypatch.setattr(db_module.settings, "db_path", _test_db, raising=False)
if os.path.exists(_test_db):
os.unlink(_test_db)
init_db()
# Keep the render path fully offline (no live overlay HTTP).
monkeypatch.setattr(N._get_settings(), "tracker_live_status", False,
raising=False)
monkeypatch.setattr(
projects_mod, "get_project_by_repo",
lambda repo: (SimpleNamespace(plane_project_id=_ORCH_PROJECT_ID)
if repo == "orchestrator" else None),
)
yield
if os.path.exists(_test_db):
os.unlink(_test_db)
def _set(monkeypatch, **kw):
s = N._get_settings()
for k, v in kw.items():
monkeypatch.setattr(s, k, v, raising=False)
def _mk_task(wid="ORCH-095", repo="orchestrator", title="card",
plane_issue_id="issue-uuid-1", stage="development",
brd_start=None, brd_end=None):
conn = get_db()
cur = conn.execute(
"INSERT INTO tasks (plane_id, work_item_id, repo, branch, stage, title, "
"plane_issue_id, brd_review_started_at, brd_review_ended_at) "
"VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
("p1", wid, repo, "feature/ORCH-095-x", stage, title, plane_issue_id,
brd_start, brd_end),
)
tid = cur.lastrowid
conn.commit()
conn.close()
return tid
def _mk_run(task_id, agent, started, finished, in_tok=100, out_tok=50,
cache_read=0, cache_creation=0, cost=0.0, model=None,
effort=None, exit_code=0):
conn = get_db()
cur = conn.execute(
"INSERT INTO agent_runs (task_id, agent, started_at, finished_at, "
"exit_code, input_tokens, output_tokens, cache_read_tokens, "
"cache_creation_tokens, cost_usd, model, effort) "
"VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
(task_id, agent, started, finished, exit_code, in_tok, out_tok,
cache_read, cache_creation, cost, model, effort),
)
rid = cur.lastrowid
conn.commit()
conn.close()
return rid
# A stage that lasted 30s (< 60s) -> _fmt_minutes -> "<1м".
_SUB_MIN_START = "2026-06-04 09:00:00"
_SUB_MIN_END = "2026-06-04 09:00:30"
# --------------------------------------------------------------------------- #
# TC-01 — sub-minute duration is HTML-safe at the render boundary
# --------------------------------------------------------------------------- #
def test_tc01_sub_minute_duration_escaped_at_boundary():
# ADR-001 D2: _fmt_minutes keeps returning the literal "<1м" (source
# unchanged); safety comes from _esc at the boundary -> "&lt;1м".
assert N._fmt_minutes(30) == "<1м"
escaped = N._esc(N._fmt_minutes(30))
assert escaped == "&lt;1м"
assert "<1" not in escaped # no raw opening-tag-looking substring
# --------------------------------------------------------------------------- #
# TC-02 — _fmt_minutes boundary inputs: never-raise + boundary-safe
# --------------------------------------------------------------------------- #
@pytest.mark.parametrize("value", [0, None, "abc", 59, 60, 61, 100000, -5, 30])
def test_tc02_fmt_minutes_never_raise_and_safe(value):
out = N._fmt_minutes(value) # must not raise
assert isinstance(out, str)
safe = N._esc(out)
# After boundary escaping no raw '<' (or '>'/'&') survives in any branch.
assert "<" not in safe
assert ">" not in safe
# --------------------------------------------------------------------------- #
# TC-03 — render_task_tracker for a sub-minute stage: no raw '<1м'
# --------------------------------------------------------------------------- #
def test_tc03_render_sub_minute_stage_is_safe():
tid = _mk_task(stage="development")
# Analysis stage lasted 30s; analysis sits before development -> ✅ line shown.
_mk_run(tid, "analyst", _SUB_MIN_START, _SUB_MIN_END, model="claude-opus-4-8")
text = N.render_task_tracker(tid)
assert "<1м" not in text # the bug: raw literal must be gone
assert "&lt;1м" in text # rendered safely instead
# And no double escaping leaked in.
assert "&amp;lt;" not in text
# --------------------------------------------------------------------------- #
# TC-04 — title with '<', '>', '&' escaped, no raw tags, no double-escape
# --------------------------------------------------------------------------- #
def test_tc04_title_special_chars_escaped_no_double():
tid = _mk_task(title="A <b>x</b> & <1", stage="development")
_mk_run(tid, "analyst", _SUB_MIN_START, _SUB_MIN_END)
text = N.render_task_tracker(tid)
# Data special chars present only escaped...
assert "&lt;b&gt;" in text
assert "&amp;" in text
# ...never as raw markup from the title.
assert "<b>" not in text
assert "</b>" not in text
# No double escaping anywhere.
assert "&amp;lt;" not in text
assert "&amp;amp;" not in text
# --------------------------------------------------------------------------- #
# TC-05 — status label + model + effort are escaped (defence-in-depth)
# --------------------------------------------------------------------------- #
def test_tc05_status_label_escaped(monkeypatch):
monkeypatch.setattr(N, "_card_status_label", lambda *a, **k: "<danger>")
tid = _mk_task(stage="development")
text = N.render_task_tracker(tid)
assert "&lt;danger&gt;" in text
assert "<danger>" not in text
def test_tc05_model_escaped(monkeypatch):
# The model name is a D-slot: even if a '<' ever reached it, it is escaped.
import src.usage as U
monkeypatch.setattr(U, "short_model_name", lambda m: "<m>" if m else "")
tid = _mk_task(stage="development")
_mk_run(tid, "analyst", _SUB_MIN_START, _SUB_MIN_END, model="whatever")
text = N.render_task_tracker(tid)
assert "&lt;m&gt;" in text
assert "<m>" not in text
def test_tc05_effort_escaped():
tid = _mk_task(stage="development")
_mk_run(tid, "analyst", _SUB_MIN_START, _SUB_MIN_END,
model="claude-opus-4-8", effort="<e>")
text = N.render_task_tracker(tid)
assert "&lt;e&gt;" in text
assert "<e>" not in text
# --------------------------------------------------------------------------- #
# TC-06 — token / cost metrics are HTML-safe ('$' + digits)
# --------------------------------------------------------------------------- #
def test_tc06_token_cost_metrics_safe():
tid = _mk_task(stage="development")
_mk_run(tid, "analyst", _SUB_MIN_START, _SUB_MIN_END,
in_tok=1_100_000, out_tok=39_600, cost=2.38,
model="claude-opus-4-8")
text = N.render_task_tracker(tid)
# The 💰 totals line renders with a '$' cost and no stray angle brackets
# coming from the metric data.
assert "$" in text
assert "&amp;" not in text or "&amp;lt;" not in text # no double-escape
# No raw opening-tag substring produced by the metrics.
assert "<$" not in text
# --------------------------------------------------------------------------- #
# TC-07 — markup regression: clickable issue number stays a valid <a> tag
# --------------------------------------------------------------------------- #
def test_tc07_issue_number_stays_clickable(monkeypatch):
_set(monkeypatch, plane_web_url="https://plane.example.org",
plane_api_url="http://localhost:8091", plane_workspace_slug="acme")
tid = _mk_task(plane_issue_id="abcd-issue-uuid", stage="development")
_mk_run(tid, "analyst", _SUB_MIN_START, _SUB_MIN_END)
text = N.render_task_tracker(tid)
expected_url = (
f"https://plane.example.org/acme/projects/{_ORCH_PROJECT_ID}"
f"/issues/abcd-issue-uuid/"
)
# The anchor markup is NOT escaped (M-slot) -> still clickable & valid.
assert f'<a href="{expected_url}">ORCH-095</a>' in text
assert text.count("<a href=") == text.count("</a>")
# href/label are not double-escaped.
assert "&lt;a href=" not in text
assert "&amp;lt;" not in text
# --------------------------------------------------------------------------- #
# TC-08 — markup regression: _done_link renders valid '🔗 PR #n · 📦 Внедрено'
# --------------------------------------------------------------------------- #
def test_tc08_done_link_markup_preserved(monkeypatch):
tid = _mk_task(stage="done")
_mk_run(tid, "deployer", _SUB_MIN_START, _SUB_MIN_END)
# Mock the Gitea PR lookup inside _done_link.
resp = MagicMock()
resp.status_code = 200
resp.json = lambda: [{"number": 105}]
monkeypatch.setattr(N.httpx, "get", lambda *a, **k: resp)
text = N.render_task_tracker(tid)
assert "\U0001f517 PR #105" in text # 🔗 PR #105
assert "\U0001f4e6" in text # 📦
# The done line is an M-slot -> not escaped.
assert "&lt;" not in text.split("\n")[-1]
# --------------------------------------------------------------------------- #
# TC-09 — integration: edit payload for a sub-minute card is parse-safe
# --------------------------------------------------------------------------- #
def test_tc09_edit_payload_is_parse_safe(monkeypatch):
from src.db import set_tracker_message_id
_set(monkeypatch, tracker_mode="edit",
telegram_bot_token="bot-token", telegram_chat_id="chat-1")
tid = _mk_task(stage="development")
_mk_run(tid, "analyst", _SUB_MIN_START, _SUB_MIN_END, model="claude-opus-4-8")
set_tracker_message_id(tid, 18854)
captured = {}
def _fake_post(url, json=None, timeout=None, **kw):
captured["url"] = url
captured["json"] = json
return SimpleNamespace(status_code=200, json=lambda: {"ok": True})
monkeypatch.setattr(N.httpx, "post", _fake_post)
N.update_task_tracker(tid)
assert "editMessageText" in captured["url"]
payload_text = captured["json"]["text"]
assert captured["json"]["parse_mode"] == "HTML"
# The crux of ORCH-095: no raw '<1м' reaches Telegram -> no 'can't parse
# entities' -> the card does not freeze.
assert "<1м" not in payload_text
assert "&lt;1м" in payload_text
# --------------------------------------------------------------------------- #
# TC-10 — stuck card resumes; anti-duplicate (ORCH-087) preserved
# --------------------------------------------------------------------------- #
def test_tc10_valid_render_edits_in_place_no_new_card(monkeypatch):
from src.db import set_tracker_message_id, get_tracker_message_id
_set(monkeypatch, tracker_mode="edit")
tid = _mk_task(stage="development")
_mk_run(tid, "analyst", _SUB_MIN_START, _SUB_MIN_END, model="claude-opus-4-8")
set_tracker_message_id(tid, 18854)
# After the fix the render is valid -> edit succeeds in place (EDIT_OK).
monkeypatch.setattr(N, "edit_telegram", lambda mid, text: N.EDIT_OK)
send_mock = MagicMock(return_value=999)
monkeypatch.setattr(N, "send_telegram", send_mock)
N.update_task_tracker(tid)
send_mock.assert_not_called() # edited in place, no new card
assert get_tracker_message_id(tid) == 18854 # pointer unchanged
def test_tc10_transient_fail_does_not_duplicate(monkeypatch):
# ORCH-087 invariant: a transient edit failure must NOT spawn a new card.
from src.db import set_tracker_message_id, get_tracker_message_id
_set(monkeypatch, tracker_mode="edit")
tid = _mk_task(stage="development")
_mk_run(tid, "analyst", _SUB_MIN_START, _SUB_MIN_END, model="claude-opus-4-8")
set_tracker_message_id(tid, 18854)
monkeypatch.setattr(N, "edit_telegram", lambda mid, text: N.EDIT_FAILED)
send_mock = MagicMock(return_value=999)
monkeypatch.setattr(N, "send_telegram", send_mock)
N.update_task_tracker(tid)
send_mock.assert_not_called() # no duplicate on transient fail
assert get_tracker_message_id(tid) == 18854
# --------------------------------------------------------------------------- #
# TC-11 — never-raise on broken inputs
# --------------------------------------------------------------------------- #
def test_tc11_never_raise_missing_task():
# No such task -> minimal fallback string, no exception.
assert N.render_task_tracker(999999) == "task-999999"
def test_tc11_never_raise_none_title_and_bad_timestamps():
tid = _mk_task(title=None, stage="development")
# Unparseable timestamps -> _duration_seconds degrades to None, no raise.
_mk_run(tid, "analyst", "not-a-ts", "also-bad", model="claude-opus-4-8")
text = N.render_task_tracker(tid) # must not raise
assert isinstance(text, str)
assert "ORCH-095" in text # falls back to work_item_id
def test_tc11_esc_never_raises():
class _Boom:
def __str__(self):
raise RuntimeError("boom")
# _esc degrades to '' rather than propagating an exception (FR-5).
assert N._esc(_Boom()) == ""
assert N._esc(None) == "None"