render_task_tracker sends/edits the live card with parse_mode=HTML. _fmt_minutes returns the literal "<1м" for a sub-minute stage; interpolated raw into HTML text Telegram parsed "<1м" as an opening tag -> editMessageText 400 can't parse entities -> edit_telegram EDIT_FAILED -> update_task_tracker early return (anti-duplicate ORCH-087) -> the card froze (incident ORCH-093, message_id 18854). Close the whole "unescaped data in HTML text" class per ADR-001: a module-local _esc(x)=html.escape(str(x)) (never-raise) wraps every DATA slot (durations, status label, model, effort, token/cost metrics) exactly once at the render boundary in render_task_tracker/_stage_line. Source functions stay HTML-agnostic (_fmt_minutes still returns "<1м"; escape on the boundary renders it visually identical as <1м, so the visible format is unchanged). Intentional MARKUP slots (num_html / link_for / _done_link / already-escaped esc_title) are NOT escaped, so the issue number stays a clickable <a> tag and nothing is double-escaped. A previously-frozen card auto-recovers on the next stage transition (a new safe render edits in place, 200) — no new code, no touch to edit_telegram / update_task_tracker / the orphan ledger, so the ORCH-087 anti-duplicate invariant is preserved (a transient edit failure still does not spawn a new card). STAGE_TRANSITIONS / QG_CHECKS / check_* / notification transport / DB schema are untouched. New tests/test_tracker_html_escape.py (TC-01..TC-11); full suite green. Refs: ORCH-095 Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
359 lines
15 KiB
Python
359 lines
15 KiB
Python
"""ORCH-095 — HTML-safety of dynamic data fields in render_task_tracker.
|
|
|
|
The live card text is sent/edited with parse_mode=HTML. It is assembled from
|
|
two kinds of slots:
|
|
|
|
* category M (intentional markup, NEVER escaped): the clickable issue number
|
|
(plane_issue_link -> <a href>), the ⏳-waiting links (link_for), the done
|
|
line (_done_link), and the already-escaped title (esc_title);
|
|
* category D (data, escaped EXACTLY once at the render boundary): durations
|
|
(_fmt_minutes / _capped_review_str), the status label, model, effort, and
|
|
the token/cost metrics.
|
|
|
|
The bug (ORCH-093 incident): _fmt_minutes returns the literal "<1м" for a
|
|
sub-minute stage; interpolated raw into HTML text Telegram parsed "<1м" as an
|
|
opening tag -> 400 can't parse entities -> EDIT_FAILED -> the card froze. ADR-001
|
|
closes the whole class by escaping every D-slot at the boundary (helper N._esc)
|
|
while keeping the M-slots intact (so the number stays clickable, no double-escape).
|
|
|
|
These tests assert: sub-minute durations are safe (TC-01/02/03), all data fields
|
|
escape special chars without double-escaping (TC-04/05/06), markup survives
|
|
(TC-07/08), the edit payload is parse-safe and the anti-duplicate invariant
|
|
(ORCH-087) holds (TC-09/10), and the render path never raises (TC-11).
|
|
|
|
Network is isolated (no live overlay HTTP); the DB is a temp SQLite.
|
|
"""
|
|
|
|
import os
|
|
import tempfile
|
|
|
|
os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")
|
|
os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")
|
|
|
|
_test_db = os.path.join(tempfile.gettempdir(), "test_orchestrator_html_escape.db")
|
|
os.environ["ORCH_DB_PATH"] = _test_db
|
|
|
|
from types import SimpleNamespace # noqa: E402
|
|
from unittest.mock import MagicMock # noqa: E402
|
|
|
|
import pytest # noqa: E402
|
|
|
|
import src.db as db_module # noqa: E402
|
|
import src.projects as projects_mod # noqa: E402
|
|
from src.db import init_db, get_db # noqa: E402
|
|
from src import notifications as N # noqa: E402
|
|
|
|
# orchestrator repo -> default project registry uuid (src/projects.py).
|
|
_ORCH_PROJECT_ID = "8da6aa25-a60e-44d6-a1e2-d8ae59aa7d6a"
|
|
|
|
|
|
@pytest.fixture(autouse=True)
|
|
def setup_db(monkeypatch):
|
|
monkeypatch.setattr(db_module.settings, "db_path", _test_db, raising=False)
|
|
if os.path.exists(_test_db):
|
|
os.unlink(_test_db)
|
|
init_db()
|
|
# Keep the render path fully offline (no live overlay HTTP).
|
|
monkeypatch.setattr(N._get_settings(), "tracker_live_status", False,
|
|
raising=False)
|
|
monkeypatch.setattr(
|
|
projects_mod, "get_project_by_repo",
|
|
lambda repo: (SimpleNamespace(plane_project_id=_ORCH_PROJECT_ID)
|
|
if repo == "orchestrator" else None),
|
|
)
|
|
yield
|
|
if os.path.exists(_test_db):
|
|
os.unlink(_test_db)
|
|
|
|
|
|
def _set(monkeypatch, **kw):
|
|
s = N._get_settings()
|
|
for k, v in kw.items():
|
|
monkeypatch.setattr(s, k, v, raising=False)
|
|
|
|
|
|
def _mk_task(wid="ORCH-095", repo="orchestrator", title="card",
|
|
plane_issue_id="issue-uuid-1", stage="development",
|
|
brd_start=None, brd_end=None):
|
|
conn = get_db()
|
|
cur = conn.execute(
|
|
"INSERT INTO tasks (plane_id, work_item_id, repo, branch, stage, title, "
|
|
"plane_issue_id, brd_review_started_at, brd_review_ended_at) "
|
|
"VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
|
|
("p1", wid, repo, "feature/ORCH-095-x", stage, title, plane_issue_id,
|
|
brd_start, brd_end),
|
|
)
|
|
tid = cur.lastrowid
|
|
conn.commit()
|
|
conn.close()
|
|
return tid
|
|
|
|
|
|
def _mk_run(task_id, agent, started, finished, in_tok=100, out_tok=50,
|
|
cache_read=0, cache_creation=0, cost=0.0, model=None,
|
|
effort=None, exit_code=0):
|
|
conn = get_db()
|
|
cur = conn.execute(
|
|
"INSERT INTO agent_runs (task_id, agent, started_at, finished_at, "
|
|
"exit_code, input_tokens, output_tokens, cache_read_tokens, "
|
|
"cache_creation_tokens, cost_usd, model, effort) "
|
|
"VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
|
|
(task_id, agent, started, finished, exit_code, in_tok, out_tok,
|
|
cache_read, cache_creation, cost, model, effort),
|
|
)
|
|
rid = cur.lastrowid
|
|
conn.commit()
|
|
conn.close()
|
|
return rid
|
|
|
|
|
|
# A stage that lasted 30s (< 60s) -> _fmt_minutes -> "<1м".
|
|
_SUB_MIN_START = "2026-06-04 09:00:00"
|
|
_SUB_MIN_END = "2026-06-04 09:00:30"
|
|
|
|
|
|
# --------------------------------------------------------------------------- #
|
|
# TC-01 — sub-minute duration is HTML-safe at the render boundary
|
|
# --------------------------------------------------------------------------- #
|
|
def test_tc01_sub_minute_duration_escaped_at_boundary():
|
|
# ADR-001 D2: _fmt_minutes keeps returning the literal "<1м" (source
|
|
# unchanged); safety comes from _esc at the boundary -> "<1м".
|
|
assert N._fmt_minutes(30) == "<1м"
|
|
escaped = N._esc(N._fmt_minutes(30))
|
|
assert escaped == "<1м"
|
|
assert "<1" not in escaped # no raw opening-tag-looking substring
|
|
|
|
|
|
# --------------------------------------------------------------------------- #
|
|
# TC-02 — _fmt_minutes boundary inputs: never-raise + boundary-safe
|
|
# --------------------------------------------------------------------------- #
|
|
@pytest.mark.parametrize("value", [0, None, "abc", 59, 60, 61, 100000, -5, 30])
|
|
def test_tc02_fmt_minutes_never_raise_and_safe(value):
|
|
out = N._fmt_minutes(value) # must not raise
|
|
assert isinstance(out, str)
|
|
safe = N._esc(out)
|
|
# After boundary escaping no raw '<' (or '>'/'&') survives in any branch.
|
|
assert "<" not in safe
|
|
assert ">" not in safe
|
|
|
|
|
|
# --------------------------------------------------------------------------- #
|
|
# TC-03 — render_task_tracker for a sub-minute stage: no raw '<1м'
|
|
# --------------------------------------------------------------------------- #
|
|
def test_tc03_render_sub_minute_stage_is_safe():
|
|
tid = _mk_task(stage="development")
|
|
# Analysis stage lasted 30s; analysis sits before development -> ✅ line shown.
|
|
_mk_run(tid, "analyst", _SUB_MIN_START, _SUB_MIN_END, model="claude-opus-4-8")
|
|
|
|
text = N.render_task_tracker(tid)
|
|
assert "<1м" not in text # the bug: raw literal must be gone
|
|
assert "<1м" in text # rendered safely instead
|
|
# And no double escaping leaked in.
|
|
assert "&lt;" not in text
|
|
|
|
|
|
# --------------------------------------------------------------------------- #
|
|
# TC-04 — title with '<', '>', '&' escaped, no raw tags, no double-escape
|
|
# --------------------------------------------------------------------------- #
|
|
def test_tc04_title_special_chars_escaped_no_double():
|
|
tid = _mk_task(title="A <b>x</b> & <1", stage="development")
|
|
_mk_run(tid, "analyst", _SUB_MIN_START, _SUB_MIN_END)
|
|
|
|
text = N.render_task_tracker(tid)
|
|
# Data special chars present only escaped...
|
|
assert "<b>" in text
|
|
assert "&" in text
|
|
# ...never as raw markup from the title.
|
|
assert "<b>" not in text
|
|
assert "</b>" not in text
|
|
# No double escaping anywhere.
|
|
assert "&lt;" not in text
|
|
assert "&amp;" not in text
|
|
|
|
|
|
# --------------------------------------------------------------------------- #
|
|
# TC-05 — status label + model + effort are escaped (defence-in-depth)
|
|
# --------------------------------------------------------------------------- #
|
|
def test_tc05_status_label_escaped(monkeypatch):
|
|
monkeypatch.setattr(N, "_card_status_label", lambda *a, **k: "<danger>")
|
|
tid = _mk_task(stage="development")
|
|
text = N.render_task_tracker(tid)
|
|
assert "<danger>" in text
|
|
assert "<danger>" not in text
|
|
|
|
|
|
def test_tc05_model_escaped(monkeypatch):
|
|
# The model name is a D-slot: even if a '<' ever reached it, it is escaped.
|
|
import src.usage as U
|
|
monkeypatch.setattr(U, "short_model_name", lambda m: "<m>" if m else "")
|
|
tid = _mk_task(stage="development")
|
|
_mk_run(tid, "analyst", _SUB_MIN_START, _SUB_MIN_END, model="whatever")
|
|
text = N.render_task_tracker(tid)
|
|
assert "<m>" in text
|
|
assert "<m>" not in text
|
|
|
|
|
|
def test_tc05_effort_escaped():
|
|
tid = _mk_task(stage="development")
|
|
_mk_run(tid, "analyst", _SUB_MIN_START, _SUB_MIN_END,
|
|
model="claude-opus-4-8", effort="<e>")
|
|
text = N.render_task_tracker(tid)
|
|
assert "<e>" in text
|
|
assert "<e>" not in text
|
|
|
|
|
|
# --------------------------------------------------------------------------- #
|
|
# TC-06 — token / cost metrics are HTML-safe ('$' + digits)
|
|
# --------------------------------------------------------------------------- #
|
|
def test_tc06_token_cost_metrics_safe():
|
|
tid = _mk_task(stage="development")
|
|
_mk_run(tid, "analyst", _SUB_MIN_START, _SUB_MIN_END,
|
|
in_tok=1_100_000, out_tok=39_600, cost=2.38,
|
|
model="claude-opus-4-8")
|
|
text = N.render_task_tracker(tid)
|
|
# The 💰 totals line renders with a '$' cost and no stray angle brackets
|
|
# coming from the metric data.
|
|
assert "$" in text
|
|
assert "&" not in text or "&lt;" not in text # no double-escape
|
|
# No raw opening-tag substring produced by the metrics.
|
|
assert "<$" not in text
|
|
|
|
|
|
# --------------------------------------------------------------------------- #
|
|
# TC-07 — markup regression: clickable issue number stays a valid <a> tag
|
|
# --------------------------------------------------------------------------- #
|
|
def test_tc07_issue_number_stays_clickable(monkeypatch):
|
|
_set(monkeypatch, plane_web_url="https://plane.example.org",
|
|
plane_api_url="http://localhost:8091", plane_workspace_slug="acme")
|
|
tid = _mk_task(plane_issue_id="abcd-issue-uuid", stage="development")
|
|
_mk_run(tid, "analyst", _SUB_MIN_START, _SUB_MIN_END)
|
|
|
|
text = N.render_task_tracker(tid)
|
|
expected_url = (
|
|
f"https://plane.example.org/acme/projects/{_ORCH_PROJECT_ID}"
|
|
f"/issues/abcd-issue-uuid/"
|
|
)
|
|
# The anchor markup is NOT escaped (M-slot) -> still clickable & valid.
|
|
assert f'<a href="{expected_url}">ORCH-095</a>' in text
|
|
assert text.count("<a href=") == text.count("</a>")
|
|
# href/label are not double-escaped.
|
|
assert "<a href=" not in text
|
|
assert "&lt;" not in text
|
|
|
|
|
|
# --------------------------------------------------------------------------- #
|
|
# TC-08 — markup regression: _done_link renders valid '🔗 PR #n · 📦 Внедрено'
|
|
# --------------------------------------------------------------------------- #
|
|
def test_tc08_done_link_markup_preserved(monkeypatch):
|
|
tid = _mk_task(stage="done")
|
|
_mk_run(tid, "deployer", _SUB_MIN_START, _SUB_MIN_END)
|
|
|
|
# Mock the Gitea PR lookup inside _done_link.
|
|
resp = MagicMock()
|
|
resp.status_code = 200
|
|
resp.json = lambda: [{"number": 105}]
|
|
monkeypatch.setattr(N.httpx, "get", lambda *a, **k: resp)
|
|
|
|
text = N.render_task_tracker(tid)
|
|
assert "\U0001f517 PR #105" in text # 🔗 PR #105
|
|
assert "\U0001f4e6" in text # 📦
|
|
# The done line is an M-slot -> not escaped.
|
|
assert "<" not in text.split("\n")[-1]
|
|
|
|
|
|
# --------------------------------------------------------------------------- #
|
|
# TC-09 — integration: edit payload for a sub-minute card is parse-safe
|
|
# --------------------------------------------------------------------------- #
|
|
def test_tc09_edit_payload_is_parse_safe(monkeypatch):
|
|
from src.db import set_tracker_message_id
|
|
_set(monkeypatch, tracker_mode="edit",
|
|
telegram_bot_token="bot-token", telegram_chat_id="chat-1")
|
|
tid = _mk_task(stage="development")
|
|
_mk_run(tid, "analyst", _SUB_MIN_START, _SUB_MIN_END, model="claude-opus-4-8")
|
|
set_tracker_message_id(tid, 18854)
|
|
|
|
captured = {}
|
|
|
|
def _fake_post(url, json=None, timeout=None, **kw):
|
|
captured["url"] = url
|
|
captured["json"] = json
|
|
return SimpleNamespace(status_code=200, json=lambda: {"ok": True})
|
|
|
|
monkeypatch.setattr(N.httpx, "post", _fake_post)
|
|
|
|
N.update_task_tracker(tid)
|
|
|
|
assert "editMessageText" in captured["url"]
|
|
payload_text = captured["json"]["text"]
|
|
assert captured["json"]["parse_mode"] == "HTML"
|
|
# The crux of ORCH-095: no raw '<1м' reaches Telegram -> no 'can't parse
|
|
# entities' -> the card does not freeze.
|
|
assert "<1м" not in payload_text
|
|
assert "<1м" in payload_text
|
|
|
|
|
|
# --------------------------------------------------------------------------- #
|
|
# TC-10 — stuck card resumes; anti-duplicate (ORCH-087) preserved
|
|
# --------------------------------------------------------------------------- #
|
|
def test_tc10_valid_render_edits_in_place_no_new_card(monkeypatch):
|
|
from src.db import set_tracker_message_id, get_tracker_message_id
|
|
_set(monkeypatch, tracker_mode="edit")
|
|
tid = _mk_task(stage="development")
|
|
_mk_run(tid, "analyst", _SUB_MIN_START, _SUB_MIN_END, model="claude-opus-4-8")
|
|
set_tracker_message_id(tid, 18854)
|
|
|
|
# After the fix the render is valid -> edit succeeds in place (EDIT_OK).
|
|
monkeypatch.setattr(N, "edit_telegram", lambda mid, text: N.EDIT_OK)
|
|
send_mock = MagicMock(return_value=999)
|
|
monkeypatch.setattr(N, "send_telegram", send_mock)
|
|
|
|
N.update_task_tracker(tid)
|
|
|
|
send_mock.assert_not_called() # edited in place, no new card
|
|
assert get_tracker_message_id(tid) == 18854 # pointer unchanged
|
|
|
|
|
|
def test_tc10_transient_fail_does_not_duplicate(monkeypatch):
|
|
# ORCH-087 invariant: a transient edit failure must NOT spawn a new card.
|
|
from src.db import set_tracker_message_id, get_tracker_message_id
|
|
_set(monkeypatch, tracker_mode="edit")
|
|
tid = _mk_task(stage="development")
|
|
_mk_run(tid, "analyst", _SUB_MIN_START, _SUB_MIN_END, model="claude-opus-4-8")
|
|
set_tracker_message_id(tid, 18854)
|
|
|
|
monkeypatch.setattr(N, "edit_telegram", lambda mid, text: N.EDIT_FAILED)
|
|
send_mock = MagicMock(return_value=999)
|
|
monkeypatch.setattr(N, "send_telegram", send_mock)
|
|
|
|
N.update_task_tracker(tid)
|
|
|
|
send_mock.assert_not_called() # no duplicate on transient fail
|
|
assert get_tracker_message_id(tid) == 18854
|
|
|
|
|
|
# --------------------------------------------------------------------------- #
|
|
# TC-11 — never-raise on broken inputs
|
|
# --------------------------------------------------------------------------- #
|
|
def test_tc11_never_raise_missing_task():
|
|
# No such task -> minimal fallback string, no exception.
|
|
assert N.render_task_tracker(999999) == "task-999999"
|
|
|
|
|
|
def test_tc11_never_raise_none_title_and_bad_timestamps():
|
|
tid = _mk_task(title=None, stage="development")
|
|
# Unparseable timestamps -> _duration_seconds degrades to None, no raise.
|
|
_mk_run(tid, "analyst", "not-a-ts", "also-bad", model="claude-opus-4-8")
|
|
text = N.render_task_tracker(tid) # must not raise
|
|
assert isinstance(text, str)
|
|
assert "ORCH-095" in text # falls back to work_item_id
|
|
|
|
|
|
def test_tc11_esc_never_raises():
|
|
class _Boom:
|
|
def __str__(self):
|
|
raise RuntimeError("boom")
|
|
|
|
# _esc degrades to '' rather than propagating an exception (FR-5).
|
|
assert N._esc(_Boom()) == ""
|
|
assert N._esc(None) == "None"
|