При заворотах на development task_desc теперь несёт дословный must-fix текст (P0/P1 ревьюера, причина FAIL тестера) вместо одной ссылки на файл — developer-агент видит суть претензий сразу и не повторяет ту же ошибку, экономя retry-бюджет и токены общего инстанса. - Новый defensive-модуль src/review_parse.py (never-raise): extract_review_findings (P0/P1 из 12-review.md ## Findings), extract_test_failures (фрагмент тела 13-test-report.md: pytest output / FAIL-строки / Итог), усечение по лимиту. - Две rollback-ветки stage_engine: встраивают текст + сохраняют ссылку на полный файл; graceful-фоллбэк на ссылку-строку при битом/пустом артефакте. - Последовательность отката, retry-счётчик, поля AdvanceResult, реестр QG_CHECKS не менялись. - Доки: README (Stage Engine / Откаты), CHANGELOG. - Тесты: tests/test_review_parse.py, test_stage_engine.py::TestRollbackTaskDescEmbedding. Refs: ORCH-046 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
206 lines
7.7 KiB
Python
206 lines
7.7 KiB
Python
"""Defensive extractors for reviewer / tester artifact bodies (ORCH-046).

When a task is rolled back to ``development`` the stage engine builds the
``task_desc`` that ends up in the developer agent's ``.task-dev.md``. Historically
that text only carried a *link* to the artifact file (12-review.md /
13-test-report.md); the developer agent had to go read the file, and the key
must-fix points (reviewer P0/P1 findings, tester failure reason) were lost in
transit — "испорченный телефон" (a game of telephone) that burns the retry budget.

This module extracts the **verbatim** must-fix text so the stage engine can embed
it directly in ``task_desc`` (ADR docs/work-items/ORCH-046/06-adr/ADR-001-*).

Contract — **never raises** (mirrors ``src/frontmatter.py`` and
``src/qg/checks.py::_parse_tests_verdict``): any error — missing file, IOError,
malformed markdown/YAML, missing section — yields ``""``. The caller then falls
back to the previous link-only ``task_desc``. No network calls; disk reads only.
"""
|
||
|
||
import logging
|
||
import re
|
||
|
||
# Module-level logger; the extractors only ever log at DEBUG level.
logger = logging.getLogger("orchestrator.review_parse")

# Truncation limits, in characters (module-level per the spec, ТЗ §2.3). The
# full context always stays in the artifact file; the embedded text is a
# focused excerpt.
MAX_FINDINGS_CHARS = 2000
MAX_FAILURES_CHARS = 2000

# Suffix appended to an excerpt that was cut; it is added on top of the limit.
_TRUNCATED_MARKER = "\n…(truncated)"

# Recognize a `### P0`/`### P1` subsection header by the presence of the P0/P1
# token. Matching is case-insensitive; the token must not be preceded by an
# ASCII letter or digit and must not be followed by a digit (so `P10` is not a
# match), while any dash/em-dash or text after the token is tolerated.
_P01_HEADER_RE = re.compile(r"(?<![A-Za-z0-9])p[01](?![0-9])", re.IGNORECASE)
|
||
|
||
|
||
def _read(path: str) -> str | None:
|
||
"""Read a file as UTF-8. Never raises; returns None on any OS error."""
|
||
try:
|
||
with open(path, "r", encoding="utf-8", errors="replace") as f:
|
||
return f.read()
|
||
except OSError as e:
|
||
logger.debug(f"review_parse: cannot open {path}: {e}")
|
||
return None
|
||
|
||
|
||
def _strip_frontmatter(content: str) -> str:
|
||
"""Drop a leading ``--- … ---`` YAML frontmatter block, if present."""
|
||
if content.startswith("---"):
|
||
parts = content.split("---", 2)
|
||
if len(parts) >= 3:
|
||
return parts[2]
|
||
return content
|
||
|
||
|
||
def _truncate(text: str, limit: int) -> str:
|
||
"""Trim ``text`` to ``limit`` chars, appending a truncation marker if cut."""
|
||
if len(text) <= limit:
|
||
return text
|
||
return text[:limit].rstrip() + _TRUNCATED_MARKER
|
||
|
||
|
||
def _section_body(md: str, heading_token: str) -> str:
|
||
"""Return the body lines under the first ``## <…heading_token…>`` heading.
|
||
|
||
Capture stops at the next level-2 (``## ``) heading. Matching is
|
||
case-insensitive substring match on the heading line, so callers pass a token
|
||
like ``"Вывод pytest"`` or ``"Findings"``. ``### ``-level headers do NOT
|
||
delimit the section (they start with ``"### "``, not ``"## "``).
|
||
"""
|
||
out: list[str] = []
|
||
capturing = False
|
||
for line in md.splitlines():
|
||
if line.startswith("## "):
|
||
if capturing:
|
||
break
|
||
if heading_token.lower() in line.lower():
|
||
capturing = True
|
||
continue
|
||
if capturing:
|
||
out.append(line)
|
||
return "\n".join(out)
|
||
|
||
|
||
def _is_placeholder_item(text: str) -> bool:
|
||
"""True for empty or template-placeholder list items (non-substantive).
|
||
|
||
The canonical reviewer template seeds each severity with
|
||
``- [ ] <описание> (если есть)``. Such lines must be ignored so an empty P0/P1
|
||
subsection does not leak the placeholder into ``task_desc``.
|
||
"""
|
||
t = text.strip()
|
||
if not t:
|
||
return True
|
||
if "(если есть)" in t:
|
||
return True
|
||
# An item whose entire payload is an angle-bracket placeholder, e.g. "<описание>".
|
||
if t.startswith("<") and t.endswith(">"):
|
||
return True
|
||
return False
|
||
|
||
|
||
def _item_payload(line: str) -> str | None:
|
||
"""If ``line`` is a markdown list item, return its payload text; else None.
|
||
|
||
Handles ``- foo``, ``* foo`` and checkbox forms ``- [ ] foo`` / ``- [x] foo``.
|
||
"""
|
||
m = re.match(r"\s*[-*]\s+(?:\[[ xX]?\]\s*)?(.*)$", line)
|
||
if not m:
|
||
return None
|
||
return m.group(1)
|
||
|
||
|
||
def _findings_subsections(findings_body: str):
|
||
"""Yield ``(header_line, body_lines)`` for each ``### `` subsection."""
|
||
header: str | None = None
|
||
body: list[str] = []
|
||
for line in findings_body.splitlines():
|
||
if line.startswith("### "):
|
||
if header is not None:
|
||
yield header, body
|
||
header = line
|
||
body = []
|
||
elif header is not None:
|
||
body.append(line)
|
||
if header is not None:
|
||
yield header, body
|
||
|
||
|
||
def extract_review_findings(path: str) -> str:
    """Return the verbatim P0/P1 findings from a reviewer report (12-review.md).

    Reads the first ``## `` section whose heading contains ``Findings``
    (frontmatter is stripped first) and keeps the list items of every ``### ``
    subsection whose header carries a P0 or P1 token. Subsections without such
    a token (e.g. P2/P3) are ignored. Empty and template-placeholder items are
    skipped, and a subsection left with no items is omitted entirely. Each kept
    subsection is rendered as its header line followed by its item lines;
    subsections are separated by a blank line.

    NOTE(review): only lines that are themselves list items are kept, so
    wrapped continuation lines of a multi-line finding are not included.

    Args:
        path: Path to the reviewer report.

    Returns:
        The excerpt, truncated to ``MAX_FINDINGS_CHARS``, or ``""`` when the
        file is unreadable, the section is missing or empty, or no substantive
        P0/P1 item exists.

    Never raises: the whole body, including the file read, runs under a broad
    ``except`` so the contract also holds for failures that are not
    ``OSError`` (for example a ``path`` that is ``None`` or contains a NUL
    byte).
    """
    try:
        content = _read(path)
        if content is None:
            return ""

        body = _strip_frontmatter(content)
        findings_body = _section_body(body, "Findings")
        if not findings_body.strip():
            return ""

        blocks: list[str] = []
        for header, sub_body in _findings_subsections(findings_body):
            if not _P01_HEADER_RE.search(header):
                continue
            kept: list[str] = []
            for line in sub_body:
                payload = _item_payload(line)
                if payload is None:
                    continue
                if _is_placeholder_item(payload):
                    continue
                # Keep the original line (marker and checkbox included) so the
                # embedded text stays verbatim.
                kept.append(line.rstrip())
            if kept:
                blocks.append("\n".join([header.rstrip(), *kept]))

        if not blocks:
            return ""
        return _truncate("\n\n".join(blocks), MAX_FINDINGS_CHARS)
    except Exception as e:  # defensive: never raise out of the extractor
        logger.debug(
            "review_parse: extract_review_findings failed for %s: %s", path, e
        )
        return ""
|
||
|
||
|
||
def extract_test_failures(path: str) -> str:
    """Return the failure-relevant excerpt of a tester report (13-test-report.md).

    Picks the first non-empty source, in priority order:

    1. ``## Вывод pytest`` — the pytest run output (shows failing tests);
    2. lines of the ``## Результаты`` section that contain ``FAIL``
       (case-insensitive substring check per line);
    3. ``## Итог`` — the verdict summary.

    Section headings are matched as case-insensitive substrings of ``## ``
    lines. The gate ``reason`` is added by the caller; this returns the
    report-body excerpt on top of it.

    Args:
        path: Path to the tester report.

    Returns:
        The excerpt, truncated to ``MAX_FAILURES_CHARS``, or ``""`` when the
        file is unreadable or none of the sources yields any text.

    Never raises: the whole body, including the file read, runs under a broad
    ``except`` so the contract also holds for failures that are not
    ``OSError`` (for example a ``path`` that is ``None`` or contains a NUL
    byte).
    """
    try:
        content = _read(path)
        if content is None:
            return ""

        # 1. pytest output.
        pytest_out = _section_body(content, "Вывод pytest").strip()
        if pytest_out:
            return _truncate(pytest_out, MAX_FAILURES_CHARS)

        # 2. FAIL rows from the results table.
        results = _section_body(content, "Результаты")
        fail_rows = [ln.rstrip() for ln in results.splitlines() if "FAIL" in ln.upper()]
        if fail_rows:
            return _truncate("\n".join(fail_rows).strip(), MAX_FAILURES_CHARS)

        # 3. Verdict summary.
        itog = _section_body(content, "Итог").strip()
        if itog:
            return _truncate(itog, MAX_FAILURES_CHARS)

        return ""
    except Exception as e:  # defensive: never raise out of the extractor
        logger.debug(
            "review_parse: extract_test_failures failed for %s: %s", path, e
        )
        return ""
|