"""Defensive extractors for reviewer / tester artifact bodies (ORCH-046). When a task is rolled back to ``development`` the stage engine builds the ``task_desc`` that ends up in the developer agent's ``.task-dev.md``. Historically that text only carried a *link* to the artifact file (12-review.md / 13-test-report.md); the developer agent had to go read the file, and the key must-fix points (reviewer P0/P1 findings, tester failure reason) were lost in transit — "испорченный телефон" that burns the retry budget. This module extracts the **verbatim** must-fix text so the stage engine can embed it directly in ``task_desc`` (ADR docs/work-items/ORCH-046/06-adr/ADR-001-*). Contract — **never raises** (mirrors ``src/frontmatter.py`` and ``src/qg/checks.py::_parse_tests_verdict``): any error — missing file, IOError, malformed markdown/YAML, missing section — yields ``""``. The caller then falls back to the previous link-only ``task_desc``. No network calls; disk reads only. """ import logging import re logger = logging.getLogger("orchestrator.review_parse") # Truncation limits (module-level per ТЗ §2.3). The full context always stays in # the artifact file; the embedded text is a focused excerpt. MAX_FINDINGS_CHARS = 2000 MAX_FAILURES_CHARS = 2000 _TRUNCATED_MARKER = "\n…(truncated)" # Recognize a `### P0`/`### P1` subsection header by the presence of the P0/P1 # token, tolerant to case and the dash/em-dash that follows it. _P01_HEADER_RE = re.compile(r"(? str | None: """Read a file as UTF-8. Never raises; returns None on any OS error.""" try: with open(path, "r", encoding="utf-8", errors="replace") as f: return f.read() except OSError as e: logger.debug(f"review_parse: cannot open {path}: {e}") return None def _strip_frontmatter(content: str) -> str: """Drop a leading ``--- … ---`` YAML frontmatter block, if present.""" if content.startswith("---"): parts = content.split("---", 2) if len(parts) >= 3: return parts[2] return content def _truncate(text: str, limit: int) -> str: """Trim ``text`` to ``limit`` chars, appending a truncation marker if cut.""" if len(text) <= limit: return text return text[:limit].rstrip() + _TRUNCATED_MARKER def _section_body(md: str, heading_token: str) -> str: """Return the body lines under the first ``## <…heading_token…>`` heading. Capture stops at the next level-2 (``## ``) heading. Matching is case-insensitive substring match on the heading line, so callers pass a token like ``"Вывод pytest"`` or ``"Findings"``. ``### ``-level headers do NOT delimit the section (they start with ``"### "``, not ``"## "``). """ out: list[str] = [] capturing = False for line in md.splitlines(): if line.startswith("## "): if capturing: break if heading_token.lower() in line.lower(): capturing = True continue if capturing: out.append(line) return "\n".join(out) def _is_placeholder_item(text: str) -> bool: """True for empty or template-placeholder list items (non-substantive). The canonical reviewer template seeds each severity with ``- [ ] <описание> (если есть)``. Such lines must be ignored so an empty P0/P1 subsection does not leak the placeholder into ``task_desc``. """ t = text.strip() if not t: return True if "(если есть)" in t: return True # An item whose entire payload is an angle-bracket placeholder, e.g. "<описание>". if t.startswith("<") and t.endswith(">"): return True return False def _item_payload(line: str) -> str | None: """If ``line`` is a markdown list item, return its payload text; else None. Handles ``- foo``, ``* foo`` and checkbox forms ``- [ ] foo`` / ``- [x] foo``. """ m = re.match(r"\s*[-*]\s+(?:\[[ xX]?\]\s*)?(.*)$", line) if not m: return None return m.group(1) def _findings_subsections(findings_body: str): """Yield ``(header_line, body_lines)`` for each ``### `` subsection.""" header: str | None = None body: list[str] = [] for line in findings_body.splitlines(): if line.startswith("### "): if header is not None: yield header, body header = line body = [] elif header is not None: body.append(line) if header is not None: yield header, body def extract_review_findings(path: str) -> str: """Дословный текст P0/P1 findings из 12-review.md. Never raises; '' при ошибке/пусто. Reads the ``## Findings`` section of a reviewer report and returns the verbatim P0 (Blocker) and P1 (Must fix) subsection items, suitable for embedding in a rollback ``task_desc``. P2/P3 are ignored. Empty/placeholder-only subsections are skipped; if no substantive P0/P1 item exists, returns ``""``. The result is truncated to ``MAX_FINDINGS_CHARS``. """ content = _read(path) if content is None: return "" try: body = _strip_frontmatter(content) findings_body = _section_body(body, "Findings") if not findings_body.strip(): return "" blocks: list[str] = [] for header, sub_body in _findings_subsections(findings_body): if not _P01_HEADER_RE.search(header): continue kept: list[str] = [] for line in sub_body: payload = _item_payload(line) if payload is None: continue if _is_placeholder_item(payload): continue kept.append(line.rstrip()) if kept: blocks.append("\n".join([header.rstrip(), *kept])) if not blocks: return "" return _truncate("\n\n".join(blocks), MAX_FINDINGS_CHARS) except Exception as e: # defensive: never raise out of the extractor logger.debug(f"review_parse: extract_review_findings failed for {path}: {e}") return "" def extract_test_failures(path: str) -> str: """Релевантный фрагмент тела 13-test-report.md (причина FAIL). Never raises; '' при ошибке/пусто. Picks the first non-empty source, in priority order: 1. ``## Вывод pytest`` — the pytest run output (shows failing tests); 2. rows of the ``## Результаты`` table that contain ``FAIL``; 3. ``## Итог`` — the verdict summary. The result is truncated to ``MAX_FAILURES_CHARS``. The gate ``reason`` is added by the caller; this returns the report-body excerpt on top of it. """ content = _read(path) if content is None: return "" try: # 1. pytest output. pytest_out = _section_body(content, "Вывод pytest").strip() if pytest_out: return _truncate(pytest_out, MAX_FAILURES_CHARS) # 2. FAIL rows from the results table. results = _section_body(content, "Результаты") fail_rows = [ln.rstrip() for ln in results.splitlines() if "FAIL" in ln.upper()] if fail_rows: return _truncate("\n".join(fail_rows).strip(), MAX_FAILURES_CHARS) # 3. Verdict summary. itog = _section_body(content, "Итог").strip() if itog: return _truncate(itog, MAX_FAILURES_CHARS) return "" except Exception as e: # defensive: never raise out of the extractor logger.debug(f"review_parse: extract_test_failures failed for {path}: {e}") return ""