feat(stage-engine): embed verbatim reviewer/tester findings in rollback task_desc

При заворотах на development task_desc теперь несёт дословный must-fix текст (P0/P1 ревьюера, причина FAIL тестера) вместо одной ссылки на файл — developer-агент видит суть претензий сразу и не повторяет ту же ошибку, экономя retry-бюджет и токены общего инстанса. - Новый defensive-модуль src/review_parse.py (never-raise): extract_review_findings (P0/P1 из 12-review.md ## Findings), extract_test_failures (фрагмент тела 13-test-report.md: pytest output / FAIL-строки / Итог), усечение по лимиту. - Две rollback-ветки stage_engine: встраивают текст + сохраняют ссылку на полный файл; graceful-фоллбэк на ссылку-строку при битом/пустом артефакте. - Последовательность отката, retry-счётчик, поля AdvanceResult, реестр QG_CHECKS не менялись. - Доки: README (Stage Engine / Откаты), CHANGELOG. - Тесты: tests/test_review_parse.py, test_stage_engine.py::TestRollbackTaskDescEmbedding. Refs: ORCH-046 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-06 04:42:11 +00:00
parent 29e83341b5
commit 03c3d77cac
6 changed files with 663 additions and 6 deletions
--- a/src/review_parse.py
+++ b/src/review_parse.py
@@ -0,0 +1,205 @@
+"""Defensive extractors for reviewer / tester artifact bodies (ORCH-046).
+
+When a task is rolled back to ``development`` the stage engine builds the
+``task_desc`` that ends up in the developer agent's ``.task-dev.md``. Historically
+that text only carried a *link* to the artifact file (12-review.md /
+13-test-report.md); the developer agent had to go read the file, and the key
+must-fix points (reviewer P0/P1 findings, tester failure reason) were lost in
+transit — "испорченный телефон" that burns the retry budget.
+
+This module extracts the **verbatim** must-fix text so the stage engine can embed
+it directly in ``task_desc`` (ADR docs/work-items/ORCH-046/06-adr/ADR-001-*).
+
+Contract — **never raises** (mirrors ``src/frontmatter.py`` and
+``src/qg/checks.py::_parse_tests_verdict``): any error — missing file, IOError,
+malformed markdown/YAML, missing section — yields ``""``. The caller then falls
+back to the previous link-only ``task_desc``. No network calls; disk reads only.
+"""
+
+import logging
+import re
+
+logger = logging.getLogger("orchestrator.review_parse")
+
+# Truncation limits in characters (module-level per spec §2.3). The full context
+# always stays in the artifact file; the embedded text is a focused excerpt.
+MAX_FINDINGS_CHARS = 2000
+MAX_FAILURES_CHARS = 2000
+# Suffix appended to a cut excerpt; it is added on top of the limit, not within it.
+_TRUNCATED_MARKER = "\n…(truncated)"
+
+# Finds a P0/P1 token, case-insensitively (applied to `### ` header lines). The token
+# must not follow a letter/digit nor precede a digit, so "XP0" and "P10" do not match.
+_P01_HEADER_RE = re.compile(r"(?<![A-Za-z0-9])p[01](?![0-9])", re.IGNORECASE)
+
+
+def _read(path: str) -> str | None:
+    """Read ``path`` as UTF-8 text; None on an OS error or an invalid path value."""
+    try:
+        with open(path, "r", encoding="utf-8", errors="replace") as f:
+            return f.read()
+    except (OSError, ValueError, TypeError) as e:  # ValueError: NUL byte; TypeError: None
+        logger.debug("review_parse: cannot open %s: %s", path, e)
+        return None
+
+
+def _strip_frontmatter(content: str) -> str:
+    """Return ``content`` without its leading ``---``-fenced frontmatter, if any."""
+    if not content.startswith("---"):
+        return content
+    # At most two splits: text before the opening fence, the YAML, everything after.
+    pieces = content.split("---", 2)
+    return pieces[2] if len(pieces) >= 3 else content
+
+
+def _truncate(text: str, limit: int) -> str:
+    """Cap ``text`` at ``limit`` chars; a cut result gets the marker appended on top."""
+    fits = len(text) <= limit
+    # The marker is not counted against ``limit``; trailing whitespace is dropped first.
+    return text if fits else text[:limit].rstrip() + _TRUNCATED_MARKER
+
+
+def _section_body(md: str, heading_token: str) -> str:
+    """Collect the text under the first level-2 heading that mentions a token.
+
+    A line is a level-2 heading only when it starts with ``"## "``, so ``### ``
+    sub-headings stay inside the captured body. The token is matched as a
+    case-insensitive substring of the heading line. Capture ends at the next
+    ``## `` heading or at end of input; ``""`` when no heading matches.
+    """
+    needle = heading_token.lower()
+    collected: list[str] = []
+    inside = False
+    for row in md.splitlines():
+        if not row.startswith("## "):
+            if inside:
+                collected.append(row)
+        elif inside:
+            break  # the next level-2 heading closes the section
+        else:
+            inside = needle in row.lower()
+    return "\n".join(collected)
+
+
+def _is_placeholder_item(text: str) -> bool:
+    """Tell whether a list-item payload carries no real finding.
+
+    Three cases count as non-substantive: a blank payload, a payload containing
+    the template hint ``(если есть)``, and a payload that both starts with ``<``
+    and ends with ``>`` (such as ``<описание>``). The reviewer template seeds
+    each severity with ``- [ ] <описание> (если есть)``; filtering these keeps an
+    untouched P0/P1 subsection from leaking into ``task_desc``.
+    """
+    payload = text.strip()
+    return (
+        not payload
+        or "(если есть)" in payload
+        # A payload wholly wrapped in angle brackets is treated as a template stub.
+        or (payload.startswith("<") and payload.endswith(">"))
+    )
+
+
+def _item_payload(line: str) -> str | None:
+    """Return the payload of a markdown list item, or None for a non-item line.
+
+    Accepts bullets (``-``, ``*``, ``+``), ordered markers (``1.`` / ``1)``) and an
+    optional task checkbox (``[ ]``, ``[x]``, ``[X]``, ``[]``) after the marker.
+    """
+    # Marker, mandatory whitespace, optional checkbox, then the payload capture.
+    m = re.match(r"\s*(?:[-*+]|\d+[.)])\s+(?:\[[ xX]?\]\s*)?(.*)$", line)
+    return m.group(1) if m else None
+
+
+def _findings_subsections(findings_body: str):
+    """Generate a ``(header_line, body_lines)`` pair per ``### `` subsection."""
+    # ``current`` holds the open subsection; rows seen before any header are dropped.
+    current: tuple[str, list[str]] | None = None
+    for row in findings_body.splitlines():
+        if row.startswith("### "):
+            if current is not None:
+                yield current  # close the previous subsection
+            current = (row, [])
+        elif current is not None:
+            current[1].append(row)
+    # Flush the last open subsection, if there was at least one header.
+    if current is not None:
+        yield current
+
+
+def extract_review_findings(path: str) -> str:
+    """Return the verbatim P0/P1 findings of a reviewer report (12-review.md).
+
+    Reads the ``## Findings`` section and keeps, for every ``### `` subsection whose
+    header carries a P0 or P1 token, the header plus its substantive list items
+    (other severities, non-list lines and template placeholders are skipped). The
+    blocks are joined by a blank line and truncated to ``MAX_FINDINGS_CHARS``.
+    Never raises: an unreadable file, a missing section, no substantive item or
+    any unexpected error (including a bad ``path`` value) yields ``""``.
+    """
+    try:
+        # Reading happens inside the guard so that non-OSError failures of open()
+        # (TypeError for None, ValueError for an embedded NUL) cannot escape.
+        content = _read(path)
+        if content is None:
+            return ""
+
+        findings_body = _section_body(_strip_frontmatter(content), "Findings")
+        if not findings_body.strip():
+            return ""
+
+        blocks: list[str] = []
+        for header, sub_body in _findings_subsections(findings_body):
+            if not _P01_HEADER_RE.search(header):
+                continue
+            kept: list[str] = []
+            for line in sub_body:
+                payload = _item_payload(line)
+                if payload is None or _is_placeholder_item(payload):
+                    continue
+                kept.append(line.rstrip())
+            if kept:
+                blocks.append("\n".join([header.rstrip(), *kept]))
+
+        if not blocks:
+            return ""
+        return _truncate("\n\n".join(blocks), MAX_FINDINGS_CHARS)
+    except Exception as e:  # defensive: never raise out of the extractor
+        logger.debug("review_parse: extract_review_findings failed for %s: %s", path, e)
+        return ""
+
+
+def extract_test_failures(path: str) -> str:
+    """Return the excerpt of a test report (13-test-report.md) that explains a FAIL.
+
+    The first non-empty source wins, in priority order:
+      1. the body of ``## Вывод pytest`` (the pytest run output);
+      2. lines of ``## Результаты`` containing ``FAIL`` (case-insensitive);
+      3. the body of ``## Итог`` (the verdict summary).
+    The excerpt is truncated to ``MAX_FAILURES_CHARS``; the gate ``reason`` is not
+    part of it (the caller adds that). Never raises: an unreadable file, a bad
+    ``path`` value, no matching section or any error yields ``""``.
+    """
+    try:
+        # Read inside the guard: open() raises TypeError/ValueError (not OSError)
+        # for a None path or an embedded NUL, which must not escape either.
+        content = _read(path)
+        if content is None:
+            return ""
+
+        # 1. pytest output.
+        pytest_out = _section_body(content, "Вывод pytest").strip()
+        if pytest_out:
+            return _truncate(pytest_out, MAX_FAILURES_CHARS)
+
+        # 2. FAIL rows from the results table.
+        results = _section_body(content, "Результаты")
+        fail_rows = [ln.rstrip() for ln in results.splitlines() if "FAIL" in ln.upper()]
+        if fail_rows:
+            return _truncate("\n".join(fail_rows).strip(), MAX_FAILURES_CHARS)
+
+        # 3. Verdict summary; an empty body falls through to "".
+        itog = _section_body(content, "Итог").strip()
+        return _truncate(itog, MAX_FAILURES_CHARS) if itog else ""
+    except Exception as e:  # defensive: never raise out of the extractor
+        logger.debug("review_parse: extract_test_failures failed for %s: %s", path, e)
+        return ""
--- a/src/stage_engine.py
+++ b/src/stage_engine.py
@@ -32,6 +32,7 @@ from dataclasses import dataclass, field
 from .db import get_db, update_task_stage, enqueue_job
 from .stages import get_next_stage, get_qg_for_stage, get_agent_for_stage
 from .git_worktree import get_worktree_path
+from .review_parse import extract_review_findings, extract_test_failures
 from .qg.checks import QG_CHECKS
 from .notifications import (
    notify_stage_change,
@@ -416,12 +417,24 @@ def _handle_qg_failure_rollbacks(
        result.rolled_back_to = "development"
        retry_count = _developer_retry_count(task_id)
        if retry_count < MAX_DEVELOPER_RETRIES:
-            task_desc = (
+            # ORCH-046: embed the verbatim P0/P1 findings into task_desc so the
+            # developer agent sees the must-fix points directly (not just a link).
+            # extract_review_findings never raises; "" -> graceful link-only fallback.
+            review_ref = f"docs/work-items/{work_item_id}/12-review.md"
+            review_path = os.path.join(get_worktree_path(repo, branch), review_ref)
+            findings = extract_review_findings(review_path)
+            head = (
                f"Work item: {work_item_id}\nRepo: {repo}\nBranch: {branch}\n"
                f"Stage: development\nNote: REQUEST_CHANGES from reviewer "
-                f"(attempt {retry_count+1}/3). Fix findings in "
-                f"docs/work-items/{work_item_id}/12-review.md"
+                f"(attempt {retry_count+1}/3)."
            )
+            if findings:
+                task_desc = (
+                    f"{head}\nFindings (P0/P1):\n{findings}\n"
+                    f"Полный контекст: {review_ref}"
+                )
+            else:
+                task_desc = f"{head} Fix findings in {review_ref}"
            new_job = enqueue_job("developer", repo, task_desc, task_id=task_id)
            result.enqueued_agent = "developer"
            result.enqueued_job_id = new_job
@@ -452,11 +465,23 @@ def _handle_qg_failure_rollbacks(
        )
        retry_count = _developer_retry_count(task_id)
        if retry_count < MAX_DEVELOPER_RETRIES:
-            task_desc = (
+            # ORCH-046: embed the gate `reason` plus a verbatim excerpt of the
+            # test-report body (pytest output / FAIL rows / Итог) into task_desc.
+            # extract_test_failures never raises; "" -> graceful reason+link fallback.
+            report_ref = f"docs/work-items/{work_item_id}/13-test-report.md"
+            report_path = os.path.join(get_worktree_path(repo, branch), report_ref)
+            failures = extract_test_failures(report_path)
+            head = (
                f"Work item: {work_item_id}\nRepo: {repo}\nBranch: {branch}\n"
-                f"Stage: development\nNote: Tests FAILED. "
-                f"Fix failures described in docs/work-items/{work_item_id}/13-test-report.md"
+                f"Stage: development\nNote: Tests FAILED. Причина: {reason}."
            )
+            if failures:
+                task_desc = (
+                    f"{head}\nДетали:\n{failures}\n"
+                    f"Полный контекст: {report_ref}"
+                )
+            else:
+                task_desc = f"{head} Fix failures described in {report_ref}"
            new_job = enqueue_job("developer", repo, task_desc, task_id=task_id)
            result.enqueued_agent = "developer"
            result.enqueued_job_id = new_job