fix(qg): read result: alongside verdict:/status: in tests gate

_parse_tests_verdict now accepts three equal-rank machine-readable frontmatter fields in 13-test-report.md: result: (canonical tester output), verdict: and status: (legacy/enduro-trails). Any one non-empty field suffices; a negative token in any field stays authoritative and beats a positive token in another field. This fixes the producer/consumer contract mismatch in which the tester emits `result: PASS` (per .openclaw/agents/tester.md) but the gate only read verdict:/status:, causing a testing->development rollback loop until MAX_DEVELOPER_RETRIES was reached (observed on ORCH-17). The token sets are frozen, and the gate signature and QG_CHECKS are unchanged, for full backward compatibility.

Refs: ORCH-047
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-05 21:03:32 +00:00
parent 75fb4069a4
commit 51a76e8169
3 changed files with 88 additions and 15 deletions
--- a/src/qg/checks.py
+++ b/src/qg/checks.py
@@ -188,8 +188,11 @@ def check_tests_passed(repo: str, work_item_id: str, branch: str | None = None)
    explicitly marked `verdict: BLOCKED` / `status: blocked` but whose prose mentioned
    "23 passed" / "✅ PASS" / "All checks passed" was treated as a pass, and an
    unfinished feature reached Done. This mirrors check_reviewer_verdict (S-5) and
-    check_deploy_status (БАГ 8): read ONLY the YAML frontmatter `verdict:` / `status:`
-    fields, never the body.
+    check_deploy_status (БАГ 8): read ONLY the YAML frontmatter, never the body.
+
+    ORCH-047: the machine verdict is read from any of three equal-rank frontmatter
+    fields — `result:` (canonical, what the tester prompt emits), `verdict:` or
+    `status:` (legacy / enduro-trails). See _parse_tests_verdict.

    File: docs/work-items/<work_item_id>/13-test-report.md
    """
@@ -222,15 +225,20 @@ _TESTS_POSITIVE_TOKENS = ("PASSED", "PASS", "READY-TO-DEPLOY", "READY_TO_DEPLOY"

 def _parse_tests_verdict(content: str) -> tuple[bool, str]:
    """Map a 13-test-report.md body to a quality-gate verdict by reading ONLY the
-    machine-readable `verdict:` (and corroborating `status:`) YAML frontmatter fields.
+    machine-readable YAML frontmatter fields — never the prose body.
+
+    Three equal-rank fields are accepted (ORCH-047): `result:` (the canonical field
+    the tester prompt `.openclaw/agents/tester.md` is told to emit, `result: PASS|FAIL`),
+    plus `verdict:` and `status:` (legacy / enduro-trails ET-001..ET-014). ANY single
+    non-empty field is sufficient. Token sets are frozen for backward compatibility.

    Rules:
-      - No frontmatter / bad YAML / neither field present -> (False, reason).
-      - A negative token (BLOCKED/FAILED/...) in verdict OR status -> (False) and is
-        authoritative (ET-013 main case: verdict BLOCKED wins over any prose PASS).
-      - Otherwise a positive token (PASS/PASSED/READY-TO-DEPLOY/...) in verdict OR
-        status -> (True).
-      - Anything else (unrecognized / empty verdict) -> (False, reason).
+      - No frontmatter / bad YAML / none of the three fields present -> (False, reason).
+      - A negative token (BLOCKED/FAILED/...) in ANY field -> (False) and is
+        authoritative (ET-013 main case: verdict BLOCKED wins over any prose PASS, and
+        beats a positive token in another field).
+      - Otherwise a positive token (PASS/PASSED/READY-TO-DEPLOY/...) in ANY field -> (True).
+      - Anything else (fields set but unrecognized) -> (False, reason).
    """
    import yaml

@@ -250,19 +258,25 @@ def _parse_tests_verdict(content: str) -> tuple[bool, str]:

    verdict = str(fm.get("verdict", "") or "").upper().strip()
    status = str(fm.get("status", "") or "").upper().strip()
+    result = str(fm.get("result", "") or "").upper().strip()

-    if not verdict and not status:
-        return False, "No machine-readable verdict/status in test report frontmatter"
+    if not verdict and not status and not result:
+        return False, "No machine-readable verdict/status/result in test report frontmatter"

-    fields = f"{verdict} {status}"
+    value = verdict or status or result
+    fields = f"{verdict} {status} {result}"
    for neg in _TESTS_NEGATIVE_TOKENS:
        if neg in fields:
-            return False, f"Test verdict: {verdict or status} ({neg})"
+            return False, f"Test verdict: {value} ({neg})"
    for pos in _TESTS_POSITIVE_TOKENS:
        if pos in fields:
-            return True, f"Test verdict: {verdict or status} (PASS)"
+            return True, f"Test verdict: {value} (PASS)"

-    return False, f"No recognized PASS verdict in frontmatter (verdict={verdict!r}, status={status!r})"
+    return (
+        False,
+        f"No recognized PASS verdict in frontmatter "
+        f"(verdict={verdict!r}, status={status!r}, result={result!r})",
+    )


 def check_analysis_approved(repo: str, work_item_id: str, branch: str | None = None) -> tuple[bool, str]: