From 51a76e8169ea1b6e66510bc080029b40a943dffd Mon Sep 17 00:00:00 2001 From: claude-bot Date: Fri, 5 Jun 2026 21:03:32 +0000 Subject: [PATCH] fix(qg): read result: alongside verdict:/status: in tests gate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _parse_tests_verdict now accepts three equal-rank machine-readable frontmatter fields in 13-test-report.md — result: (canonical tester output), verdict: and status: (legacy/enduro-trails). Any one non-empty field suffices; a negative token in any field stays authoritative. Fixes the producer/consumer contract mismatch where the tester emits `result: PASS` (per .openclaw/agents/tester.md) but the gate only read verdict:/status:, causing a testing->development rollback loop until MAX_DEVELOPER_RETRIES (observed on ORCH-17). Token sets frozen and gate signature/QG_CHECKS unchanged for full backward compatibility. Refs: ORCH-047 Co-Authored-By: Claude Opus 4.7 --- CHANGELOG.md | 1 + src/qg/checks.py | 44 +++++++++++++++++++++++------------- tests/test_qg.py | 58 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 88 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7a43352..7b1e1bc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ - Цепочка стадий: `... testing → deploy-staging → deploy → done` (была без `deploy-staging`). ### Fixed +- **Testing-гейт `check_tests_passed` читает `result:` наравне с `verdict:`/`status:`** (ORCH-047): парсер `_parse_tests_verdict` (`src/qg/checks.py`) теперь принимает три равноправных машиночитаемых поля frontmatter `13-test-report.md` — `result:` (канон промпта тестера `.openclaw/agents/tester.md`, `result: PASS|FAIL`), плюс легаси `verdict:` и `status:` (enduro-trails ET-001..ET-014); достаточно любого одного непустого. Устраняет рассинхрон контракта: тестер честно эмитил `result: PASS` без `verdict:`/`status:`, парсер попадал в ветку «нет машинного вердикта» → откат `testing → development` в петлю до исчерпания `MAX_DEVELOPER_RETRIES` (наблюдалось на ORCH-17; ORCH-016 прошёл лишь из-за избыточного дублирования полей). Семантика приоритетов сохранена и распространена на все три поля через объединённую строку: negative-токен в любом поле авторитетен (перебивает positive), наборы токенов заморожены (обратная совместимость). Сигнатура гейта, имя и реестр `QG_CHECKS` не менялись. ADR `docs/work-items/ORCH-047/06-adr/ADR-001-result-field-in-tests-gate.md`. Тесты: `tests/test_qg.py::TestCheckTestsPassed`. - БАГ-8: провал deploy/deploy-staging → корректный откат на `development`. - Изоляция тестов от живого Plane API (PR #27): autouse-фикстура сброса settings. diff --git a/src/qg/checks.py b/src/qg/checks.py index 8c97ad5..3d5e789 100644 --- a/src/qg/checks.py +++ b/src/qg/checks.py @@ -188,8 +188,11 @@ def check_tests_passed(repo: str, work_item_id: str, branch: str | None = None) explicitly marked `verdict: BLOCKED` / `status: blocked` but whose prose mentioned "23 passed" / "✅ PASS" / "All checks passed" was treated as a pass, and an unfinished feature reached Done. This mirrors check_reviewer_verdict (S-5) and - check_deploy_status (БАГ 8): read ONLY the YAML frontmatter `verdict:` / `status:` - fields, never the body. + check_deploy_status (БАГ 8): read ONLY the YAML frontmatter, never the body. + + ORCH-047: the machine verdict is read from any of three equal-rank frontmatter + fields — `result:` (canonical, what the tester prompt emits), `verdict:` or + `status:` (legacy / enduro-trails). See _parse_tests_verdict. File: docs/work-items//13-test-report.md """ @@ -222,15 +225,20 @@ _TESTS_POSITIVE_TOKENS = ("PASSED", "PASS", "READY-TO-DEPLOY", "READY_TO_DEPLOY" def _parse_tests_verdict(content: str) -> tuple[bool, str]: """Map a 13-test-report.md body to a quality-gate verdict by reading ONLY the - machine-readable `verdict:` (and corroborating `status:`) YAML frontmatter fields. + machine-readable YAML frontmatter fields — never the prose body. + + Three equal-rank fields are accepted (ORCH-047): `result:` (the canonical field + the tester prompt `.openclaw/agents/tester.md` is told to emit, `result: PASS|FAIL`), + plus `verdict:` and `status:` (legacy / enduro-trails ET-001..ET-014). ANY single + non-empty field is sufficient. Token sets are frozen for backward compatibility. Rules: - - No frontmatter / bad YAML / neither field present -> (False, reason). - - A negative token (BLOCKED/FAILED/...) in verdict OR status -> (False) and is - authoritative (ET-013 main case: verdict BLOCKED wins over any prose PASS). - - Otherwise a positive token (PASS/PASSED/READY-TO-DEPLOY/...) in verdict OR - status -> (True). - - Anything else (unrecognized / empty verdict) -> (False, reason). + - No frontmatter / bad YAML / none of the three fields present -> (False, reason). + - A negative token (BLOCKED/FAILED/...) in ANY field -> (False) and is + authoritative (ET-013 main case: verdict BLOCKED wins over any prose PASS, and + beats a positive token in another field). + - Otherwise a positive token (PASS/PASSED/READY-TO-DEPLOY/...) in ANY field -> (True). + - Anything else (fields set but unrecognized) -> (False, reason). """ import yaml @@ -250,19 +258,25 @@ def _parse_tests_verdict(content: str) -> tuple[bool, str]: verdict = str(fm.get("verdict", "") or "").upper().strip() status = str(fm.get("status", "") or "").upper().strip() + result = str(fm.get("result", "") or "").upper().strip() - if not verdict and not status: - return False, "No machine-readable verdict/status in test report frontmatter" + if not verdict and not status and not result: + return False, "No machine-readable verdict/status/result in test report frontmatter" - fields = f"{verdict} {status}" + value = verdict or status or result + fields = f"{verdict} {status} {result}" for neg in _TESTS_NEGATIVE_TOKENS: if neg in fields: - return False, f"Test verdict: {verdict or status} ({neg})" + return False, f"Test verdict: {value} ({neg})" for pos in _TESTS_POSITIVE_TOKENS: if pos in fields: - return True, f"Test verdict: {verdict or status} (PASS)" + return True, f"Test verdict: {value} (PASS)" - return False, f"No recognized PASS verdict in frontmatter (verdict={verdict!r}, status={status!r})" + return ( + False, + f"No recognized PASS verdict in frontmatter " + f"(verdict={verdict!r}, status={status!r}, result={result!r})", + ) def check_analysis_approved(repo: str, work_item_id: str, branch: str | None = None) -> tuple[bool, str]: diff --git a/tests/test_qg.py b/tests/test_qg.py index d5fe2c7..eb41680 100644 --- a/tests/test_qg.py +++ b/tests/test_qg.py @@ -322,6 +322,64 @@ class TestCheckTestsPassed: assert passed is False assert "not found" in reason.lower() + # --- ORCH-047: `result:` is read as an equal-rank machine field --- + + def test_result_pass_passes(self, setup_work_item_dir): + # TC-01 / AC-01: canonical tester field `result: PASS` (no verdict/status). + self._write( + setup_work_item_dir, + "---\ntype: test-report\nresult: PASS\n---\n\n# Test Report\n", + ) + passed, reason = check_tests_passed("enduro-trails", "ET-001") + assert passed is True + assert "PASS" in reason + + def test_result_fail_fails(self, setup_work_item_dir): + # TC-02 / AC-02: `result: FAIL` (no verdict/status) -> rollback, reason has FAIL. + self._write(setup_work_item_dir, "---\nresult: FAIL\n---\n\nbody\n") + passed, reason = check_tests_passed("enduro-trails", "ET-001") + assert passed is False + assert "FAIL" in reason + + def test_result_pass_but_verdict_blocked_fails(self, setup_work_item_dir): + # TC-03 / AC-03: negative in another field is authoritative over result: PASS. + self._write( + setup_work_item_dir, + "---\nresult: PASS\nverdict: BLOCKED\n---\n\n23 passed\n", + ) + passed, reason = check_tests_passed("enduro-trails", "ET-001") + assert passed is False + assert "BLOCKED" in reason + + def test_result_pass_but_status_failed_fails(self, setup_work_item_dir): + # TC-04 / AC-03: status: failed authoritative over result: PASS. + self._write( + setup_work_item_dir, + "---\nresult: PASS\nstatus: failed\n---\n\nbody\n", + ) + passed, reason = check_tests_passed("enduro-trails", "ET-001") + assert passed is False + assert "FAILED" in reason + + def test_result_ready_to_deploy_passes(self, setup_work_item_dir): + # TC-05 / AC-04: positive token without the word PASS, in result field. + self._write( + setup_work_item_dir, + "---\nresult: ready-to-deploy\n---\n\nbody\n", + ) + passed, reason = check_tests_passed("enduro-trails", "ET-001") + assert passed is True + + def test_no_machine_field_reason_mentions_result(self, setup_work_item_dir): + # AC-06: none of result/verdict/status -> fail; reason now lists result too. + self._write( + setup_work_item_dir, + "---\ntype: test-report\nversion: 1\n---\n\nResult: PASS\n", + ) + passed, reason = check_tests_passed("enduro-trails", "ET-001") + assert passed is False + assert "result" in reason.lower() + class TestCheckDeployStatus: """BUG 8: deploy -> done must be gated on the deployer's machine-readable