From e62d51aa7731a46792765af7b2978938259c578c Mon Sep 17 00:00:00 2001 From: claude-bot Date: Fri, 5 Jun 2026 18:34:25 +0000 Subject: [PATCH] fix(qg): testing gate reads documented tester `result:` frontmatter key (ORCH-017) check_tests_passed/_parse_tests_verdict gated the testing -> deploy-staging transition on `verdict:`/`status:` in 13-test-report.md, but the tester agent prompt (.openclaw/agents/tester*) documents `result: PASS | FAIL` as THE machine-readable field. A report that followed the contract literally (ORCH-017: only `result: PASS`, no verdict:/status:) was bounced back to development with a misleading "Tests FAILED". ORCH-016 only passed because its report redundantly carried both `verdict:` and `result:`. Treat `result:` as a first-class machine field alongside verdict/status; a negative token in any field stays authoritative (ET-013 contract preserved). Self-hosting QG fix: unblocks every project whose tester emits only `result:`. Docs updated in-PR: CHANGELOG, architecture README machine-keys note. Tests: test_qg.py::TestCheckTestsPassed::test_result_pass_only_passes / _fail_only_fails. Refs: ORCH-017 Co-Authored-By: Claude Opus 4.7 --- CHANGELOG.md | 1 + docs/architecture/README.md | 2 +- src/qg/checks.py | 38 +++++++++++++++++++++++++------------ tests/test_qg.py | 23 ++++++++++++++++++++++ 4 files changed, 51 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 53b13bf..5183aab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ - Цепочка стадий: `... testing → deploy-staging → deploy → done` (была без `deploy-staging`). ### Fixed +- **Гейт `check_tests_passed` теперь читает документированный ключ `result:`** (ORCH-017): `_parse_tests_verdict` распознавал только `verdict:`/`status:` во frontmatter `13-test-report.md`, тогда как промпт tester-агента (`.openclaw/agents/tester*`) предписывает писать `result: PASS | FAIL`. 
Отчёт, следующий контракту буквально (только `result: PASS`, без `verdict:`/`status:`), проваливал гейт с обманчивым «Tests FAILED» и откатывался на `development` (ORCH-016 проходил лишь потому, что дублировал `verdict:` и `result:`). Теперь `result:` — равноправное машинное поле наряду с `verdict:`/`status:`; отрицательный токен в любом из полей по-прежнему авторитетен. Тесты: `tests/test_qg.py::TestCheckTestsPassed::test_result_pass_only_passes`, `…::test_result_fail_only_fails`. - БАГ-8: провал deploy/deploy-staging → корректный откат на `development`. - Изоляция тестов от живого Plane API (PR #27): autouse-фикстура сброса settings. diff --git a/docs/architecture/README.md b/docs/architecture/README.md index 3eef0ce..b4c5014 100644 --- a/docs/architecture/README.md +++ b/docs/architecture/README.md @@ -58,7 +58,7 @@ created → analysis → architecture → development → review → testing → ``` - **Длительность** считается launcher'ом (`_monitor_agent`) и пробрасывается в `_post_usage_comments`; для analyst (коммент строится в `stage_engine`) используется DB-фоллбэк `usage.get_agent_duration(task_id, agent)`. -- **Vердикт-парсер** — `src/frontmatter.read_frontmatter_value(...)` (defensive, никогда не raise). Машинные ключи: `verdict:` (reviewer/tester), `deploy_status:` (14-deploy-log.md), `staging_status:` (15-staging-log.md). +- **Вердикт-парсер** — `src/frontmatter.read_frontmatter_value(...)` (defensive, никогда не raise). Машинные ключи: `verdict:` (reviewer/tester), `result:` (tester `13-test-report.md`, основной ключ по промпту tester-агента; `check_tests_passed` читает `verdict:`/`status:`/`result:`), `deploy_status:` (14-deploy-log.md), `staging_status:` (15-staging-log.md). - Формат коммента **не** меняет реестр гейтов и стадий; коммент — отображение, не управление. 
## База данных (SQLite) diff --git a/src/qg/checks.py b/src/qg/checks.py index 0c02b6b..f90cff2 100644 --- a/src/qg/checks.py +++ b/src/qg/checks.py @@ -179,15 +179,24 @@ _TESTS_POSITIVE_TOKENS = ("PASSED", "PASS", "READY-TO-DEPLOY", "READY_TO_DEPLOY" def _parse_tests_verdict(content: str) -> tuple[bool, str]: """Map a 13-test-report.md body to a quality-gate verdict by reading ONLY the - machine-readable `verdict:` (and corroborating `status:`) YAML frontmatter fields. + machine-readable `verdict:` / `status:` / `result:` YAML frontmatter fields. + + ORCH-017: the tester agent prompt (`.openclaw/agents/tester*`) documents + `result: PASS | FAIL` as THE machine-readable field, but this gate previously + read only `verdict:`/`status:`. A tester that followed the documented contract + literally (e.g. ORCH-017's report: `result: PASS`, no verdict:/status:) was + bounced back to development with a misleading "Tests FAILED". We now treat + `result:` as a first-class machine field alongside verdict/status so the gate + matches the contract the tester is actually told to emit. (ORCH-016 only passed + before because its report redundantly carried both `verdict:` AND `result:`.) Rules: - - No frontmatter / bad YAML / neither field present -> (False, reason). - - A negative token (BLOCKED/FAILED/...) in verdict OR status -> (False) and is + - No frontmatter / bad YAML / none of the three fields present -> (False, reason). + - A negative token (BLOCKED/FAILED/...) in any field -> (False) and is authoritative (ET-013 main case: verdict BLOCKED wins over any prose PASS). - - Otherwise a positive token (PASS/PASSED/READY-TO-DEPLOY/...) in verdict OR - status -> (True). - - Anything else (unrecognized / empty verdict) -> (False, reason). + - Otherwise a positive token (PASS/PASSED/READY-TO-DEPLOY/...) in any field + -> (True). + - Anything else (unrecognized / empty fields) -> (False, reason). 
""" import yaml @@ -207,19 +216,24 @@ def _parse_tests_verdict(content: str) -> tuple[bool, str]: verdict = str(fm.get("verdict", "") or "").upper().strip() status = str(fm.get("status", "") or "").upper().strip() + result = str(fm.get("result", "") or "").upper().strip() - if not verdict and not status: - return False, "No machine-readable verdict/status in test report frontmatter" + if not verdict and not status and not result: + return False, "No machine-readable verdict/status/result in test report frontmatter" - fields = f"{verdict} {status}" + label = verdict or status or result + fields = f"{verdict} {status} {result}" for neg in _TESTS_NEGATIVE_TOKENS: if neg in fields: - return False, f"Test verdict: {verdict or status} ({neg})" + return False, f"Test verdict: {label} ({neg})" for pos in _TESTS_POSITIVE_TOKENS: if pos in fields: - return True, f"Test verdict: {verdict or status} (PASS)" + return True, f"Test verdict: {label} (PASS)" - return False, f"No recognized PASS verdict in frontmatter (verdict={verdict!r}, status={status!r})" + return False, ( + "No recognized PASS verdict in frontmatter " + f"(verdict={verdict!r}, status={status!r}, result={result!r})" + ) def check_analysis_approved(repo: str, work_item_id: str, branch: str | None = None) -> tuple[bool, str]: diff --git a/tests/test_qg.py b/tests/test_qg.py index e50b02a..1eecdb4 100644 --- a/tests/test_qg.py +++ b/tests/test_qg.py @@ -216,6 +216,29 @@ class TestCheckTestsPassed: passed, reason = check_tests_passed("enduro-trails", "ET-001") assert passed is True + def test_result_pass_only_passes(self, setup_work_item_dir): + # ORCH-017: the tester agent prompt documents `result: PASS | FAIL` as the + # machine-readable field. A report that follows that contract literally + # (only `result: PASS`, no verdict:/status:) MUST pass the gate. Before this + # fix the gate ignored `result:` and bounced such reports to development. 
+ self._write( + setup_work_item_dir, + "---\ntype: test-report\nwork_item_id: ET-001\nresult: PASS\n---\n\nbody\n", + ) + passed, reason = check_tests_passed("enduro-trails", "ET-001") + assert passed is True + assert "PASS" in reason + + def test_result_fail_only_fails(self, setup_work_item_dir): + # The negative side of the documented `result: PASS | FAIL` contract. + self._write( + setup_work_item_dir, + "---\ntype: test-report\nresult: FAIL\n---\n\n23 passed in body\n", + ) + passed, reason = check_tests_passed("enduro-trails", "ET-001") + assert passed is False + assert "FAIL" in reason.upper() + def test_blocked_verdict_with_pass_in_body_fails(self, setup_work_item_dir): # THE ET-013 BUG: verdict BLOCKED but body is full of "PASS"/"passed". self._write(