fix(ci): bounce task back to developer on red CI (capped retries)

Merge pull request 'fix(qg): use check_ci_green instead of local tests on development stage' (#17 ) from fix/drop-local-tests-qg into main
fix(qg): use check_ci_green instead of local tests on development stage
2026-06-04 01:39:40 +03:00 · 2026-06-04 01:24:14 +03:00 · 2026-06-04 01:22:43 +03:00 · 2026-06-04 00:44:40 +03:00
6 changed files with 183 additions and 9 deletions
--- a/src/qg/checks.py
+++ b/src/qg/checks.py
@@ -249,6 +249,9 @@ def check_reviewer_verdict(repo: str, work_item_id: str, branch: str | None = No
 def check_tests_local(repo: str, branch: str) -> tuple[bool, str]:
    """
    DEPRECATED: replaced by check_ci_green on the development stage (CI is now
    configured). Kept for backward-compat; not wired to any stage.
    S-1 fix: run the project test suite locally and judge by exit code, instead of
    depending on Gitea CI (which is not configured -> always false).
--- a/src/stages.py
+++ b/src/stages.py
@@ -13,7 +13,7 @@ STAGE_TRANSITIONS = {
    "created": {"next": "analysis", "agent": "analyst", "qg": None},
    "analysis": {"next": "architecture", "agent": "architect", "qg": "check_analysis_approved"},
    "architecture": {"next": "development", "agent": "developer", "qg": "check_architecture_done"},
-    "development": {"next": "review", "agent": "reviewer", "qg": "check_tests_local"},
+    "development": {"next": "review", "agent": "reviewer", "qg": "check_ci_green"},
    "review": {"next": "testing", "agent": "tester", "qg": "check_reviewer_verdict"},
    "testing": {"next": "deploy", "agent": "deployer", "qg": "check_tests_passed"},
    "deploy": {"next": "done", "agent": None, "qg": None},
--- a/src/webhooks/gitea.py
+++ b/src/webhooks/gitea.py
@@ -216,12 +216,31 @@ async def handle_ci_status(payload: dict):
        else:
            notify_qg_failure(task_id, current_stage, "check_ci_green", reason)
-    elif state == "failure":
+    elif state == "failure" and current_stage == "development":
-        # S-1: Gitea CI is NOT the authoritative gate anymore (the orchestrator runs
+        # CI is the authoritative gate for development -> review.
-        # tests locally via check_tests_local). Gitea CI is often unconfigured, so a
+        # On red CI: notify, then bounce the task back to the developer (capped retries),
-        # "failure"/empty status here is not actionable. Log only, do not alert.
+        # symmetric to the review REQUEST_CHANGES path.
-        logger.debug(f"Task {task_id}: Gitea CI state='failure' on branch '{branch}' "
+        notify_qg_failure(task_id, current_stage, "check_ci_green", f"Gitea CI failed on branch '{branch}'")
-                     f"(non-authoritative, suppressed — local tests are the gate)")
+        conn = get_db()
        retry_count = conn.execute(
            "SELECT COUNT(*) as cnt FROM agent_runs WHERE task_id = ? AND agent = 'developer'",
            (task_id,),
        ).fetchone()["cnt"]
        conn.close()
        if retry_count < MAX_DEV_RETRIES:
            # task already on 'development' — no stage change needed, just relaunch developer
            try:
                task_desc = (
                    f"Work item: {work_item_id}\nRepo: {repo_name}\nBranch: {branch}\n"
                    f"Stage: development\nNote: CI failed, fix and re-push (attempt {retry_count + 1}/{MAX_DEV_RETRIES})"
                )
                job_id = enqueue_job("developer", repo_name, task_desc, task_id=task_id)
                logger.info(f"Task {task_id}: CI failed, enqueued developer (attempt {retry_count + 1}, job_id={job_id})")
            except Exception as e:
                notify_error(task_id, f"Failed to relaunch developer after CI failure: {e}")
        else:
            notify_error(task_id, f"Max developer retries ({MAX_DEV_RETRIES}) reached after CI failure, escalating")
            logger.error(f"Task {task_id}: max retries reached after CI failure, needs manual intervention")
 async def handle_pr(payload: dict):
--- a/tests/test_qg.py
+++ b/tests/test_qg.py
@@ -19,6 +19,7 @@ from src.qg.checks import (
    check_tests_passed,
    check_tests_local,
 )
 from src.stages import get_qg_for_stage
@pytest.fixture(autouse=True)
@@ -189,6 +190,22 @@ class TestCheckTestsPassed:
        assert "not found" in reason.lower()
 class TestDevelopmentStageQG:
    """BUG 6: development stage QG is now check_ci_green (CI is the authoritative
    gate), not the deprecated check_tests_local."""
    def test_development_qg_is_check_ci_green(self):
        assert get_qg_for_stage("development") == "check_ci_green"
    def test_check_tests_local_is_deprecated_and_unwired(self):
        # Kept in the registry for backward-compat, but not wired to any stage.
        from src.qg.checks import QG_CHECKS
        from src.stages import STAGE_TRANSITIONS
        assert "check_tests_local" in QG_CHECKS
        wired = {t.get("qg") for t in STAGE_TRANSITIONS.values()}
        assert "check_tests_local" not in wired
 class TestCheckTestsLocal:
    """BUG 5: check_tests_local must run pytest directly (not make, which is
    not installed in the orchestrator container)."""
--- a/tests/test_stage_engine.py
+++ b/tests/test_stage_engine.py
@@ -203,10 +203,13 @@ class TestQgFailureDoesNotAdvance:
        assert _jobs() == []
    def test_webhook_path_emits_qg_failure_notification(self, monkeypatch):
-        """finished_agent=None -> generic QG-failure notification fires (plane parity)."""
+        """finished_agent=None -> generic QG-failure notification fires (plane parity).
        development stage QG is now check_ci_green (was check_tests_local).
        """
        monkeypatch.setattr(
            stage_engine, "QG_CHECKS",
-            {**stage_engine.QG_CHECKS, "check_tests_local": _fail("ci red")},
+            {**stage_engine.QG_CHECKS, "check_ci_green": _fail("ci red")},
        )
        task_id = _make_task("development")
        advance_stage(task_id, "development", "enduro-trails", "ET-001",
--- a/tests/test_webhooks.py
+++ b/tests/test_webhooks.py
@@ -1,4 +1,5 @@
 import pytest
 import asyncio
 import os
 import tempfile
 from unittest.mock import patch, MagicMock, AsyncMock
@@ -272,6 +273,46 @@ def test_gitea_ci_success_advances_to_review(mock_launcher, mock_ci):
    assert task["stage"] == "review"
@patch("src.webhooks.gitea.notify_qg_failure")
@patch("src.webhooks.gitea.launcher")
 def test_gitea_ci_failure_on_development_notifies_qg_failure(mock_launcher, mock_notify):
    """BUG 6: CI failure at development is now the authoritative QG gate failing.
    It must notify QG failure (not silently suppress) and must NOT advance the stage.
    """
    conn = get_db()
    conn.execute(
        "INSERT INTO tasks (plane_id, work_item_id, repo, branch, stage) VALUES (?, ?, ?, ?, ?)",
        ("ci-fail-001", "ET-011", "enduro-trails", "feature/ET-011-test", "development"),
    )
    conn.commit()
    conn.close()
    resp = client.post(
        "/webhook/gitea",
        json={
            "state": "failure",
            "branches": [{"name": "feature/ET-011-test"}],
            "repository": {"name": "enduro-trails"},
        },
        headers={"X-Gitea-Event": "status"},
    )
    assert resp.status_code == 200
    # QG failure was reported for the development stage with check_ci_green.
    assert mock_notify.called
    args, kwargs = mock_notify.call_args
    call = list(args) + list(kwargs.values())
    assert "development" in call
    assert "check_ci_green" in call
    # Stage did NOT advance.
    conn = get_db()
    task = conn.execute("SELECT * FROM tasks WHERE plane_id = 'ci-fail-001'").fetchone()
    conn.close()
    assert task["stage"] == "development"
 def test_gitea_webhook_pr():
    """PR event is accepted."""
    resp = client.post(
@@ -301,3 +342,94 @@ def test_plane_webhook_event_logged():
    conn.close()
    assert event is not None
    assert event["source"] == "plane"
 # ---------------------------------------------------------------------------
 # BUG 7: red CI on development must bounce the task back to the developer
 # (capped retries, symmetric to review REQUEST_CHANGES). These are pure-logic
 # tests: they invoke handle_ci_status() directly with mocked helpers so they do
 # not pass through the TestClient HMAC barrier (baseline 401s are off-limits).
 # ---------------------------------------------------------------------------
 def _ci_failure_payload():
    return {
        "state": "failure",
        "branches": [{"name": "feature/ET-011-test"}],
        "repository": {"name": "enduro-trails"},
    }
 def _mock_db_with_retry_count(count):
    """Build a get_db() mock whose retry_count query returns `count`."""
    conn = MagicMock()
    conn.execute.return_value.fetchone.return_value = {"cnt": count}
    return conn
@patch("src.webhooks.gitea.notify_error")
@patch("src.webhooks.gitea.notify_qg_failure")
@patch("src.webhooks.gitea.enqueue_job")
@patch("src.webhooks.gitea.update_task_stage")
@patch("src.webhooks.gitea.get_db")
@patch("src.webhooks.gitea.get_task_by_repo_branch")
@patch("src.webhooks.gitea.get_project_by_repo")
 def test_ci_failure_development_retries_developer_under_limit(
    mock_proj, mock_task, mock_get_db, mock_update_stage,
    mock_enqueue, mock_qg, mock_err,
 ):
    """retry_count < MAX_DEV_RETRIES → relaunch developer, stage untouched."""
    from src.webhooks.gitea import handle_ci_status
    mock_proj.return_value = {"repo": "enduro-trails"}
    mock_task.return_value = {
        "id": 1, "stage": "development", "work_item_id": "ET-011",
    }
    mock_get_db.return_value = _mock_db_with_retry_count(0)
    mock_enqueue.return_value = 42
    asyncio.run(handle_ci_status(_ci_failure_payload()))
    # QG failure was still reported (Slava sees both the failure and the retry).
    assert mock_qg.called
    # developer was re-enqueued.
    assert mock_enqueue.called
    assert mock_enqueue.call_args[0][0] == "developer"
    # No escalation.
    assert not mock_err.called
    # Stage stays on development — no update_task_stage in the CI-failure path.
    assert not mock_update_stage.called
@patch("src.webhooks.gitea.notify_error")
@patch("src.webhooks.gitea.notify_qg_failure")
@patch("src.webhooks.gitea.enqueue_job")
@patch("src.webhooks.gitea.update_task_stage")
@patch("src.webhooks.gitea.get_db")
@patch("src.webhooks.gitea.get_task_by_repo_branch")
@patch("src.webhooks.gitea.get_project_by_repo")
 def test_ci_failure_development_escalates_at_limit(
    mock_proj, mock_task, mock_get_db, mock_update_stage,
    mock_enqueue, mock_qg, mock_err,
 ):
    """retry_count >= MAX_DEV_RETRIES → escalate via notify_error, no relaunch."""
    from src.webhooks.gitea import handle_ci_status, MAX_DEV_RETRIES
    mock_proj.return_value = {"repo": "enduro-trails"}
    mock_task.return_value = {
        "id": 1, "stage": "development", "work_item_id": "ET-011",
    }
    mock_get_db.return_value = _mock_db_with_retry_count(MAX_DEV_RETRIES)
    asyncio.run(handle_ci_status(_ci_failure_payload()))
    # QG failure still reported.
    assert mock_qg.called
    # developer NOT re-enqueued at the cap.
    assert not mock_enqueue.called
    # Escalation message mentions CI failure.
    assert mock_err.called
    err_msg = " ".join(str(a) for a in mock_err.call_args[0])
    assert "Max developer retries" in err_msg
    assert "after CI failure" in err_msg
    # Stage untouched.
    assert not mock_update_stage.called
Author	SHA1	Message	Date
Dev Agent	3a285de11d	fix(ci): bounce task back to developer on red CI (capped retries)	2026-06-04 01:39:40 +03:00
Slava	7922f6b67b	Merge pull request 'fix(qg): use check_ci_green instead of local tests on development stage' (#17 ) from fix/drop-local-tests-qg into main	2026-06-04 01:24:14 +03:00
Dev Agent	e15d339b14	fix(qg): use check_ci_green instead of local tests on development stage	2026-06-04 01:22:43 +03:00
Slava	994f73a78e	Merge pull request 'fix(qg): run pytest directly instead of make in check_tests_local' (#16 ) from fix/qg-pytest-no-make into main	2026-06-04 00:44:40 +03:00