Compare commits

...

4 Commits

6 changed files with 183 additions and 9 deletions

View File

@@ -249,6 +249,9 @@ def check_reviewer_verdict(repo: str, work_item_id: str, branch: str | None = No
def check_tests_local(repo: str, branch: str) -> tuple[bool, str]: def check_tests_local(repo: str, branch: str) -> tuple[bool, str]:
""" """
DEPRECATED: replaced by check_ci_green on the development stage (CI is now
configured). Kept for backward-compat; not wired to any stage.
S-1 fix: run the project test suite locally and judge by exit code, instead of S-1 fix: run the project test suite locally and judge by exit code, instead of
depending on Gitea CI (which is not configured -> always false). depending on Gitea CI (which is not configured -> always false).

View File

@@ -13,7 +13,7 @@ STAGE_TRANSITIONS = {
"created": {"next": "analysis", "agent": "analyst", "qg": None}, "created": {"next": "analysis", "agent": "analyst", "qg": None},
"analysis": {"next": "architecture", "agent": "architect", "qg": "check_analysis_approved"}, "analysis": {"next": "architecture", "agent": "architect", "qg": "check_analysis_approved"},
"architecture": {"next": "development", "agent": "developer", "qg": "check_architecture_done"}, "architecture": {"next": "development", "agent": "developer", "qg": "check_architecture_done"},
"development": {"next": "review", "agent": "reviewer", "qg": "check_tests_local"}, "development": {"next": "review", "agent": "reviewer", "qg": "check_ci_green"},
"review": {"next": "testing", "agent": "tester", "qg": "check_reviewer_verdict"}, "review": {"next": "testing", "agent": "tester", "qg": "check_reviewer_verdict"},
"testing": {"next": "deploy", "agent": "deployer", "qg": "check_tests_passed"}, "testing": {"next": "deploy", "agent": "deployer", "qg": "check_tests_passed"},
"deploy": {"next": "done", "agent": None, "qg": None}, "deploy": {"next": "done", "agent": None, "qg": None},

View File

@@ -216,12 +216,31 @@ async def handle_ci_status(payload: dict):
else: else:
notify_qg_failure(task_id, current_stage, "check_ci_green", reason) notify_qg_failure(task_id, current_stage, "check_ci_green", reason)
elif state == "failure": elif state == "failure" and current_stage == "development":
# S-1: Gitea CI is NOT the authoritative gate anymore (the orchestrator runs # CI is the authoritative gate for development -> review.
# tests locally via check_tests_local). Gitea CI is often unconfigured, so a # On red CI: notify, then bounce the task back to the developer (capped retries),
# "failure"/empty status here is not actionable. Log only, do not alert. # symmetric to the review REQUEST_CHANGES path.
logger.debug(f"Task {task_id}: Gitea CI state='failure' on branch '{branch}' " notify_qg_failure(task_id, current_stage, "check_ci_green", f"Gitea CI failed on branch '{branch}'")
f"(non-authoritative, suppressed — local tests are the gate)") conn = get_db()
retry_count = conn.execute(
"SELECT COUNT(*) as cnt FROM agent_runs WHERE task_id = ? AND agent = 'developer'",
(task_id,),
).fetchone()["cnt"]
conn.close()
if retry_count < MAX_DEV_RETRIES:
# task already on 'development' — no stage change needed, just relaunch developer
try:
task_desc = (
f"Work item: {work_item_id}\nRepo: {repo_name}\nBranch: {branch}\n"
f"Stage: development\nNote: CI failed, fix and re-push (attempt {retry_count + 1}/{MAX_DEV_RETRIES})"
)
job_id = enqueue_job("developer", repo_name, task_desc, task_id=task_id)
logger.info(f"Task {task_id}: CI failed, enqueued developer (attempt {retry_count + 1}, job_id={job_id})")
except Exception as e:
notify_error(task_id, f"Failed to relaunch developer after CI failure: {e}")
else:
notify_error(task_id, f"Max developer retries ({MAX_DEV_RETRIES}) reached after CI failure, escalating")
logger.error(f"Task {task_id}: max retries reached after CI failure, needs manual intervention")
async def handle_pr(payload: dict): async def handle_pr(payload: dict):

View File

@@ -19,6 +19,7 @@ from src.qg.checks import (
check_tests_passed, check_tests_passed,
check_tests_local, check_tests_local,
) )
from src.stages import get_qg_for_stage
@pytest.fixture(autouse=True) @pytest.fixture(autouse=True)
@@ -189,6 +190,22 @@ class TestCheckTestsPassed:
assert "not found" in reason.lower() assert "not found" in reason.lower()
class TestDevelopmentStageQG:
"""BUG 6: development stage QG is now check_ci_green (CI is the authoritative
gate), not the deprecated check_tests_local."""
def test_development_qg_is_check_ci_green(self):
assert get_qg_for_stage("development") == "check_ci_green"
def test_check_tests_local_is_deprecated_and_unwired(self):
# Kept in the registry for backward-compat, but not wired to any stage.
from src.qg.checks import QG_CHECKS
from src.stages import STAGE_TRANSITIONS
assert "check_tests_local" in QG_CHECKS
wired = {t.get("qg") for t in STAGE_TRANSITIONS.values()}
assert "check_tests_local" not in wired
class TestCheckTestsLocal: class TestCheckTestsLocal:
"""BUG 5: check_tests_local must run pytest directly (not make, which is """BUG 5: check_tests_local must run pytest directly (not make, which is
not installed in the orchestrator container).""" not installed in the orchestrator container)."""

View File

@@ -203,10 +203,13 @@ class TestQgFailureDoesNotAdvance:
assert _jobs() == [] assert _jobs() == []
def test_webhook_path_emits_qg_failure_notification(self, monkeypatch): def test_webhook_path_emits_qg_failure_notification(self, monkeypatch):
"""finished_agent=None -> generic QG-failure notification fires (plane parity).""" """finished_agent=None -> generic QG-failure notification fires (plane parity).
development stage QG is now check_ci_green (was check_tests_local).
"""
monkeypatch.setattr( monkeypatch.setattr(
stage_engine, "QG_CHECKS", stage_engine, "QG_CHECKS",
{**stage_engine.QG_CHECKS, "check_tests_local": _fail("ci red")}, {**stage_engine.QG_CHECKS, "check_ci_green": _fail("ci red")},
) )
task_id = _make_task("development") task_id = _make_task("development")
advance_stage(task_id, "development", "enduro-trails", "ET-001", advance_stage(task_id, "development", "enduro-trails", "ET-001",

View File

@@ -1,4 +1,5 @@
import pytest import pytest
import asyncio
import os import os
import tempfile import tempfile
from unittest.mock import patch, MagicMock, AsyncMock from unittest.mock import patch, MagicMock, AsyncMock
@@ -272,6 +273,46 @@ def test_gitea_ci_success_advances_to_review(mock_launcher, mock_ci):
assert task["stage"] == "review" assert task["stage"] == "review"
@patch("src.webhooks.gitea.notify_qg_failure")
@patch("src.webhooks.gitea.launcher")
def test_gitea_ci_failure_on_development_notifies_qg_failure(mock_launcher, mock_notify):
"""BUG 6: CI failure at development is now the authoritative QG gate failing.
It must notify QG failure (not silently suppress) and must NOT advance the stage.
"""
conn = get_db()
conn.execute(
"INSERT INTO tasks (plane_id, work_item_id, repo, branch, stage) VALUES (?, ?, ?, ?, ?)",
("ci-fail-001", "ET-011", "enduro-trails", "feature/ET-011-test", "development"),
)
conn.commit()
conn.close()
resp = client.post(
"/webhook/gitea",
json={
"state": "failure",
"branches": [{"name": "feature/ET-011-test"}],
"repository": {"name": "enduro-trails"},
},
headers={"X-Gitea-Event": "status"},
)
assert resp.status_code == 200
# QG failure was reported for the development stage with check_ci_green.
assert mock_notify.called
args, kwargs = mock_notify.call_args
call = list(args) + list(kwargs.values())
assert "development" in call
assert "check_ci_green" in call
# Stage did NOT advance.
conn = get_db()
task = conn.execute("SELECT * FROM tasks WHERE plane_id = 'ci-fail-001'").fetchone()
conn.close()
assert task["stage"] == "development"
def test_gitea_webhook_pr(): def test_gitea_webhook_pr():
"""PR event is accepted.""" """PR event is accepted."""
resp = client.post( resp = client.post(
@@ -301,3 +342,94 @@ def test_plane_webhook_event_logged():
conn.close() conn.close()
assert event is not None assert event is not None
assert event["source"] == "plane" assert event["source"] == "plane"
# ---------------------------------------------------------------------------
# BUG 7: red CI on development must bounce the task back to the developer
# (capped retries, symmetric to review REQUEST_CHANGES). These are pure-logic
# tests: they invoke handle_ci_status() directly with mocked helpers so they do
# not pass through the TestClient HMAC barrier (baseline 401s are off-limits).
# ---------------------------------------------------------------------------
def _ci_failure_payload():
return {
"state": "failure",
"branches": [{"name": "feature/ET-011-test"}],
"repository": {"name": "enduro-trails"},
}
def _mock_db_with_retry_count(count):
"""Build a get_db() mock whose retry_count query returns `count`."""
conn = MagicMock()
conn.execute.return_value.fetchone.return_value = {"cnt": count}
return conn
@patch("src.webhooks.gitea.notify_error")
@patch("src.webhooks.gitea.notify_qg_failure")
@patch("src.webhooks.gitea.enqueue_job")
@patch("src.webhooks.gitea.update_task_stage")
@patch("src.webhooks.gitea.get_db")
@patch("src.webhooks.gitea.get_task_by_repo_branch")
@patch("src.webhooks.gitea.get_project_by_repo")
def test_ci_failure_development_retries_developer_under_limit(
mock_proj, mock_task, mock_get_db, mock_update_stage,
mock_enqueue, mock_qg, mock_err,
):
"""retry_count < MAX_DEV_RETRIES → relaunch developer, stage untouched."""
from src.webhooks.gitea import handle_ci_status
mock_proj.return_value = {"repo": "enduro-trails"}
mock_task.return_value = {
"id": 1, "stage": "development", "work_item_id": "ET-011",
}
mock_get_db.return_value = _mock_db_with_retry_count(0)
mock_enqueue.return_value = 42
asyncio.run(handle_ci_status(_ci_failure_payload()))
# QG failure was still reported (Slava sees both the failure and the retry).
assert mock_qg.called
# developer was re-enqueued.
assert mock_enqueue.called
assert mock_enqueue.call_args[0][0] == "developer"
# No escalation.
assert not mock_err.called
# Stage stays on development — no update_task_stage in the CI-failure path.
assert not mock_update_stage.called
@patch("src.webhooks.gitea.notify_error")
@patch("src.webhooks.gitea.notify_qg_failure")
@patch("src.webhooks.gitea.enqueue_job")
@patch("src.webhooks.gitea.update_task_stage")
@patch("src.webhooks.gitea.get_db")
@patch("src.webhooks.gitea.get_task_by_repo_branch")
@patch("src.webhooks.gitea.get_project_by_repo")
def test_ci_failure_development_escalates_at_limit(
mock_proj, mock_task, mock_get_db, mock_update_stage,
mock_enqueue, mock_qg, mock_err,
):
"""retry_count >= MAX_DEV_RETRIES → escalate via notify_error, no relaunch."""
from src.webhooks.gitea import handle_ci_status, MAX_DEV_RETRIES
mock_proj.return_value = {"repo": "enduro-trails"}
mock_task.return_value = {
"id": 1, "stage": "development", "work_item_id": "ET-011",
}
mock_get_db.return_value = _mock_db_with_retry_count(MAX_DEV_RETRIES)
asyncio.run(handle_ci_status(_ci_failure_payload()))
# QG failure still reported.
assert mock_qg.called
# developer NOT re-enqueued at the cap.
assert not mock_enqueue.called
# Escalation message mentions CI failure.
assert mock_err.called
err_msg = " ".join(str(a) for a in mock_err.call_args[0])
assert "Max developer retries" in err_msg
assert "after CI failure" in err_msg
# Stage untouched.
assert not mock_update_stage.called