Merge pull request 'ORCH-4: unified stage-engine (M-3)' (#5) from feature/ORCH-4-stage-engine into main

This commit was merged in pull request #5.
This commit is contained in:
2026-06-03 08:59:51 +03:00
4 changed files with 859 additions and 241 deletions

View File

@@ -564,7 +564,15 @@ class AgentLauncher:
pass
def _try_advance_stage(self, run_id: int, agent: str, repo: str, branch: str):
"""After agent finishes successfully, check QG and advance stage if possible."""
"""After agent finishes successfully, advance the stage via the unified engine.
ORCH-4 / M-3: the 174-line body that used to live here moved into
src/stage_engine.advance_stage(). This is now a thin wrapper: it looks up
the task by (repo, branch) and delegates. `agent` is forwarded as
finished_agent so the analyst/reviewer/tester/architect rollback branches
still trigger exactly as before. The agent-selection bug (it used to call
get_agent_for_stage(next_stage)) is fixed inside the engine.
"""
try:
conn = get_db()
task_row = conn.execute(
@@ -576,174 +584,15 @@ class AgentLauncher:
return
task_id, current_stage, work_item_id = task_row
qg_name = get_qg_for_stage(current_stage)
next_stage = get_next_stage(current_stage)
if not next_stage:
return
# Run QG check if defined
if qg_name and qg_name in QG_CHECKS:
check_fn = QG_CHECKS[qg_name]
if qg_name in ("check_analysis_approved",):
# Requires human approval - post request comment if analyst just finished
if agent == "analyst" and qg_name == "check_analysis_approved" and work_item_id:
files_check = QG_CHECKS.get("check_analysis_complete")
if files_check:
files_ok, _ = files_check(repo, work_item_id, branch)
if files_ok:
# Full artifacts ready -> In Review
from ..plane_sync import set_issue_in_review
set_issue_in_review(work_item_id)
plane_add_comment(
work_item_id,
"\U0001f4cb BRD/\u0422\u0417/AC/TestPlan \u0433\u043e\u0442\u043e\u0432\u044b. "
"\u041f\u0440\u043e\u0448\u0443 review \u0438 \u0440\u0435\u0430\u043a\u0446\u0438\u044e :approved: \u0434\u043b\u044f \u043f\u0440\u043e\u0434\u0432\u0438\u0436\u0435\u043d\u0438\u044f \u0432 Architecture."
)
notify_approve_requested(task_id)
logger.info(f"Task {task_id}: analyst finished, requested :approved: in Plane")
else:
# Check if questions file exists (in the task worktree)
import os as _os
questions_path = _os.path.join(
get_worktree_path(repo, branch),
f"docs/work-items/{work_item_id}/01-questions.md"
)
if _os.path.isfile(questions_path):
# Analyst has questions -> Needs Input
from ..plane_sync import set_issue_needs_input
set_issue_needs_input(work_item_id)
with open(questions_path, "r") as qf:
questions_text = qf.read()
plane_add_comment(
work_item_id,
f"\u2753 Analyst \u043d\u0443\u0436\u0434\u0430\u0435\u0442\u0441\u044f \u0432 \u0443\u0442\u043e\u0447\u043d\u0435\u043d\u0438\u0438:\n\n{questions_text}"
)
from ..notifications import send_telegram
send_telegram(
f"\u2753 {work_item_id}: Analyst \u0437\u0430\u0434\u0430\u0451\u0442 \u0432\u043e\u043f\u0440\u043e\u0441\u044b. \u041e\u0442\u0432\u0435\u0442\u044c \u0432 Plane."
)
else:
# No artifacts and no questions
plane_add_comment(
work_item_id,
"\u26a0\ufe0f Analyst \u0437\u0430\u0432\u0435\u0440\u0448\u0438\u043b\u0441\u044f \u0431\u0435\u0437 \u0430\u0440\u0442\u0435\u0444\u0430\u043a\u0442\u043e\u0432 \u0438 \u0431\u0435\u0437 \u0432\u043e\u043f\u0440\u043e\u0441\u043e\u0432. \u041f\u0440\u043e\u0432\u0435\u0440\u044c\u0442\u0435 \u043b\u043e\u0433."
)
return
elif qg_name in ("check_ci_green", "check_tests_local"):
# (repo, branch) signature — already worktree-aware.
passed, reason = check_fn(repo, branch)
elif qg_name == "check_tests_passed":
# Artifact check — pass branch so it reads from the worktree.
passed, reason = check_fn(repo, work_item_id or "", branch)
else:
# Other artifact checks (check_architecture_done, etc.) — worktree-aware.
passed, reason = check_fn(repo, work_item_id or "", branch)
if not passed:
logger.info(f"Task {task_id}: QG '{qg_name}' not passed after {agent}: {reason}")
# If reviewer says REQUEST_CHANGES, rollback to development
if agent == "reviewer" and "REQUEST_CHANGES" in reason:
update_task_stage(task_id, "development")
notify_stage_change(task_id, current_stage, "development")
plane_notify_stage(work_item_id, current_stage, "development")
# Count retries
conn2 = get_db()
retry_count = conn2.execute(
"SELECT COUNT(*) FROM agent_runs WHERE task_id=? AND agent='developer'",
(task_id,)
).fetchone()[0]
conn2.close()
if retry_count < 3:
task_desc = (
f"Work item: {work_item_id}\nRepo: {repo}\nBranch: {branch}\n"
f"Stage: development\nNote: REQUEST_CHANGES from reviewer "
f"(attempt {retry_count+1}/3). Fix findings in "
f"docs/work-items/{work_item_id}/12-review.md"
)
new_job = enqueue_job("developer", repo, task_desc, task_id=task_id)
logger.info(f"Task {task_id}: reviewer REQUEST_CHANGES, enqueued developer (job_id={new_job})")
else:
from ..notifications import send_telegram
send_telegram(f"\u26a0\ufe0f {work_item_id}: Max developer retries (3) reached. Manual intervention needed.")
logger.error(f"Task {task_id}: max retries reached")
# Task 6: Tester FAIL -> rollback to development
if agent == "tester" and qg_name == "check_tests_passed" and not passed:
update_task_stage(task_id, "development")
notify_stage_change(task_id, current_stage, "development")
plane_notify_stage(work_item_id, current_stage, "development")
from ..plane_sync import set_issue_in_progress
set_issue_in_progress(work_item_id)
plane_add_comment(
work_item_id,
f"\u274c \u0422\u0435\u0441\u0442\u044b \u043d\u0435 \u043f\u0440\u043e\u0448\u043b\u0438: {reason}. Developer \u043f\u0435\u0440\u0435\u0437\u0430\u043f\u0443\u0449\u0435\u043d \u0434\u043b\u044f \u0444\u0438\u043a\u0441\u0430."
)
conn2 = get_db()
retry_count = conn2.execute(
"SELECT COUNT(*) FROM agent_runs WHERE task_id=? AND agent='developer'",
(task_id,)
).fetchone()[0]
conn2.close()
if retry_count < 3:
task_desc = (
f"Work item: {work_item_id}\nRepo: {repo}\nBranch: {branch}\n"
f"Stage: development\nNote: Tests FAILED. "
f"Fix failures described in docs/work-items/{work_item_id}/13-test-report.md"
)
new_job = enqueue_job("developer", repo, task_desc, task_id=task_id)
logger.info(f"Task {task_id}: tester FAIL, enqueued developer (job_id={new_job})")
else:
from ..notifications import send_telegram
from ..plane_sync import set_issue_blocked
set_issue_blocked(work_item_id)
send_telegram(f"\U0001f6a8 {work_item_id}: Tests still failing after 3 developer retries. Manual intervention needed.")
# Task 8: Architect conflict -> rollback to analysis
if agent == "architect" and qg_name == "check_architecture_done" and not passed:
import os as _os
conflict_path = _os.path.join(
get_worktree_path(repo, branch),
f"docs/work-items/{work_item_id}/10-conflict.md"
)
if _os.path.isfile(conflict_path):
update_task_stage(task_id, "analysis")
notify_stage_change(task_id, current_stage, "analysis")
plane_notify_stage(work_item_id, current_stage, "analysis")
from ..plane_sync import set_issue_in_progress
set_issue_in_progress(work_item_id)
with open(conflict_path, "r") as cf:
conflict_text = cf.read()[:500]
plane_add_comment(
work_item_id,
f"\u26a0\ufe0f Architect \u043d\u0430\u0448\u0451\u043b \u043a\u043e\u043d\u0444\u043b\u0438\u043a\u0442 \u0441 \u0422\u0417. \u0412\u043e\u0437\u0432\u0440\u0430\u0442 \u0432 Analysis.\n\n{conflict_text}"
)
task_desc = (
f"Work item: {work_item_id}\nRepo: {repo}\nBranch: {branch}\n"
f"Stage: analysis\nNote: Architect conflict. Revise TRZ. "
f"See docs/work-items/{work_item_id}/10-conflict.md"
)
new_job = enqueue_job("analyst", repo, task_desc, task_id=task_id)
logger.info(f"Task {task_id}: architect conflict, enqueued analyst (job_id={new_job})")
return
return
elif qg_name:
return
# Advance stage
update_task_stage(task_id, next_stage)
notify_stage_change(task_id, current_stage, next_stage)
plane_notify_stage(work_item_id, current_stage, next_stage)
logger.info(f"Task {task_id}: {current_stage} -> {next_stage} (auto-advance after {agent})")
# Launch next agent if defined
next_agent = get_agent_for_stage(next_stage)
if next_agent:
task_desc = f"Work item: {work_item_id}\nRepo: {repo}\nBranch: {branch}\nStage: {next_stage}"
new_job_id = enqueue_job(next_agent, repo, task_desc, task_id=task_id)
logger.info(f"Task {task_id}: enqueued '{next_agent}' (job_id={new_job_id})")
from ..stage_engine import advance_stage
advance_stage(
task_id=task_id,
current_stage=current_stage,
repo=repo,
work_item_id=work_item_id,
branch=branch,
finished_agent=agent,
)
except Exception as e:
logger.error(f"Auto-advance failed for run_id={run_id}: {e}")

425
src/stage_engine.py Normal file
View File

@@ -0,0 +1,425 @@
"""Unified stage engine (ORCH-4 / M-3).
Single source of truth for "an agent finished / a human approved -> run the
stage's quality gate and either advance the pipeline or roll it back".
Before ORCH-4 this logic was duplicated in two places that had silently
diverged:
- src/agents/launcher.py::_try_advance_stage (sync, rich business logic:
analyst approved-flow, reviewer REQUEST_CHANGES rollback+retry, tester FAIL
rollback+retry, architect conflict rollback) — but it picked the next agent
with get_agent_for_stage(next_stage), which is WRONG.
- src/webhooks/plane.py::_try_advance_stage (async, leaner, but it had the
check_review_approved PR-by-branch dispatch and used the CORRECT
get_agent_for_stage(current_stage)).
This module merges both into one sync `advance_stage(...)`. launcher calls it
directly; the plane webhook calls it through asyncio.to_thread so there is
exactly one implementation.
Agent-selection bug fix (ORCH-4):
stages.py defines `agent` as "the agent to launch when advancing FROM this
stage". So when advancing current -> next, the correct agent to launch is
get_agent_for_stage(current_stage). launcher's old next_stage lookup skipped a
stage (e.g. analysis->architecture launched 'developer' instead of
'architect'). plane and gitea already used current_stage; we unify on that.
"""
import logging
import os
from dataclasses import dataclass, field
from .db import get_db, update_task_stage, enqueue_job
from .stages import get_next_stage, get_qg_for_stage, get_agent_for_stage
from .git_worktree import get_worktree_path
from .qg.checks import QG_CHECKS
from .notifications import (
notify_stage_change,
notify_qg_failure,
notify_approve_requested,
send_telegram,
)
from .plane_sync import (
notify_stage_change as plane_notify_stage,
notify_qg_failure as plane_notify_qg,
add_comment as plane_add_comment,
set_issue_in_review,
set_issue_needs_input,
set_issue_in_progress,
set_issue_blocked,
)
from .config import settings
logger = logging.getLogger("orchestrator.stage_engine")
MAX_DEVELOPER_RETRIES = 3
@dataclass
class AdvanceResult:
"""Outcome of an advance_stage() call (mostly for tests/observability)."""
advanced: bool = False
from_stage: str | None = None
to_stage: str | None = None
enqueued_agent: str | None = None
enqueued_job_id: int | None = None
qg_name: str | None = None
qg_passed: bool | None = None
qg_reason: str | None = None
rolled_back_to: str | None = None
alerted: bool = False
note: str | None = None
notes: list = field(default_factory=list)
def _run_qg(qg_name: str, repo: str, work_item_id: str, branch: str):
"""Dispatch a quality-gate check to the right signature and run it.
Signatures (unified from launcher + plane):
- check_ci_green / check_tests_local -> (repo, branch)
- check_review_approved -> (repo, pr_number) [PR found by branch]
- everything else (artifact checks) -> (repo, work_item_id, branch)
Returns (passed: bool, reason: str).
"""
check_fn = QG_CHECKS.get(qg_name)
if not check_fn:
logger.error(f"QG function '{qg_name}' not found in registry")
return False, f"Unknown QG: {qg_name}"
if qg_name in ("check_ci_green", "check_tests_local"):
# (repo, branch) — already worktree-aware.
return check_fn(repo, branch)
if qg_name == "check_review_approved":
# Special case kept from plane: find the open PR for this branch via
# Gitea, then check it; fall back to a file-based review marker.
return _check_review_approved_by_branch(check_fn, repo, work_item_id, branch)
# All other artifact checks: (repo, work_item_id, branch). Pass branch so the
# check reads from the task worktree (ORCH-2 / S-4).
return check_fn(repo, work_item_id or "", branch)
def _check_review_approved_by_branch(check_fn, repo: str, work_item_id: str, branch: str):
"""check_review_approved dispatch preserved from plane._try_advance_stage.
Finds the open PR whose head ref == branch via the Gitea API and runs
check_review_approved(repo, pr_number). If no open PR exists, falls back to a
file-based review marker (12-review.md / 09-review.md) like the original.
"""
import httpx as _httpx
owner = settings.gitea_owner
url = f"{settings.gitea_url}/api/v1/repos/{owner}/{repo}/pulls?state=open&limit=50"
headers = {"Authorization": f"token {settings.gitea_token}"}
try:
resp = _httpx.get(url, headers=headers, timeout=10)
prs = resp.json()
pr_number = None
for pr in prs:
if pr.get("head", {}).get("ref") == branch:
pr_number = pr["number"]
break
if pr_number:
return check_fn(repo, pr_number)
# No open PR but a review file may exist — check file-based.
wt = get_worktree_path(repo, branch)
if not os.path.isdir(wt):
wt = os.path.join(settings.repos_dir, repo)
review_path = os.path.join(wt, f"docs/work-items/{work_item_id}/12-review.md")
review_path2 = os.path.join(wt, f"docs/work-items/{work_item_id}/09-review.md")
if os.path.isfile(review_path) or os.path.isfile(review_path2):
return True, "Review file exists (file-based approval)"
return False, "No open PR found and no review file"
except Exception as e:
return False, f"Error finding PR: {e}"
def _developer_retry_count(task_id: int) -> int:
"""How many developer runs have already happened for this task."""
conn = get_db()
n = conn.execute(
"SELECT COUNT(*) FROM agent_runs WHERE task_id=? AND agent='developer'",
(task_id,),
).fetchone()[0]
conn.close()
return n
def advance_stage(
task_id: int,
current_stage: str,
repo: str,
work_item_id: str,
branch: str,
finished_agent: str | None = None,
) -> AdvanceResult:
"""Run the current stage's quality gate and advance / roll back the pipeline.
This is the single merged implementation (ORCH-4 / M-3). It is synchronous;
the async plane webhook calls it via asyncio.to_thread.
Args:
task_id: tasks.id
current_stage: the stage the task is currently in
repo: repository name
work_item_id: Plane work item id (may be "" / None)
branch: feature branch
finished_agent: the agent that just finished (launcher path). Drives the
approved/REQUEST_CHANGES/tester/architect branches. In the
plane webhook path it is None, so those agent-specific
branches simply do not trigger (matches old plane behavior).
Returns AdvanceResult describing what happened.
"""
result = AdvanceResult(from_stage=current_stage)
agent = finished_agent
try:
qg_name = get_qg_for_stage(current_stage)
next_stage = get_next_stage(current_stage)
result.qg_name = qg_name
result.to_stage = next_stage
if not next_stage:
logger.info(f"Task {task_id}: already at terminal stage '{current_stage}'")
result.note = "terminal"
return result
# --- Quality gate ----------------------------------------------------
if qg_name and qg_name in QG_CHECKS:
# Human-approval gate: special analyst approved-flow (launcher only).
if qg_name == "check_analysis_approved":
_handle_analysis_approved_flow(
task_id, current_stage, repo, work_item_id, branch, agent, result
)
return result
passed, reason = _run_qg(qg_name, repo, work_item_id, branch)
result.qg_passed = passed
result.qg_reason = reason
if not passed:
logger.info(
f"Task {task_id}: QG '{qg_name}' not passed after {agent}: {reason}"
)
# Behaviour parity:
# - webhook path (finished_agent is None): emit the generic
# QG-failure notification, exactly like the old plane handler.
# - launcher path (finished_agent set): NO generic notification;
# the rollback branches below own their own messaging, exactly
# like the old launcher handler.
if agent is None:
notify_qg_failure(task_id, current_stage, qg_name, reason)
plane_notify_qg(work_item_id, current_stage, qg_name, reason)
_handle_qg_failure_rollbacks(
task_id, current_stage, repo, work_item_id, branch,
agent, qg_name, reason, result,
)
return result
elif qg_name:
# QG name set but not registered — do not advance (launcher behavior).
result.note = f"qg '{qg_name}' not in registry"
return result
# --- Advance ---------------------------------------------------------
update_task_stage(task_id, next_stage)
notify_stage_change(task_id, current_stage, next_stage)
plane_notify_stage(work_item_id, current_stage, next_stage)
result.advanced = True
logger.info(
f"Task {task_id}: {current_stage} -> {next_stage} "
f"(auto-advance after {agent})"
)
# --- Launch the next agent (ORCH-4 fix: current_stage, not next) -----
next_agent = get_agent_for_stage(current_stage)
if next_agent:
task_desc = (
f"Work item: {work_item_id}\nRepo: {repo}\n"
f"Branch: {branch}\nStage: {next_stage}"
)
new_job_id = enqueue_job(next_agent, repo, task_desc, task_id=task_id)
result.enqueued_agent = next_agent
result.enqueued_job_id = new_job_id
logger.info(
f"Task {task_id}: enqueued '{next_agent}' (job_id={new_job_id})"
)
return result
except Exception as e:
logger.error(f"advance_stage failed for task_id={task_id}: {e}")
result.note = f"error: {e}"
return result
def _handle_analysis_approved_flow(
task_id, current_stage, repo, work_item_id, branch, agent, result: AdvanceResult
):
"""Analyst approved-flow (launcher only).
Only triggers when the analyst just finished (agent == 'analyst') in the
launcher path. Decides between: artifacts ready -> In Review + request
:approved:; questions file -> Needs Input; otherwise a warning comment.
This gate never advances on its own (human approval does that via the plane
webhook), matching the original launcher behavior.
"""
result.qg_name = "check_analysis_approved"
result.note = "analysis-approval-gate"
if not (agent == "analyst" and work_item_id):
return
files_check = QG_CHECKS.get("check_analysis_complete")
if not files_check:
return
files_ok, _ = files_check(repo, work_item_id, branch)
if files_ok:
# Full artifacts ready -> In Review, ask for :approved:.
set_issue_in_review(work_item_id)
plane_add_comment(
work_item_id,
"\U0001f4cb BRD/\u0422\u0417/AC/TestPlan \u0433\u043e\u0442\u043e\u0432\u044b. "
"\u041f\u0440\u043e\u0448\u0443 review \u0438 \u0440\u0435\u0430\u043a\u0446\u0438\u044e :approved: "
"\u0434\u043b\u044f \u043f\u0440\u043e\u0434\u0432\u0438\u0436\u0435\u043d\u0438\u044f \u0432 Architecture.",
)
notify_approve_requested(task_id)
result.note = "analysis-in-review"
logger.info(
f"Task {task_id}: analyst finished, requested :approved: in Plane"
)
return
questions_path = os.path.join(
get_worktree_path(repo, branch),
f"docs/work-items/{work_item_id}/01-questions.md",
)
if os.path.isfile(questions_path):
set_issue_needs_input(work_item_id)
with open(questions_path, "r") as qf:
questions_text = qf.read()
plane_add_comment(
work_item_id,
f"\u2753 Analyst \u043d\u0443\u0436\u0434\u0430\u0435\u0442\u0441\u044f \u0432 \u0443\u0442\u043e\u0447\u043d\u0435\u043d\u0438\u0438:\n\n{questions_text}",
)
send_telegram(
f"\u2753 {work_item_id}: Analyst \u0437\u0430\u0434\u0430\u0451\u0442 \u0432\u043e\u043f\u0440\u043e\u0441\u044b. \u041e\u0442\u0432\u0435\u0442\u044c \u0432 Plane."
)
result.note = "analysis-needs-input"
return
# No artifacts and no questions.
plane_add_comment(
work_item_id,
"\u26a0\ufe0f Analyst \u0437\u0430\u0432\u0435\u0440\u0448\u0438\u043b\u0441\u044f \u0431\u0435\u0437 \u0430\u0440\u0442\u0435\u0444\u0430\u043a\u0442\u043e\u0432 \u0438 \u0431\u0435\u0437 \u0432\u043e\u043f\u0440\u043e\u0441\u043e\u0432. \u041f\u0440\u043e\u0432\u0435\u0440\u044c\u0442\u0435 \u043b\u043e\u0433.",
)
result.note = "analysis-empty"
def _handle_qg_failure_rollbacks(
task_id, current_stage, repo, work_item_id, branch,
agent, qg_name, reason, result: AdvanceResult,
):
"""All rollback/retry branches from the original launcher, preserved verbatim.
Only fire on the launcher path (finished_agent is set). The webhook path
passes finished_agent=None, so none of these agent-specific branches trigger
— that matches the old plane behavior (it just reported the QG failure).
"""
# Reviewer REQUEST_CHANGES -> rollback to development + retry (max 3).
if agent == "reviewer" and "REQUEST_CHANGES" in (reason or ""):
update_task_stage(task_id, "development")
notify_stage_change(task_id, current_stage, "development")
plane_notify_stage(work_item_id, current_stage, "development")
result.rolled_back_to = "development"
retry_count = _developer_retry_count(task_id)
if retry_count < MAX_DEVELOPER_RETRIES:
task_desc = (
f"Work item: {work_item_id}\nRepo: {repo}\nBranch: {branch}\n"
f"Stage: development\nNote: REQUEST_CHANGES from reviewer "
f"(attempt {retry_count+1}/3). Fix findings in "
f"docs/work-items/{work_item_id}/12-review.md"
)
new_job = enqueue_job("developer", repo, task_desc, task_id=task_id)
result.enqueued_agent = "developer"
result.enqueued_job_id = new_job
logger.info(
f"Task {task_id}: reviewer REQUEST_CHANGES, enqueued developer "
f"(job_id={new_job})"
)
else:
send_telegram(
f"\u26a0\ufe0f {work_item_id}: Max developer retries (3) reached. "
f"Manual intervention needed."
)
result.alerted = True
logger.error(f"Task {task_id}: max retries reached")
# Tester check_tests_passed FAIL -> rollback to development + retry (max 3).
if agent == "tester" and qg_name == "check_tests_passed":
update_task_stage(task_id, "development")
notify_stage_change(task_id, current_stage, "development")
plane_notify_stage(work_item_id, current_stage, "development")
result.rolled_back_to = "development"
set_issue_in_progress(work_item_id)
plane_add_comment(
work_item_id,
f"\u274c \u0422\u0435\u0441\u0442\u044b \u043d\u0435 \u043f\u0440\u043e\u0448\u043b\u0438: {reason}. "
f"Developer \u043f\u0435\u0440\u0435\u0437\u0430\u043f\u0443\u0449\u0435\u043d \u0434\u043b\u044f \u0444\u0438\u043a\u0441\u0430.",
)
retry_count = _developer_retry_count(task_id)
if retry_count < MAX_DEVELOPER_RETRIES:
task_desc = (
f"Work item: {work_item_id}\nRepo: {repo}\nBranch: {branch}\n"
f"Stage: development\nNote: Tests FAILED. "
f"Fix failures described in docs/work-items/{work_item_id}/13-test-report.md"
)
new_job = enqueue_job("developer", repo, task_desc, task_id=task_id)
result.enqueued_agent = "developer"
result.enqueued_job_id = new_job
logger.info(
f"Task {task_id}: tester FAIL, enqueued developer (job_id={new_job})"
)
else:
set_issue_blocked(work_item_id)
send_telegram(
f"\U0001f6a8 {work_item_id}: Tests still failing after 3 developer "
f"retries. Manual intervention needed."
)
result.alerted = True
# Architect conflict (10-conflict.md exists) -> rollback to analysis.
if agent == "architect" and qg_name == "check_architecture_done":
conflict_path = os.path.join(
get_worktree_path(repo, branch),
f"docs/work-items/{work_item_id}/10-conflict.md",
)
if os.path.isfile(conflict_path):
update_task_stage(task_id, "analysis")
notify_stage_change(task_id, current_stage, "analysis")
plane_notify_stage(work_item_id, current_stage, "analysis")
result.rolled_back_to = "analysis"
set_issue_in_progress(work_item_id)
with open(conflict_path, "r") as cf:
conflict_text = cf.read()[:500]
plane_add_comment(
work_item_id,
f"\u26a0\ufe0f Architect \u043d\u0430\u0448\u0451\u043b \u043a\u043e\u043d\u0444\u043b\u0438\u043a\u0442 \u0441 \u0422\u0417. "
f"\u0412\u043e\u0437\u0432\u0440\u0430\u0442 \u0432 Analysis.\n\n{conflict_text}",
)
task_desc = (
f"Work item: {work_item_id}\nRepo: {repo}\nBranch: {branch}\n"
f"Stage: analysis\nNote: Architect conflict. Revise TRZ. "
f"See docs/work-items/{work_item_id}/10-conflict.md"
)
new_job = enqueue_job("analyst", repo, task_desc, task_id=task_id)
result.enqueued_agent = "analyst"
result.enqueued_job_id = new_job
logger.info(
f"Task {task_id}: architect conflict, enqueued analyst "
f"(job_id={new_job})"
)

View File

@@ -318,81 +318,30 @@ async def handle_comment(data: dict, project_id: str = ""):
async def _try_advance_stage(
task_id: int, current_stage: str, repo: str, work_item_id: str, branch: str
):
"""Run QG check for current stage and advance if passed."""
qg_name = get_qg_for_stage(current_stage)
next_stage = get_next_stage(current_stage)
"""Thin async wrapper over the unified stage engine (ORCH-4 / M-3).
if not next_stage:
logger.info(f"Task {task_id}: already at terminal stage '{current_stage}'")
return
The QG dispatch (including the check_review_approved PR-by-branch logic) and
the advance/launch logic now live in src/stage_engine.advance_stage(), which
is synchronous. We run it off the event loop via asyncio.to_thread so there
is exactly one implementation shared with the launcher.
# Run QG check if one is required
if qg_name:
qg_func = QG_CHECKS.get(qg_name)
if not qg_func:
logger.error(f"QG function '{qg_name}' not found in registry")
return
finished_agent is None on this webhook path (a human :approved: comment, not
a finished agent), so the agent-specific rollback branches inside the engine
intentionally do not trigger — identical to the old plane behavior, which
only ran the QG and either advanced or reported the failure.
"""
import asyncio
from ..stage_engine import advance_stage
# Determine args based on QG function
if qg_name in ("check_analysis_approved", "check_analysis_complete", "check_architecture_done", "check_tests_passed", "check_reviewer_verdict"):
# ORCH-2 / S-4: pass branch so artifacts are read from the task worktree.
passed, reason = qg_func(repo, work_item_id, branch)
elif qg_name in ("check_ci_green", "check_tests_local"):
passed, reason = qg_func(repo, branch)
elif qg_name == "check_review_approved":
# Find PR number by branch via Gitea API
import httpx as _httpx
from ..config import settings as _s
_owner = _s.gitea_owner
_url = f"{_s.gitea_url}/api/v1/repos/{_owner}/{repo}/pulls?state=open&limit=50"
_headers = {"Authorization": f"token {_s.gitea_token}"}
try:
_resp = _httpx.get(_url, headers=_headers, timeout=10)
_prs = _resp.json()
_pr_number = None
for _pr in _prs:
if _pr.get("head", {}).get("ref") == branch:
_pr_number = _pr["number"]
break
if _pr_number:
passed, reason = qg_func(repo, _pr_number)
else:
# No open PR but review file exists — check file-based
import os
from ..git_worktree import get_worktree_path as _gwp
_wt = _gwp(repo, branch) if os.path.isdir(_gwp(repo, branch)) else os.path.join(_s.repos_dir, repo)
_review_path = os.path.join(_wt, f"docs/work-items/{work_item_id}/12-review.md")
_review_path2 = os.path.join(_wt, f"docs/work-items/{work_item_id}/09-review.md")
if os.path.isfile(_review_path) or os.path.isfile(_review_path2):
passed, reason = True, "Review file exists (file-based approval)"
else:
passed, reason = False, "No open PR found and no review file"
except Exception as _e:
passed, reason = False, f"Error finding PR: {_e}"
else:
passed, reason = False, f"Unknown QG: {qg_name}"
if not passed:
notify_qg_failure(task_id, current_stage, qg_name, reason)
plane_notify_qg(work_item_id, current_stage, qg_name, reason)
return
# Advance stage
update_task_stage(task_id, next_stage)
notify_stage_change(task_id, current_stage, next_stage)
plane_notify_stage(work_item_id, current_stage, next_stage)
# Launch agent associated with the current stage's transition
agent = get_agent_for_stage(current_stage)
if agent:
try:
task_desc = f"Work item: {work_item_id}\nRepo: {repo}\nBranch: {branch}\nStage: {next_stage}"
job_id = enqueue_job(agent, repo, task_desc, task_id=task_id)
plane_notify_stage(work_item_id, current_stage, next_stage, agent)
logger.info(f"Task {task_id}: enqueued agent '{agent}', job_id={job_id}")
except Exception as e:
notify_error(task_id, f"Failed to launch agent '{agent}': {e}")
logger.error(f"Agent launch failed: {e}")
await asyncio.to_thread(
advance_stage,
task_id,
current_stage,
repo,
work_item_id,
branch,
None,
)
async def _create_gitea_branch(repo: str, branch: str):

395
tests/test_stage_engine.py Normal file
View File

@@ -0,0 +1,395 @@
"""ORCH-4 / M-3: tests for the unified stage engine (src/stage_engine.advance_stage).
These verify the MERGED behavior of what used to be two diverged
_try_advance_stage implementations (launcher sync + plane async):
* happy-path advance for every stage launches the CORRECT agent
(the ORCH-4 fix: agent = get_agent_for_stage(current_stage), NOT next_stage);
* a QG failure does not advance;
* reviewer REQUEST_CHANGES -> rollback to development + enqueue developer;
* developer retries > 3 -> telegram alert, no further enqueue;
* tester FAIL -> rollback to development + enqueue developer;
* architect conflict (10-conflict.md) -> rollback to analysis + enqueue analyst;
* launcher AND plane both delegate to the engine.
Network/Plane/Telegram side effects are mocked at the src.stage_engine level so
the engine runs against a real isolated sqlite DB.
"""
import os
import tempfile
import pytest
# Isolated test DB (same convention as the other suites).
_test_db = os.path.join(tempfile.gettempdir(), "test_orchestrator_stage_engine.db")
os.environ["ORCH_DB_PATH"] = _test_db
os.environ["ORCH_REPOS_DIR"] = tempfile.gettempdir()
os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")
os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")
from unittest.mock import MagicMock, patch # noqa: E402
import src.db as _db # noqa: E402
from src.db import init_db, get_db # noqa: E402
from src import stage_engine # noqa: E402
from src.stage_engine import advance_stage # noqa: E402
from src.stages import get_agent_for_stage # noqa: E402
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture(autouse=True)
def fresh_db(monkeypatch):
"""Fresh isolated DB per test."""
monkeypatch.setattr(_db.settings, "db_path", _test_db)
if os.path.exists(_test_db):
os.unlink(_test_db)
init_db()
yield
@pytest.fixture(autouse=True)
def silence_side_effects(monkeypatch):
"""Mock all Plane/Telegram/notification side effects in the engine.
Everything imported into src.stage_engine that touches the network or sends
a message becomes a no-op MagicMock so tests are deterministic and offline.
"""
for name in (
"notify_stage_change",
"notify_qg_failure",
"notify_approve_requested",
"send_telegram",
"plane_notify_stage",
"plane_notify_qg",
"plane_add_comment",
"set_issue_in_review",
"set_issue_needs_input",
"set_issue_in_progress",
"set_issue_blocked",
):
monkeypatch.setattr(stage_engine, name, MagicMock())
def _make_task(stage, repo="enduro-trails", branch="feature/ET-001-x", wi="ET-001"):
conn = get_db()
cur = conn.execute(
"INSERT INTO tasks (plane_id, work_item_id, repo, branch, stage) "
"VALUES (?, ?, ?, ?, ?)",
(f"plane-{wi}", wi, repo, branch, stage),
)
task_id = cur.lastrowid
conn.commit()
conn.close()
return task_id
def _stage(task_id):
conn = get_db()
row = conn.execute("SELECT stage FROM tasks WHERE id=?", (task_id,)).fetchone()
conn.close()
return row[0]
def _jobs():
conn = get_db()
rows = conn.execute("SELECT agent, repo, task_id FROM jobs ORDER BY id").fetchall()
conn.close()
return [dict(r) for r in rows]
def _add_developer_runs(task_id, n):
conn = get_db()
for _ in range(n):
conn.execute(
"INSERT INTO agent_runs (task_id, agent) VALUES (?, 'developer')",
(task_id,),
)
conn.commit()
conn.close()
def _pass(*a, **k):
return (True, "ok")
def _fail(reason):
def _f(*a, **k):
return (False, reason)
return _f
# ---------------------------------------------------------------------------
# Happy path: each stage advances and launches the CORRECT agent (ORCH-4 fix)
# ---------------------------------------------------------------------------
class TestHappyPathAgentSelection:
"""The fixed agent-selection: when advancing FROM current_stage, the engine
must enqueue get_agent_for_stage(current_stage), NOT next_stage.
"""
@pytest.mark.parametrize(
"current_stage,expected_next,expected_agent",
[
("architecture", "development", "developer"),
("development", "review", "reviewer"),
("review", "testing", "tester"),
("testing", "deploy", "deployer"),
],
)
def test_advance_launches_current_stage_agent(
self, monkeypatch, current_stage, expected_next, expected_agent
):
# All QG checks pass for this happy-path suite.
monkeypatch.setattr(
stage_engine, "QG_CHECKS",
{k: _pass for k in stage_engine.QG_CHECKS},
)
task_id = _make_task(current_stage)
res = advance_stage(
task_id, current_stage, "enduro-trails", "ET-001",
"feature/ET-001-x", finished_agent=None,
)
assert res.advanced is True
assert res.to_stage == expected_next
assert _stage(task_id) == expected_next
# The ORCH-4 fix: correct agent == get_agent_for_stage(current_stage).
assert expected_agent == get_agent_for_stage(current_stage)
assert res.enqueued_agent == expected_agent
jobs = _jobs()
assert len(jobs) == 1
assert jobs[0]["agent"] == expected_agent
def test_deploy_to_done_no_agent(self, monkeypatch):
"""deploy -> done advances but launches no agent (terminal-ish)."""
monkeypatch.setattr(
stage_engine, "QG_CHECKS",
{k: _pass for k in stage_engine.QG_CHECKS},
)
task_id = _make_task("deploy")
res = advance_stage(task_id, "deploy", "enduro-trails", "ET-001",
"feature/ET-001-x", finished_agent=None)
assert res.advanced is True
assert _stage(task_id) == "done"
assert res.enqueued_agent is None
assert _jobs() == []
def test_done_is_terminal(self):
task_id = _make_task("done")
res = advance_stage(task_id, "done", "enduro-trails", "ET-001",
"feature/ET-001-x", finished_agent=None)
assert res.advanced is False
assert _stage(task_id) == "done"
# ---------------------------------------------------------------------------
# QG failure: do not advance
# ---------------------------------------------------------------------------
class TestQgFailureDoesNotAdvance:
def test_qg_fail_keeps_stage(self, monkeypatch):
monkeypatch.setattr(
stage_engine, "QG_CHECKS",
{**stage_engine.QG_CHECKS, "check_architecture_done": _fail("not done")},
)
task_id = _make_task("architecture")
res = advance_stage(task_id, "architecture", "enduro-trails", "ET-001",
"feature/ET-001-x", finished_agent="architect")
assert res.advanced is False
assert res.qg_passed is False
assert _stage(task_id) == "architecture"
assert _jobs() == []
def test_webhook_path_emits_qg_failure_notification(self, monkeypatch):
"""finished_agent=None -> generic QG-failure notification fires (plane parity)."""
monkeypatch.setattr(
stage_engine, "QG_CHECKS",
{**stage_engine.QG_CHECKS, "check_tests_local": _fail("ci red")},
)
task_id = _make_task("development")
advance_stage(task_id, "development", "enduro-trails", "ET-001",
"feature/ET-001-x", finished_agent=None)
assert stage_engine.notify_qg_failure.called
assert stage_engine.plane_notify_qg.called
def test_launcher_path_no_generic_qg_notification(self, monkeypatch):
"""finished_agent set -> NO generic QG notification (launcher parity)."""
monkeypatch.setattr(
stage_engine, "QG_CHECKS",
{**stage_engine.QG_CHECKS, "check_architecture_done": _fail("not done")},
)
task_id = _make_task("architecture")
advance_stage(task_id, "architecture", "enduro-trails", "ET-001",
"feature/ET-001-x", finished_agent="architect")
assert not stage_engine.notify_qg_failure.called
# ---------------------------------------------------------------------------
# Reviewer REQUEST_CHANGES -> rollback to development + enqueue developer
# ---------------------------------------------------------------------------
class TestReviewerRequestChanges:
def test_rollback_and_enqueue_developer(self, monkeypatch):
monkeypatch.setattr(
stage_engine, "QG_CHECKS",
{**stage_engine.QG_CHECKS,
"check_reviewer_verdict": _fail("verdict: REQUEST_CHANGES")},
)
task_id = _make_task("review")
res = advance_stage(task_id, "review", "enduro-trails", "ET-001",
"feature/ET-001-x", finished_agent="reviewer")
assert res.advanced is False
assert res.rolled_back_to == "development"
assert _stage(task_id) == "development"
jobs = _jobs()
assert len(jobs) == 1
assert jobs[0]["agent"] == "developer"
def test_retry_over_3_alerts_no_enqueue(self, monkeypatch):
monkeypatch.setattr(
stage_engine, "QG_CHECKS",
{**stage_engine.QG_CHECKS,
"check_reviewer_verdict": _fail("verdict: REQUEST_CHANGES")},
)
task_id = _make_task("review")
_add_developer_runs(task_id, 3) # already at the max
res = advance_stage(task_id, "review", "enduro-trails", "ET-001",
"feature/ET-001-x", finished_agent="reviewer")
assert res.rolled_back_to == "development"
assert res.alerted is True
assert stage_engine.send_telegram.called
# No new developer job enqueued past the retry cap.
assert _jobs() == []
# ---------------------------------------------------------------------------
# Tester FAIL -> rollback to development + enqueue developer
# ---------------------------------------------------------------------------
class TestTesterFail:
def test_rollback_and_enqueue_developer(self, monkeypatch):
monkeypatch.setattr(
stage_engine, "QG_CHECKS",
{**stage_engine.QG_CHECKS, "check_tests_passed": _fail("2 tests failed")},
)
task_id = _make_task("testing")
res = advance_stage(task_id, "testing", "enduro-trails", "ET-001",
"feature/ET-001-x", finished_agent="tester")
assert res.advanced is False
assert res.rolled_back_to == "development"
assert _stage(task_id) == "development"
jobs = _jobs()
assert len(jobs) == 1
assert jobs[0]["agent"] == "developer"
def test_retry_over_3_blocks_and_alerts(self, monkeypatch):
monkeypatch.setattr(
stage_engine, "QG_CHECKS",
{**stage_engine.QG_CHECKS, "check_tests_passed": _fail("still failing")},
)
task_id = _make_task("testing")
_add_developer_runs(task_id, 3)
res = advance_stage(task_id, "testing", "enduro-trails", "ET-001",
"feature/ET-001-x", finished_agent="tester")
assert res.rolled_back_to == "development"
assert res.alerted is True
assert stage_engine.set_issue_blocked.called
assert _jobs() == []
# ---------------------------------------------------------------------------
# Architect conflict -> rollback to analysis + enqueue analyst
# ---------------------------------------------------------------------------
class TestArchitectConflict:
def test_conflict_rolls_back_to_analysis(self, monkeypatch, tmp_path):
# 10-conflict.md must exist in the worktree path the engine inspects.
wt = tmp_path / "wt"
conflict_dir = wt / "docs" / "work-items" / "ET-001"
conflict_dir.mkdir(parents=True)
(conflict_dir / "10-conflict.md").write_text("conflict with TRZ")
monkeypatch.setattr(stage_engine, "get_worktree_path", lambda repo, branch: str(wt))
monkeypatch.setattr(
stage_engine, "QG_CHECKS",
{**stage_engine.QG_CHECKS, "check_architecture_done": _fail("conflict")},
)
task_id = _make_task("architecture")
res = advance_stage(task_id, "architecture", "enduro-trails", "ET-001",
"feature/ET-001-x", finished_agent="architect")
assert res.advanced is False
assert res.rolled_back_to == "analysis"
assert _stage(task_id) == "analysis"
jobs = _jobs()
assert len(jobs) == 1
assert jobs[0]["agent"] == "analyst"
def test_no_conflict_file_no_rollback(self, monkeypatch, tmp_path):
wt = tmp_path / "wt"
(wt / "docs").mkdir(parents=True)
monkeypatch.setattr(stage_engine, "get_worktree_path", lambda repo, branch: str(wt))
monkeypatch.setattr(
stage_engine, "QG_CHECKS",
{**stage_engine.QG_CHECKS, "check_architecture_done": _fail("incomplete")},
)
task_id = _make_task("architecture")
res = advance_stage(task_id, "architecture", "enduro-trails", "ET-001",
"feature/ET-001-x", finished_agent="architect")
assert res.advanced is False
assert res.rolled_back_to is None
assert _stage(task_id) == "architecture"
assert _jobs() == []
# ---------------------------------------------------------------------------
# Analyst approved-flow (analysis gate): never auto-advances
# ---------------------------------------------------------------------------
class TestAnalysisApprovedFlow:
def test_artifacts_ready_requests_approval_no_advance(self, monkeypatch):
monkeypatch.setattr(
stage_engine, "QG_CHECKS",
{**stage_engine.QG_CHECKS, "check_analysis_complete": _pass},
)
task_id = _make_task("analysis")
res = advance_stage(task_id, "analysis", "enduro-trails", "ET-001",
"feature/ET-001-x", finished_agent="analyst")
assert res.advanced is False
assert _stage(task_id) == "analysis"
assert stage_engine.set_issue_in_review.called
assert stage_engine.notify_approve_requested.called
assert _jobs() == []
# ---------------------------------------------------------------------------
# launcher + plane both delegate to the engine
# ---------------------------------------------------------------------------
class TestDelegation:
def test_launcher_calls_engine(self):
from src.agents.launcher import AgentLauncher
task_id = _make_task("development", branch="feature/ET-777-deleg")
with patch("src.stage_engine.advance_stage") as m:
AgentLauncher()._try_advance_stage(
run_id=1, agent="developer", repo="enduro-trails",
branch="feature/ET-777-deleg",
)
m.assert_called_once()
kwargs = m.call_args.kwargs
assert kwargs["task_id"] == task_id
assert kwargs["current_stage"] == "development"
assert kwargs["finished_agent"] == "developer"
def test_plane_calls_engine(self):
import asyncio
from src.webhooks import plane as plane_mod
with patch("src.stage_engine.advance_stage") as m:
asyncio.run(
plane_mod._try_advance_stage(
task_id=5, current_stage="analysis", repo="enduro-trails",
work_item_id="ET-001", branch="feature/ET-001-x",
)
)
m.assert_called_once()
# plane passes positional args; finished_agent (last positional) is None.
args = m.call_args.args
assert args[0] == 5
assert args[1] == "analysis"
assert args[-1] is None