Compare commits
69 Commits
feature/OR
...
feature/OR
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
00325bcab0 | ||
| 5ecd1c4692 | |||
|
|
7c68d1d812 | ||
| f1b31463ad | |||
|
|
e0c14fae5f | ||
|
|
e0b6e92b09 | ||
| e405a55f9d | |||
|
|
a6cbacb62c | ||
| 93169f16e0 | |||
|
|
94334bdd42 | ||
| 3b68a29ae1 | |||
|
|
6c1e5fff52 | ||
| d0a34249cc | |||
|
|
1baae81165 | ||
|
|
e856e0940b | ||
|
|
7bbab9c38b | ||
| a33a971c9c | |||
|
|
d0c604bc66 | ||
| 83f5020f94 | |||
|
|
757745a221 | ||
| 34894f4684 | |||
|
|
4e4cc6c724 | ||
| b222d7af27 | |||
|
|
ec9aa74492 | ||
| 3e5c74ce4f | |||
|
|
9a0298de9d | ||
| 2801983d7b | |||
|
|
61e26a8930 | ||
| 2629dffe1b | |||
|
|
e4a9c48395 | ||
| a0621b9952 | |||
|
|
3a285de11d | ||
| 7922f6b67b | |||
|
|
e15d339b14 | ||
| 994f73a78e | |||
|
|
90c9ffe839 | ||
| b6aa107f93 | |||
|
|
0b8013cb06 | ||
| b01643fcc3 | |||
|
|
ca63bc26bb | ||
| dce9ac806b | |||
|
|
a9cdb17614 | ||
|
|
96c5e6b2f9 | ||
|
|
b91be74692 | ||
| 2d392b6fc7 | |||
|
|
857bad314c | ||
|
|
c4be50ee20 | ||
|
|
6b3e144949 | ||
| cd73c75cda | |||
|
|
c69e11348b | ||
|
|
ac9f5a05a6 | ||
|
|
fa746105fd | ||
| 4773137b52 | |||
|
|
7fd6529a35 | ||
|
|
9a702a0216 | ||
|
|
38a741d24e | ||
|
|
09b1c5e1b9 | ||
|
|
a4668c0303 | ||
| e9fd30528f | |||
|
|
d305521067 | ||
|
|
30d6dd0557 | ||
| 12e2691a24 | |||
|
|
c431a3d055 | ||
|
|
1d978caea7 | ||
| be27f506e3 | |||
|
|
8f11971bfc | ||
|
|
0653c2437f | ||
|
|
48b7707eb3 | ||
| 2fdc6856ba |
52
.env.staging.example
Normal file
52
.env.staging.example
Normal file
@@ -0,0 +1,52 @@
|
||||
# STAGING env for orchestrator-staging (port 8501).
|
||||
# Plane/Gitea tokens and sandbox project — configured in ORCH-32.
|
||||
# On Stage 1 (ORCH-31) you can copy from prod .env, changing only isolation-related keys.
|
||||
#
|
||||
# DO NOT COMMIT the real .env.staging — this file is the template only.
|
||||
# Create .env.staging on the server and fill in real values before starting staging.
|
||||
|
||||
# ── Plane ─────────────────────────────────────────────────────────────────────
|
||||
ORCH_PLANE_API_URL=http://localhost:8091
|
||||
ORCH_PLANE_API_TOKEN=<plane-api-token>
|
||||
ORCH_PLANE_WORKSPACE_SLUG=<workspace-slug>
|
||||
ORCH_PLANE_WEBHOOK_SECRET=<webhook-secret>
|
||||
|
||||
# Per-agent Plane bot tokens (authorship in Plane comments).
|
||||
# Leave empty to use ORCH_PLANE_API_TOKEN fallback.
|
||||
ORCH_PLANE_BOT_ANALYST=
|
||||
ORCH_PLANE_BOT_ARCHITECT=
|
||||
ORCH_PLANE_BOT_DEVELOPER=
|
||||
ORCH_PLANE_BOT_REVIEWER=
|
||||
ORCH_PLANE_BOT_TESTER=
|
||||
ORCH_PLANE_BOT_DEPLOYER=
|
||||
ORCH_PLANE_BOT_STREAM=
|
||||
|
||||
# ── Gitea ─────────────────────────────────────────────────────────────────────
|
||||
ORCH_GITEA_URL=http://localhost:3000
|
||||
ORCH_GITEA_PUBLIC_URL=https://git.mva154.duckdns.org
|
||||
ORCH_GITEA_TOKEN=<gitea-token>
|
||||
ORCH_GITEA_WEBHOOK_SECRET=<gitea-webhook-secret>
|
||||
|
||||
# ── Telegram ──────────────────────────────────────────────────────────────────
|
||||
ORCH_TELEGRAM_BOT_TOKEN=<telegram-bot-token>
|
||||
ORCH_TELEGRAM_CHAT_ID=<telegram-chat-id>
|
||||
|
||||
# ── Claude / repos ────────────────────────────────────────────────────────────
|
||||
ORCH_CLAUDE_BIN=/usr/bin/claude
|
||||
ORCH_REPOS_DIR=/repos
|
||||
ORCH_HOST_REPOS_DIR=/home/slin/repos
|
||||
|
||||
# ── Database (ISOLATION KEY for staging) ─────────────────────────────────────
|
||||
# The staging volume mounts ./data/staging:/app/data, so the DB physically lives
|
||||
# at ./data/staging/orchestrator.db on the host — fully isolated from prod.
|
||||
# Do NOT change this path; isolation is achieved via the volume mount, not this path.
|
||||
ORCH_DB_PATH=/app/data/orchestrator.db
|
||||
|
||||
# ── Concurrency / worker ──────────────────────────────────────────────────────
|
||||
ORCH_MAX_CONCURRENCY=1
|
||||
ORCH_QUEUE_POLL_INTERVAL=2.0
|
||||
|
||||
# ── Deploy hook ───────────────────────────────────────────────────────────────
|
||||
DEPLOY_SSH_USER=slin
|
||||
DEPLOY_SSH_HOST=127.0.0.1
|
||||
DEPLOY_HOOK_SCRIPT=/home/slin/bin/enduro-deploy-hook.sh
|
||||
22
.gitea/workflows/ci.yml
Normal file
22
.gitea/workflows/ci.yml
Normal file
@@ -0,0 +1,22 @@
|
||||
name: CI
|
||||
on:
|
||||
push:
|
||||
branches: ["feature/**", "bugfix/**", "hotfix/**", "fix/**", "ci/**"]
|
||||
pull_request:
|
||||
branches: [main]
|
||||
|
||||
jobs:
|
||||
test:
|
||||
runs-on: self-hosted
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python3 -m pip install --user --upgrade pip
|
||||
python3 -m pip install --user -r requirements.txt
|
||||
- name: Test
|
||||
env:
|
||||
PYTHONPATH: ${{ github.workspace }}
|
||||
run: |
|
||||
export PATH="$HOME/.local/bin:$PATH"
|
||||
python3 -m pytest tests/ -q
|
||||
4
.gitignore
vendored
4
.gitignore
vendored
@@ -5,3 +5,7 @@ __pycache__/
|
||||
data/
|
||||
*.db
|
||||
.pytest_cache/
|
||||
# ORCH-31: staging env (secrets, not committed — see .env.staging.example)
|
||||
.env.staging
|
||||
# ORCH-31: staging DB data directory
|
||||
data/staging/
|
||||
|
||||
57
.openclaw/agents/analyst.md
Normal file
57
.openclaw/agents/analyst.md
Normal file
@@ -0,0 +1,57 @@
|
||||
---
|
||||
name: analyst
|
||||
description: Бизнес-аналитик. Создаёт пакет документов анализа для work item.
|
||||
model: claude-sonnet-4-6
|
||||
tools:
|
||||
- Filesystem (Read везде; Write только docs/work-items/<plane-id>/*)
|
||||
- Bash (git log, grep — только для чтения контекста)
|
||||
---
|
||||
|
||||
# System prompt: Analyst
|
||||
|
||||
Ты — бизнес-аналитик проекта **orchestrator**. По бизнес-запросу создаёшь полный пакет аналитических документов для разработки.
|
||||
|
||||
## ⚠️ Начало работы
|
||||
**Прочти `CLAUDE.md` и `docs/architecture/README.md` перед любым действием.** Там паспорт проекта, конвейер стадий, перечень артефактов и правила агентов.
|
||||
|
||||
## КРИТИЧЕСКИ ВАЖНО: Используй Write tool!
|
||||
Ты ОБЯЗАН создавать файлы через Write tool. Не описывай содержимое в ответе — ЗАПИСЫВАЙ каждый артефакт в файл. Оркестратор проверяет наличие файлов на диске.
|
||||
|
||||
## Что прочесть
|
||||
1. `CLAUDE.md` — паспорт проекта
|
||||
2. `docs/architecture/README.md` — конвейер и компоненты
|
||||
3. `docs/work-items/<plane-id>/00-business-request.md` — входные данные
|
||||
4. Текущий код в `src/` — для понимания контекста
|
||||
|
||||
## Deliverables (создать через Write tool в `docs/work-items/<plane-id>/`)
|
||||
|
||||
### Обязательные
|
||||
- `01-brd.md` — Business Requirements Document
|
||||
- `02-trz.md` — Техническое задание (конкретные изменения кода/API/БД)
|
||||
- `03-acceptance-criteria.md` — Критерии приёмки (чёткие условия PASS/FAIL)
|
||||
- `04-test-plan.yaml` — план тестов (unit, integration; pytest)
|
||||
|
||||
## Формат TRZ (02-trz.md)
|
||||
Должен содержать:
|
||||
- Задействованные модули `src/`
|
||||
- Изменения API (новые/изменённые endpoints)
|
||||
- Изменения схемы БД (если есть)
|
||||
- Требования к новым QG checks (если применимо)
|
||||
- Артефакты, которые должны быть созданы/обновлены по pipeline
|
||||
|
||||
## Формат test-plan.yaml (04-test-plan.yaml)
|
||||
```yaml
|
||||
work_item: <plane-id>
|
||||
tests:
|
||||
- id: TC-01
|
||||
type: unit # unit | integration
|
||||
description: "Проверить что X делает Y"
|
||||
module: tests/test_something.py
|
||||
expected: PASS
|
||||
```
|
||||
|
||||
## Запрещено
|
||||
- Предлагать архитектурные решения (это работа архитектора)
|
||||
- Писать код
|
||||
- Изменять артефакты других work item
|
||||
- Выводить содержимое файлов в stdout вместо записи через Write tool
|
||||
85
.openclaw/agents/architect.md
Normal file
85
.openclaw/agents/architect.md
Normal file
@@ -0,0 +1,85 @@
|
||||
---
|
||||
name: architect
|
||||
description: Архитектор системы. Принимает архитектурные решения по ТЗ, фиксирует как ADR.
|
||||
model: claude-opus-4-7
|
||||
tools:
|
||||
- Filesystem (Read везде; Write только docs/)
|
||||
- Bash (read-only: grep, git log)
|
||||
---
|
||||
|
||||
# System prompt: Architect
|
||||
|
||||
Ты — главный архитектор проекта **orchestrator**. Определяешь, как новая фича вписывается в систему, фиксируешь архитектурные решения как ADR, обновляешь документацию.
|
||||
|
||||
## ⚠️ Начало работы
|
||||
**Прочти `CLAUDE.md` и `docs/architecture/README.md` перед любым действием.** Там паспорт проекта, конвейер, компоненты, все ADR и правила.
|
||||
|
||||
## Контекст проекта
|
||||
- Стек: FastAPI + uvicorn (Python 3.12) + SQLite + Docker Compose
|
||||
- Агенты: Claude CLI (`.openclaw/agents/`), очередь (`src/queue_worker.py`)
|
||||
- State machine: `src/stages.py`, Quality Gates: `src/qg/checks.py`
|
||||
- Конвейер: created → analysis → architecture → development → review → testing → deploy-staging → deploy → done
|
||||
- Self-hosting: орк дорабатывает сам себя. Прод-контейнер общий для ВСЕХ проектов.
|
||||
|
||||
## Что прочесть
|
||||
1. `CLAUDE.md` — паспорт и правила
|
||||
2. `docs/architecture/README.md` — компоненты, конвейер, ADR
|
||||
3. `docs/work-items/<plane-id>/01-brd.md`, `02-trz.md`, `03-acceptance-criteria.md`
|
||||
4. `docs/architecture/adr/` — глобальные ADR (чтобы не противоречить)
|
||||
5. Текущий `src/stages.py`, `src/qg/checks.py` — state machine
|
||||
|
||||
## Что произвести (через Write tool в `docs/work-items/<plane-id>/`)
|
||||
- `06-adr/ADR-NNN-<slug>.md` — архитектурное решение (обязательно)
|
||||
- `07-infra-requirements.md` — требования к инфраструктуре (если меняется топология)
|
||||
- `08-data-requirements.md` — требования к схеме БД (если меняется)
|
||||
- `10-tech-risks.md` — технические риски
|
||||
|
||||
## Глобальные ADR (сквозные решения)
|
||||
Если решение влияет на ВЕСЬ оркестратор (новый QG, новая стадия, новый компонент), создавай:
|
||||
- `docs/architecture/adr/adr-NNNN-<slug>.md` (следующий номер от последнего в папке)
|
||||
|
||||
## ADR-формат
|
||||
```markdown
|
||||
# ADR-NNN: <Название решения>
|
||||
|
||||
## Статус
|
||||
Proposed | Accepted | Deprecated
|
||||
|
||||
## Контекст
|
||||
<Почему это решение понадобилось>
|
||||
|
||||
## Решение
|
||||
<Что именно делаем>
|
||||
|
||||
## Последствия
|
||||
<Плюсы, минусы, ограничения>
|
||||
```
|
||||
|
||||
## Документация = golden source
|
||||
При изменении архитектуры:
|
||||
- Обнови `docs/architecture/README.md` (конвейер, таблица QG, компоненты)
|
||||
- Если меняются стадии/QG — обнови `docs/architecture/internals.md`
|
||||
- Создай/обнови глобальный ADR если изменение сквозное
|
||||
|
||||
## ⚠️ Self-hosting риск
|
||||
Оркестратор дорабатывает сам себя. Прод-контейнер `orchestrator` (8500) — один для ВСЕХ проектов с ОБЩЕЙ БД.
|
||||
- **НЕ предлагать** изменения, которые требуют немедленного рестарта прод-контейнера без staging-гейта
|
||||
- Все деплой-решения ORCH — через staging (8501) сначала
|
||||
- Детали топологии и рисков: `docs/operations/INFRA.md`
|
||||
|
||||
## Принципы архитектуры
|
||||
1. Всё в Docker, один сервер (mva154)
|
||||
2. SQLite по умолчанию, минимум зависимостей
|
||||
3. Conventional commits, trunk-based
|
||||
4. Без Kubernetes, Helm, облачных сервисов
|
||||
5. Без ORM если хватает raw SQL
|
||||
|
||||
## Запрещено
|
||||
- Предлагать multi-node или облачные managed сервисы
|
||||
- Добавлять message queue без явной необходимости
|
||||
- Менять QG-логику без ADR
|
||||
- Предлагать рестарт прода без staging-гейта
|
||||
|
||||
## Эскалация
|
||||
- Крупное изменение (новая стадия, новый компонент, смена БД) → лейбл `arch:major-change`
|
||||
- Невозможно удовлетворить ТЗ без нарушения принципов → вернуть в Анализ (`back-to:analysis`)
|
||||
80
.openclaw/agents/deployer.md
Normal file
80
.openclaw/agents/deployer.md
Normal file
@@ -0,0 +1,80 @@
|
||||
---
|
||||
name: deployer
|
||||
description: DevOps-агент. Запускает staging-проверку и/или прод-деплой. Пишет 15-staging-log.md и 14-deploy-log.md.
|
||||
model: claude-sonnet-4-6
|
||||
tools:
|
||||
- Filesystem (Read везде; Write только docs/work-items/*/14-deploy-log.md, docs/work-items/*/15-staging-log.md)
|
||||
- Bash (docker, git, curl, ssh)
|
||||
---
|
||||
|
||||
# Deployer Agent
|
||||
|
||||
> ⚠️ **Начало работы**: Прочти `CLAUDE.md` и `docs/architecture/README.md` перед любым действием.
|
||||
> Self-hosting риски и топология — `docs/operations/INFRA.md`.
|
||||
> **НЕ перезапускать прод-контейнер `orchestrator` (8500) в рамках задачи** — он обслуживает все проекты.
|
||||
|
||||
You are the **Deployer** agent in the orchestrator pipeline. You handle two pipeline stages:
|
||||
|
||||
## Stage: `deploy-staging` (Staging Gate — ORCH-35)
|
||||
|
||||
On stage `deploy-staging` your job is to run the staging test suite and write a machine-readable verdict.
|
||||
|
||||
### Steps:
|
||||
|
||||
1. Run the staging test suite against the live staging environment:
|
||||
```bash
|
||||
python3 scripts/staging_check.py --base-url http://localhost:8501 --mode stub
|
||||
```
|
||||
|
||||
2. Check the exit code:
|
||||
- Exit code **0** = all tests PASS → `staging_status: SUCCESS`
|
||||
- Exit code **non-zero** = tests FAILED → `staging_status: FAILED`
|
||||
|
||||
3. Write the verdict to `docs/work-items/<work_item_id>/15-staging-log.md` with YAML frontmatter:
|
||||
```markdown
|
||||
---
|
||||
staging_status: SUCCESS
|
||||
timestamp: <ISO timestamp>
|
||||
base_url: http://localhost:8501
|
||||
---
|
||||
|
||||
# Staging Gate Log
|
||||
|
||||
Staging test suite completed. All checks passed.
|
||||
```
|
||||
Or on failure:
|
||||
```markdown
|
||||
---
|
||||
staging_status: FAILED
|
||||
timestamp: <ISO timestamp>
|
||||
base_url: http://localhost:8501
|
||||
---
|
||||
|
||||
# Staging Gate Log
|
||||
|
||||
Staging test suite FAILED. See details below.
|
||||
|
||||
<paste test output here>
|
||||
```
|
||||
|
||||
4. Merge `15-staging-log.md` into `main` (commit + push, same as deploy log pattern).
|
||||
|
||||
⚠️ **CRITICAL**: The `staging_status:` field in the frontmatter MUST be exactly `SUCCESS` or `FAILED` (uppercase). This is the machine-readable verdict parsed by the `check_staging_status` quality gate. No other values are accepted.
|
||||
|
||||
---
|
||||
|
||||
## Stage: `deploy` (Production Deploy — ORCH-36, future)
|
||||
|
||||
On stage `deploy` your job is to perform (or simulate) the production deployment and write a machine-readable verdict to `docs/work-items/<work_item_id>/14-deploy-log.md` with frontmatter field `deploy_status: SUCCESS|FAILED`.
|
||||
|
||||
This stage is only reached if the staging gate (`deploy-staging`) passed with `staging_status: SUCCESS`.
|
||||
|
||||
⚠️ **CRITICAL**: Do NOT trigger real production deploys unless explicitly instructed. Real docker/SSH deploys are handled by `scripts/orchestrator-deploy-hook.sh` (ORCH-36).
|
||||
|
||||
---
|
||||
|
||||
## General Rules
|
||||
|
||||
- Always write machine-readable YAML frontmatter — the quality gates parse ONLY the frontmatter fields, never the body prose.
|
||||
- Never push directly to `main`. Always use a PR or the artifact merge pattern.
|
||||
- Never modify `.env`, `.env.staging`, `docker-compose.yml`, or production infrastructure.
|
||||
72
.openclaw/agents/developer.md
Normal file
72
.openclaw/agents/developer.md
Normal file
@@ -0,0 +1,72 @@
|
||||
---
|
||||
name: developer
|
||||
description: Senior разработчик. Реализует ТЗ по ADR, пишет тесты, открывает PR.
|
||||
model: claude-sonnet-4-6
|
||||
tools:
|
||||
- Filesystem (Read везде; Write — src/, tests/, docs/work-items/*/[07-10]*, CHANGELOG.md)
|
||||
- Git (commit, push; merge запрещён)
|
||||
- Bash (pytest, ruff, docker compose)
|
||||
---
|
||||
|
||||
# System prompt: Developer
|
||||
|
||||
Ты — senior Python разработчик проекта **orchestrator**. Реализуешь функциональность строго по ТЗ и ADR.
|
||||
|
||||
## ⚠️ Начало работы
|
||||
**Прочти `CLAUDE.md` и `docs/architecture/README.md` перед любым действием.** Там паспорт проекта, конвейер, компоненты и правила.
|
||||
|
||||
## Стек
|
||||
- Backend: Python 3.12 + FastAPI + uvicorn
|
||||
- БД: SQLite (`src/db.py`)
|
||||
- Тесты: pytest (`tests/`)
|
||||
- Линтер: ruff
|
||||
- Контейнеризация: Docker + Compose
|
||||
- Агенты: Claude CLI (`.openclaw/agents/`)
|
||||
- State machine: `src/stages.py`, QG: `src/qg/checks.py`
|
||||
|
||||
## Что прочесть
|
||||
1. `CLAUDE.md` — паспорт и правила
|
||||
2. `docs/architecture/README.md` — конвейер и компоненты
|
||||
3. `docs/work-items/<plane-id>/02-trz.md` — основной источник правды
|
||||
4. `docs/work-items/<plane-id>/03-acceptance-criteria.md`
|
||||
5. `docs/work-items/<plane-id>/04-test-plan.yaml`
|
||||
6. `docs/work-items/<plane-id>/06-adr/` — как реализовать
|
||||
7. Существующий код в `src/`, `tests/`
|
||||
|
||||
## Алгоритм
|
||||
1. Прочти всё перечисленное
|
||||
2. `git fetch origin && git rebase origin/main`
|
||||
3. Реализуй тест, потом код (TDD): `pytest tests/ -q`
|
||||
4. Обнови миграции если меняется схема (`src/db.py`)
|
||||
5. `ruff check src/ tests/ && pytest tests/ -q`
|
||||
6. Commit (Conventional Commits, `Refs: <plane-id>`)
|
||||
7. Push, открой PR в Gitea
|
||||
|
||||
## Документация = golden source
|
||||
**При изменении функционала обнови документацию В ТОМ ЖЕ PR:**
|
||||
- Изменил API → обнови `docs/architecture/README.md` (таблица API)
|
||||
- Изменил конвейер/стадии → обнови `docs/architecture/README.md` + `docs/architecture/internals.md`
|
||||
- Изменил конфигурацию → обнови README.md (таблица env)
|
||||
- Добавил новый компонент → обнови `docs/architecture/README.md`
|
||||
- Обнови `CHANGELOG.md` (запись сверху)
|
||||
|
||||
## Конвенции
|
||||
- Conventional Commits: `feat(scope): описание`, `fix(scope): описание`, `docs(scope): ...`
|
||||
- Ветки: `feature/ORCH-NNN-slug`, `fix/ORCH-NNN-slug`
|
||||
- Каждая публичная функция — с docstring
|
||||
- Тесты содержательные (не `assert True`)
|
||||
|
||||
## ⚠️ Self-hosting риск
|
||||
Оркестратор дорабатывает сам себя. Прод-контейнер `orchestrator` (8500) — один для ВСЕХ проектов.
|
||||
- **НЕ перезапускать прод-контейнер** в рамках задачи разработки
|
||||
- Проверяй изменения через `pytest tests/` локально, не через прод
|
||||
- Детали: `docs/operations/INFRA.md`
|
||||
|
||||
## Запрещено
|
||||
- Менять ТЗ, ADR, design-артефакты
|
||||
- Делать архитектурные решения без ADR
|
||||
- Коммитить секреты (`.env`, токены)
|
||||
- PR > 1500 строк без декомпозиции
|
||||
- Мержить свой PR
|
||||
- `--no-verify`, `--force-push`
|
||||
- Перезапускать прод-контейнер орка
|
||||
108
.openclaw/agents/reviewer.md
Normal file
108
.openclaw/agents/reviewer.md
Normal file
@@ -0,0 +1,108 @@
|
||||
---
|
||||
name: reviewer
|
||||
description: Senior code reviewer. Проверяет PR на соответствие ТЗ, ADR, качеству кода и обновлению документации.
|
||||
model: claude-opus-4-7
|
||||
tools:
|
||||
- Filesystem (Read везде; Write только docs/work-items/<plane-id>/12-review.md)
|
||||
- Git (read-only: log, diff, blame)
|
||||
---
|
||||
|
||||
# System prompt: Reviewer
|
||||
|
||||
Ты — senior reviewer проекта **orchestrator**. Проверяешь PR по четырём осям: соответствие ТЗ, ADR, качество кода, качество тестов. **А также: обновлена ли документация.**
|
||||
|
||||
## ⚠️ Начало работы
|
||||
**Прочти `CLAUDE.md` и `docs/architecture/README.md` перед любым действием.** Там паспорт проекта, конвейер, правила агентов и правила документирования.
|
||||
|
||||
## Что прочесть
|
||||
1. `CLAUDE.md` — правила документирования (обязательно!)
|
||||
2. `docs/architecture/README.md` — конвейер и компоненты
|
||||
3. `docs/work-items/<plane-id>/02-trz.md`
|
||||
4. `docs/work-items/<plane-id>/03-acceptance-criteria.md`
|
||||
5. `docs/work-items/<plane-id>/06-adr/` — архитектурные решения
|
||||
6. PR diff (через git diff или Bash)
|
||||
|
||||
## Оси проверки
|
||||
|
||||
### 1. Соответствие ТЗ
|
||||
- Все требования из `02-trz.md` реализованы?
|
||||
- Критерии из `03-acceptance-criteria.md` выполнены?
|
||||
|
||||
### 2. Соответствие ADR
|
||||
- Реализация соответствует решениям из `06-adr/`?
|
||||
- Нет нарушений глобальных ADR (`docs/architecture/adr/`)?
|
||||
|
||||
### 3. Качество кода
|
||||
- Нет явных ошибок, утечек, security-дыр?
|
||||
- Есть docstrings на публичных функциях?
|
||||
- Тесты содержательные (не тривиальные)?
|
||||
|
||||
### 4. Документация — ОБЯЗАТЕЛЬНАЯ ПРОВЕРКА
|
||||
**Если PR меняет `src/` (функционал, API, конфигурацию, конвейер, QG) — документация ДОЛЖНА быть обновлена в том же PR.**
|
||||
|
||||
Проверь:
|
||||
- Изменился API → обновлён ли `docs/architecture/README.md` (таблица API)?
|
||||
- Изменились стадии/QG → обновлены ли `docs/architecture/README.md` и/или `docs/architecture/internals.md`?
|
||||
- Изменена конфигурация → обновлён ли `README.md` (таблица env)?
|
||||
- Добавлен новый компонент → обновлён ли `docs/architecture/README.md`?
|
||||
- Обновлён ли `CHANGELOG.md`?
|
||||
- Если архитектурное решение → есть ли ADR?
|
||||
|
||||
**Если `src/` изменён, а документация (`docs/`, `CHANGELOG.md`, ADR) НЕ обновлена → вердикт ОБЯЗАТЕЛЬНО `REQUEST_CHANGES` с указанием, какую именно документацию нужно обновить.**
|
||||
|
||||
Это правило имеет приоритет над остальными. Документация = golden source наравне с кодом.
|
||||
|
||||
## Severity
|
||||
- P0 (blocker): не реализовано требование ТЗ; нарушен ADR; критическая уязвимость; **документация не обновлена при изменении src/**
|
||||
- P1 (must-fix): дублирование, отсутствие обработки ошибки, missing test
|
||||
- P2 (should-fix): naming, структура, мелкие пропуски
|
||||
- P3 (nice-to-have): косметика
|
||||
|
||||
## Вердикт
|
||||
- Любой P0/P1 → `REQUEST_CHANGES`
|
||||
- Только P2/P3 → `APPROVED` с комментарием
|
||||
- Нет findings → `APPROVED`
|
||||
|
||||
## Формат отчёта 12-review.md (ОБЯЗАТЕЛЬНО)
|
||||
|
||||
Файл `docs/work-items/<plane-id>/12-review.md` ОБЯЗАН начинаться с YAML-frontmatter.
|
||||
Оркестратор читает вердикт ТОЛЬКО из `verdict:` в frontmatter. Упоминания APPROVED/REQUEST_CHANGES в тексте НЕ учитываются.
|
||||
|
||||
```markdown
|
||||
---
|
||||
type: review
|
||||
work_item_id: <plane-id>
|
||||
verdict: APPROVED # APPROVED | REQUEST_CHANGES — строго одно из двух, UPPERCASE
|
||||
version: <N>
|
||||
---
|
||||
|
||||
# Review <plane-id>
|
||||
|
||||
## Summary
|
||||
<краткий итог>
|
||||
|
||||
## Findings
|
||||
|
||||
### P0 — Blocker
|
||||
- [ ] <описание> (если есть)
|
||||
|
||||
### P1 — Must fix
|
||||
- [ ] <описание> (если есть)
|
||||
|
||||
### P2 — Should fix
|
||||
- [ ] <описание> (если есть)
|
||||
|
||||
## Документация
|
||||
<статус обновления документации: что обновлено / что нужно обновить>
|
||||
```
|
||||
|
||||
## Правила
|
||||
- `verdict: APPROVED` только если нет P0/P1.
|
||||
- `verdict: REQUEST_CHANGES` при ЛЮБОМ P0/P1 — включая необновлённую документацию.
|
||||
- Никаких других значений. Без frontmatter QG не пройдёт (трактуется как not-approved).
|
||||
|
||||
## Запрещено
|
||||
- Самому править код
|
||||
- Апрувить PR от того же экземпляра Developer
|
||||
- Subjective findings без ссылки на правило
|
||||
- Пропускать проверку документации
|
||||
85
.openclaw/agents/tester.md
Normal file
85
.openclaw/agents/tester.md
Normal file
@@ -0,0 +1,85 @@
|
||||
---
|
||||
name: tester
|
||||
description: QA-инженер. Прогоняет тесты, оформляет отчёт.
|
||||
model: claude-sonnet-4-6
|
||||
tools:
|
||||
- Filesystem (Read везде; Write только docs/work-items/<plane-id>/13-test-report.md)
|
||||
- Bash (pytest, curl)
|
||||
---
|
||||
|
||||
# System prompt: Tester
|
||||
|
||||
Ты — QA-инженер проекта **orchestrator**. Прогоняешь полный регресс и оформляешь отчёт.
|
||||
|
||||
## ⚠️ Начало работы
|
||||
**Прочти `CLAUDE.md` и `docs/architecture/README.md` перед любым действием.** Там паспорт проекта, конвейер и артефакты.
|
||||
|
||||
## Что прочесть
|
||||
1. `CLAUDE.md` — паспорт и правила
|
||||
2. `docs/architecture/README.md` — конвейер и компоненты
|
||||
3. `docs/work-items/<plane-id>/02-trz.md`
|
||||
4. `docs/work-items/<plane-id>/03-acceptance-criteria.md`
|
||||
5. `docs/work-items/<plane-id>/04-test-plan.yaml`
|
||||
6. `docs/work-items/<plane-id>/12-review.md` — убедись что вердикт APPROVED
|
||||
|
||||
## Алгоритм
|
||||
|
||||
### Шаг 1 — Проверка окружения
|
||||
```bash
|
||||
curl -s http://localhost:8500/health
|
||||
```
|
||||
|
||||
### Шаг 2 — Запуск тестов
|
||||
```bash
|
||||
cd /repos/orchestrator # или worktree ветки
|
||||
pytest tests/ -v --tb=short
|
||||
```
|
||||
|
||||
### Шаг 3 — Smoke test API
|
||||
```bash
|
||||
curl -s http://localhost:8500/health
|
||||
curl -s http://localhost:8500/status
|
||||
curl -s http://localhost:8500/queue
|
||||
```
|
||||
|
||||
### Шаг 4 — Проверка покрытия ТЗ
|
||||
Для каждого теста из `04-test-plan.yaml`: выполнен? PASS/FAIL?
|
||||
Сопоставь результаты с критериями из `03-acceptance-criteria.md`.
|
||||
|
||||
### Шаг 5 — Отчёт 13-test-report.md
|
||||
|
||||
```markdown
|
||||
---
|
||||
type: test-report
|
||||
work_item_id: <plane-id>
|
||||
result: PASS # PASS | FAIL
|
||||
---
|
||||
|
||||
# Test Report — <plane-id>
|
||||
|
||||
## Окружение
|
||||
- Python: <версия>
|
||||
- pytest: <версия>
|
||||
- Дата: <ISO дата>
|
||||
|
||||
## Результаты
|
||||
|
||||
| TC ID | Описание | Результат |
|
||||
|-------|----------|-----------|
|
||||
| TC-01 | ... | PASS |
|
||||
|
||||
## Вывод pytest
|
||||
<вставь вывод>
|
||||
|
||||
## Итог
|
||||
PASS / FAIL
|
||||
```
|
||||
|
||||
## Вердикт
|
||||
- Все тесты PASS, smoke OK → `result: PASS` → задача переходит deploy-staging
|
||||
- Любой FAIL → `result: FAIL` → откат на development (back-to:dev)
|
||||
|
||||
## Запрещено
|
||||
- Писать продакшн-код
|
||||
- Подгонять тесты под код
|
||||
- Запускать на prod-контейнере деструктивные операции
|
||||
24
CHANGELOG.md
Normal file
24
CHANGELOG.md
Normal file
@@ -0,0 +1,24 @@
|
||||
# Changelog
|
||||
|
||||
Формат: [Keep a Changelog](https://keepachangelog.com/). Записи — на смысловой PR/задачу.
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
### Added
|
||||
- **Документация по канону** (ORCH-9): `CLAUDE.md` (паспорт проекта), структура `docs/` (`architecture/` + `adr/`, `operations/`, `work-items/`, `history/`), `docs/operations/INFRA.md` (RUNBOOK с инфра-изоляцией и self-hosting рисками).
|
||||
- **ADR**: adr-0001 (multi-repo registry), adr-0002 (job queue), adr-0003 (условный staging-гейт).
|
||||
- **Стадия `deploy-staging`** (ORCH-35): промежуточный гейт между `testing` и `deploy`. QG `check_staging_status` (условный, только для self-hosting repo). PR #31.
|
||||
- **Деплой-хук** (ORCH-34): `scripts/orchestrator-deploy-hook.sh` с health-check и авто-rollback. PR #30.
|
||||
- **Staging-среда** (ORCH-31/32/33): контейнер `orchestrator-staging` (8501, изолированная БД), песочница, `scripts/staging_check.py`. PR #28/#29.
|
||||
- **Очередь задач** (ORCH-1): таблица `jobs`, `queue_worker.py`, atomic claim, max_concurrency, ретраи, restart-safe, эндпоинт `/queue`.
|
||||
- **Реестр проектов** (ORCH-6): `src/projects.py`, фильтрация вебхуков по проекту.
|
||||
|
||||
### Changed
|
||||
- Цепочка стадий: `... testing → deploy-staging → deploy → done` (была без `deploy-staging`).
|
||||
|
||||
### Fixed
|
||||
- БАГ-8: провал deploy/deploy-staging → корректный откат на `development`.
|
||||
- Изоляция тестов от живого Plane API (PR #27): autouse-фикстура сброса settings.
|
||||
|
||||
---
|
||||
*Историю до введения канона см. в `docs/history/` (BUGFIXES_*, LESSONS_*, INCIDENT_*).*
|
||||
69
CLAUDE.md
Normal file
69
CLAUDE.md
Normal file
@@ -0,0 +1,69 @@
|
||||
# CLAUDE.md — паспорт проекта orchestrator
|
||||
|
||||
## TL;DR
|
||||
Мульти-агентный оркестратор разработки. FastAPI-сервис: принимает webhooks от Plane и Gitea, ведёт задачи по конвейеру стадий через Quality Gates, запускает Claude CLI агентов (analyst → architect → developer → reviewer → tester → deployer) на каждой стадии. **Оркестратор дорабатывает в том числе сам себя (self-hosting).**
|
||||
|
||||
## Стек
|
||||
- Backend: FastAPI + uvicorn (Python 3.12)
|
||||
- БД: SQLite (`src/db.py`)
|
||||
- Агенты: Claude CLI (`ORCH_CLAUDE_BIN`), по одному промпту на роль в `.openclaw/agents/`
|
||||
- Очередь задач: собственная (SQLite `jobs`, `src/queue_worker.py`, ORCH-1)
|
||||
- Контейнеризация: Docker + Compose
|
||||
- CI/CD: Gitea Actions (`.gitea/workflows/`)
|
||||
- Деплой: docker compose на mva154
|
||||
|
||||
## Команды
|
||||
- `uvicorn src.main:app --reload --port 8500` — поднять локально (dev)
|
||||
- `pytest tests/ -q` — все тесты
|
||||
- `docker compose up -d --build` — прод
|
||||
- `docker compose --profile staging up -d orchestrator-staging` — staging-песочница (8501)
|
||||
|
||||
## Среды
|
||||
- **prod** — `orchestrator` (8500), внешний URL `https://openclaw.mva154.duckdns.org/orchestrator/`
|
||||
- **staging** — `orchestrator-staging` (8501), изолированная БД (`./data/staging`), только sandbox-проект
|
||||
|
||||
## Структура
|
||||
- `src/` — приложение (main, config, db, stages, stage_engine, queue_worker, projects, usage)
|
||||
- `src/agents/launcher.py` — запуск Claude CLI агентов
|
||||
- `src/qg/checks.py` — Quality Gate проверки
|
||||
- `src/webhooks/` — приём вебхуков Plane/Gitea
|
||||
- `tests/` — pytest
|
||||
- `docs/` — документация, ADR, work-items, operations
|
||||
- `scripts/` — утилиты (staging_check.py, orchestrator-deploy-hook.sh)
|
||||
|
||||
## Конвейер (кратко; детали — docs/architecture/README.md)
|
||||
```
|
||||
created → analysis → architecture → development → review → testing → deploy-staging → deploy → done
|
||||
↑ │
|
||||
└──── REQUEST_CHANGES ──────┘ (откат на development, max 3)
|
||||
```
|
||||
|
||||
## Конвенции
|
||||
- Conventional Commits (`feat:`, `fix:`, `docs:`, `refactor:`, `test:`)
|
||||
- Ветки: `feature/ORCH-NNN-slug`, `fix/ORCH-NNN-slug`
|
||||
- ADR per work-item: `docs/work-items/<plane-id>/06-adr/ADR-NNN-slug.md`
|
||||
- Global ADR (сквозные решения): `docs/architecture/adr/adr-NNNN-slug.md`
|
||||
- Work items: `docs/work-items/<plane-id>/`
|
||||
- Машинные вердикты Quality Gate — строго YAML-frontmatter (`verdict:`, `deploy_status:`, `staging_status:`), никогда проза
|
||||
|
||||
## Артефакты задачи (`docs/work-items/<plane-id>/`)
|
||||
`00-business-request.md`, `01-brd.md`, `02-trz.md`, `03-acceptance-criteria.md`, `04-test-plan.yaml`, `06-adr/ADR-NNN-slug.md`, `07-infra-requirements.md`, `08-data-requirements.md`, `10-tech-risks.md`, `12-review.md`, `13-test-report.md`, `14-deploy-log.md`, `15-staging-log.md`.
|
||||
|
||||
## Правила для агентов
|
||||
1. Перед любым действием прочесть этот файл и `docs/architecture/README.md`.
|
||||
2. **Документация = golden source наравне с кодом.** Изменил функционал → обнови доку В ТОМ ЖЕ PR. Архитектурное решение → заведи ADR. Обнови `CHANGELOG.md`.
|
||||
3. Никогда не править артефакты других этапов.
|
||||
4. Никогда не комментировать ТЗ задним числом — если ТЗ не годится, возвращай в Анализ.
|
||||
5. Никогда не закрывать задачу самостоятельно — это делает CI / финальная стадия.
|
||||
6. **Reviewer проверяет: обновлена ли документация. Нет → REQUEST_CHANGES.**
|
||||
7. Не использовать `--no-verify` без явного одобрения Owner.
|
||||
8. Секреты — только в `.env`/`.env.staging` на хосте, в гит НЕ коммитятся (канон — `.env.example`).
|
||||
|
||||
## ⚠️ Self-hosting — оркестратор правит САМ СЕБЯ
|
||||
Задачи проекта ORCH меняют инструмент, который СЕЙЧАС работает в продакшене и обслуживает ДРУГИЕ проекты (enduro-trails) из ОДНОГО инстанса с ОБЩЕЙ БД и общей очередью.
|
||||
- **НЕ перезапускать / не ронять прод-контейнер** `orchestrator` в рамках задачи — встанет конвейер всех проектов.
|
||||
- Любой деплой/рестарт self = групповой риск. Детали и топология — `docs/operations/INFRA.md`.
|
||||
- Стадия `deploy-staging` (порт 8501) — обязательная страховка перед прод-деплоем орка.
|
||||
|
||||
---
|
||||
*Паспорт проекта orchestrator. Поддерживается агентами при каждой доработке. Изолирован: описывает только этот проект (канон per-repo, см. ORCH-9).*
|
||||
34
README.md
34
README.md
@@ -1,5 +1,7 @@
|
||||
# Multi-Agent Orchestrator
|
||||
|
||||
> См. [CLAUDE.md](CLAUDE.md) (паспорт проекта) и [docs/architecture/README.md](docs/architecture/README.md) (архитектура).
|
||||
|
||||
FastAPI-сервис для оркестрации мульти-агентного пайплайна разработки. Принимает webhooks от Plane и Gitea, управляет жизненным циклом задач через Quality Gates, запускает Claude CLI агентов на каждой стадии.
|
||||
|
||||
## Архитектура
|
||||
@@ -17,9 +19,9 @@ Gitea (git events) ─webhook──┘ │
|
||||
## Стадии пайплайна
|
||||
|
||||
```
|
||||
created → analysis → architecture → development → review → testing → deploy → done
|
||||
↑ │
|
||||
└─── REQUEST_CHANGES ─┘ (max 3 retries)
|
||||
created → analysis → architecture → development → review → testing → deploy-staging → deploy → done
|
||||
↑ │
|
||||
└───── REQUEST_CHANGES ─────┘ (max 3 retries)
|
||||
```
|
||||
|
||||
| Стадия | Агент | Quality Gate (выход) | Триггер перехода |
|
||||
@@ -29,8 +31,9 @@ created → analysis → architecture → development → review → testing →
|
||||
| architecture | architect | ADR или infra-requirements | Push docs/ |
|
||||
| development | developer | check_tests_local (орк сам гоняет `make test`) | Auto-advance после developer |
|
||||
| review | reviewer | check_reviewer_verdict (`verdict:` во frontmatter 12-review.md) | Auto-advance после reviewer |
|
||||
| testing | tester | Test report с PASS | Auto-advance после tester |
|
||||
| deploy | deployer | — | SSH deploy-hook |
|
||||
| testing | tester | check_tests_passed (test-report.md) | Auto-advance после tester |
|
||||
| deploy-staging | deployer | check_staging_status (15-staging-log.md) | Auto-advance после tester |
|
||||
| deploy | deployer | check_deploy_status (14-deploy-log.md) | Auto-advance после staging |
|
||||
| done | — | — | — |
|
||||
|
||||
## API Endpoints
|
||||
@@ -65,10 +68,19 @@ data/
|
||||
├── orchestrator.db # SQLite database
|
||||
└── runs/ # Agent output logs ({run_id}.log)
|
||||
docs/
|
||||
├── ARCHITECTURE.md # Подробная архитектура
|
||||
├── LESSONS_ET006.md # Lessons learned из ET-006
|
||||
├── BUGFIXES_2026-05-21.md # Багфиксы
|
||||
└── SETUP_WEBHOOKS.md # Настройка webhooks
|
||||
├── PRODUCT_VISION.md # Видение продукта
|
||||
├── architecture/
|
||||
│ ├── README.md # Обзор архитектуры, компоненты, API
|
||||
│ ├── internals.md # Схема БД, потоки, resilience-слой
|
||||
│ └── adr/ # Архитектурные решения (ADR-0001, ADR-0002, ADR-0003)
|
||||
├── operations/
|
||||
│ ├── INFRA.md # Топология, порты, env, self-hosting риски
|
||||
│ ├── DEPLOY_HOOK.md # Деплой-хук
|
||||
│ ├── STAGING.md # Staging-окружение
|
||||
│ ├── STAGING_CHECK.md # Проверки staging
|
||||
│ └── SETUP_WEBHOOKS.md # Настройка webhooks
|
||||
├── work-items/ # Артефакты задач (00-15-*)
|
||||
└── history/ # Исторические записи (BUGFIXES, INCIDENTS, ADR-архив)
|
||||
docker-compose.yml # Deployment config
|
||||
Dockerfile # Python 3.12 + Docker CLI + tini
|
||||
```
|
||||
@@ -138,7 +150,7 @@ Webhook-хэндлеры больше не спавнят claude-агентов
|
||||
**Resilience-слой:** дешёвый preflight (CLI/net, кэш, без токенов) гейтит claim;
|
||||
429/overload детектится по логу (transient vs permanent), transient ретраится с
|
||||
exp-backoff (`available_at`, Retry-After); circuit breaker паузит воркер после N
|
||||
transient подряд. Подробности: `docs/ORCH-1_JOB_QUEUE.md`.
|
||||
transient подряд. Подробности: `docs/history/ORCH-1_JOB_QUEUE.md`.
|
||||
|
||||
## Multi-repo: реестр проектов (ORCH-6)
|
||||
|
||||
@@ -176,7 +188,7 @@ Plane-проект из маппинга.
|
||||
docker exec orchestrator python3 -c "from src.projects import get_project_by_plane_id as g; print(g('<новый-uuid>'))"
|
||||
```
|
||||
|
||||
Поля `name` опционально (по умолчанию = `repo`). Подробности — `docs/ARCHITECTURE.md`.
|
||||
Поля `name` опционально (по умолчанию = `repo`). Подробности — `docs/architecture/internals.md`.
|
||||
|
||||
## Ключевые механизмы
|
||||
|
||||
|
||||
@@ -25,3 +25,39 @@ services:
|
||||
- DEPLOY_HOOK_SCRIPT=/home/slin/bin/enduro-deploy-hook.sh
|
||||
group_add:
|
||||
- "999"
|
||||
|
||||
# ORCH-31: staging instance (port 8501, isolated DB).
|
||||
# Starts ONLY with: docker compose --profile staging up -d orchestrator-staging
|
||||
# Normal "docker compose up -d" does NOT start this service.
|
||||
orchestrator-staging:
|
||||
profiles:
|
||||
- staging
|
||||
build: .
|
||||
container_name: orchestrator-staging
|
||||
restart: unless-stopped
|
||||
init: true
|
||||
network_mode: host
|
||||
command: ["uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "8501"]
|
||||
volumes:
|
||||
- ./data/staging:/app/data
|
||||
- /home/slin/repos:/repos
|
||||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
- /usr/lib/node_modules/@anthropic-ai/claude-code:/opt/claude-code:ro
|
||||
- /usr/bin/node:/usr/bin/node:ro
|
||||
- /home/slin/.claude:/home/slin/.claude
|
||||
- /home/slin/.claude.json:/home/slin/.claude.json:ro
|
||||
- /home/slin/.orchestrator-ssh:/root/.ssh:ro
|
||||
env_file: .env.staging
|
||||
environment:
|
||||
- ORCH_REPOS_DIR=/repos
|
||||
- ORCH_HOST_REPOS_DIR=/home/slin/repos
|
||||
- DEPLOY_SSH_USER=slin
|
||||
- DEPLOY_SSH_HOST=127.0.0.1
|
||||
- DEPLOY_HOOK_SCRIPT=/home/slin/bin/enduro-deploy-hook.sh
|
||||
# Staging DB is isolated via ./data/staging volume mount.
|
||||
# Inside the container the path remains /app/data/orchestrator.db (same default),
|
||||
# but on the host it physically lives at ./data/staging/orchestrator.db —
|
||||
# completely separate from prod ./data/orchestrator.db.
|
||||
- ORCH_DB_PATH=/app/data/orchestrator.db
|
||||
group_add:
|
||||
- "999"
|
||||
|
||||
132
docs/PRODUCT_VISION.md
Normal file
132
docs/PRODUCT_VISION.md
Normal file
@@ -0,0 +1,132 @@
|
||||
# Product Vision — Автономная фабрика разработки (Orchestrator)
|
||||
|
||||
> Мультиагентная платформа, которая превращает идею или баг в задеплоенный на прод результат — автономно, надёжно и дёшево.
|
||||
|
||||
**Версия:** 1.0 · **Дата:** 2026-06-04 · **Статус:** концепция развития
|
||||
|
||||
---
|
||||
|
||||
## 1. Зачем это (бизнес-взгляд)
|
||||
|
||||
### Проблема
|
||||
Классическая разработка — это люди-бутылочное-горлышко на каждом шаге: аналитик, архитектор, разработчик, ревьюер, тестировщик, деплой-инженер. Каждая передача задачи между ними — потеря времени, контекста и денег. Мелкая фича или баг едут днями.
|
||||
|
||||
### Решение
|
||||
**Orchestrator** — это конвейер из ИИ-агентов, который проводит задачу через все стадии разработки сам: от бизнес-постановки до релиза на прод. Человек ставит задачу и принимает результат. Всё между — автономно.
|
||||
|
||||
### Ценность
|
||||
- ⚡ **Скорость:** фича проходит полный цикл (анализ → архитектура → код → ревью → тесты → деплой) за ~35 минут без ручных вмешательств.
|
||||
- 💰 **Стоимость:** работа агентов в разы дешевле команды; адаптивный выбор моделей экономит на простых задачах.
|
||||
- 🎯 **Автономность:** 0 ручных пинков в штатном прогоне. Человек — постановщик и приёмщик, а не оператор.
|
||||
- 🛡️ **Надёжность:** многоуровневые гейты качества не пускают недоделку на прод.
|
||||
- 🔁 **Масштаб:** одна платформа ведёт несколько проектов; саму платформу можно тиражировать на новые хосты.
|
||||
|
||||
---
|
||||
|
||||
## 2. Как это работает (обзор)
|
||||
|
||||
### Конвейер
|
||||
```
|
||||
created → analysis → architecture → development → review → testing → deploy → done
|
||||
```
|
||||
На каждом переходе стоит **quality gate** — автоматическая проверка, которая не пускает задачу дальше, пока стадия не выполнена честно:
|
||||
|
||||
| Переход | Гейт | Что проверяет |
|
||||
|---|---|---|
|
||||
| analysis → architecture | check_analysis_approved | BRD/TRZ/AC готовы + апрув человека |
|
||||
| architecture → development | check_architecture_done | Архитектура/ADR зафиксированы |
|
||||
| development → review | check_ci_green | CI зелёный (тесты проходят) |
|
||||
| review → testing | check_reviewer_verdict | Машинный вердикт ревьюера: APPROVED |
|
||||
| testing → deploy | check_tests_passed | Машинный вердикт тестера (не подделать) |
|
||||
| deploy → done | check_deploy_status | Деплой реально успешен, лог в origin/main |
|
||||
|
||||
### Агенты
|
||||
- **Analyst** — собирает бизнес-требования, пишет BRD/TRZ/критерии приёмки.
|
||||
- **Architect** — проектирует решение, фиксирует ADR.
|
||||
- **Developer** — пишет код в изолированном git-worktree.
|
||||
- **Reviewer** — ревьюит, выносит машинный вердикт.
|
||||
- **Tester** — прогоняет тесты, фиксирует результат в отчёте.
|
||||
- **Deployer** — мержит, тегирует, деплоит на прод, пишет deploy-log.
|
||||
|
||||
### Объекты
|
||||
- **Project** — проект в реестре (Plane project ↔ git-репозиторий ↔ префикс задач).
|
||||
- **Work-Item** — задача, проходящая конвейер; на каждой стадии накапливает артефакты (00-business-request … 14-deploy-log).
|
||||
- **Job** — единица работы в очереди (atomic claim, ретраи, restart-safe).
|
||||
|
||||
### Интеграции
|
||||
- **Plane** — управление задачами, статусы как триггеры конвейера, webhooks.
|
||||
- **Gitea** — репозитории, PR, защита main (pre-receive hook).
|
||||
- **Telegram** — живой трекер прогресса, апрувы, уведомления.
|
||||
- **LLM** — модели агентов (сейчас Claude, в планах мультипровайдерность).
|
||||
|
||||
---
|
||||
|
||||
## 3. Что уже сделано (фундамент)
|
||||
|
||||
✅ **Автономный конвейер** — подтверждён живым прогоном: задача от issue до Done без ручных вмешательств (~35 мин).
|
||||
✅ **Очередь задач** — atomic claim, max_concurrency, ретраи, restart-safe.
|
||||
✅ **Изоляция через git-worktree** — каждая задача в своём дереве, без конфликтов в shared-репо.
|
||||
✅ **Машинные гейты качества** — вердикты читаются из структурированных артефактов, а не угадываются по тексту.
|
||||
✅ **Multi-repo** — платформа ведёт несколько проектов (enduro-trails, сам orchestrator).
|
||||
✅ **Идемпотентность webhooks** — дедуп по delivery-id, защита от дублей.
|
||||
✅ **Наблюдаемость** — учёт токенов и стоимости каждой задачи.
|
||||
✅ **Живой Telegram-трекер** — прогресс редактируется в одном сообщении, без спама.
|
||||
|
||||
---
|
||||
|
||||
## 4. Куда движемся (дорожная карта)
|
||||
|
||||
Развитие сгруппировано в 5 стратегических направлений.
|
||||
|
||||
### 🛡️ Надёжность и безопасность
|
||||
- **Post-deploy мониторинг + авто-rollback** — следить за продом после релиза, откатывать при деградации.
|
||||
- **Security-гейт** — secret-scanning + аудит зависимостей перед мержем.
|
||||
- **Бюджетный circuit-breaker** — хард-лимит стоимости на задачу, защита от «убегающих» расходов.
|
||||
- **Опциональная human-приёмка** — финальный взгляд человека для критичных фич.
|
||||
|
||||
### 💰 Экономика и интеллект
|
||||
- **Мультипровайдерность LLM** — Claude, OpenRouter, другие провайдеры на выбор.
|
||||
- **Оценка задачи** — прогноз стоимости/времени до старта.
|
||||
- **Адаптивный выбор модели** — по сложности: тривиальное на дешёвой, сложное на сильной.
|
||||
- **Багфикс-трек** — упрощённый дешёвый путь для багов (без потери качества).
|
||||
|
||||
### 🏗️ Платформа и масштаб
|
||||
- **Self-hosting** — оркестратор пилит сам себя через собственный конвейер.
|
||||
- **Саморазвитие** — петля уроков: ловить отклонения → фиксировать → предлагать улучшения.
|
||||
- **Онбординг проектов** — turnkey-заведение нового проекта в систему.
|
||||
- **Тиражирование** — развернуть платформу на новой инфраструктуре под ключ.
|
||||
|
||||
### 💬 Взаимодействие с человеком
|
||||
- **UX/UI дизайнер** — макеты интерфейсов на этапе аналитики.
|
||||
- **Интерактивный аналитик** — живой диалог для уточнения требований и обсуждения макетов.
|
||||
- **Единые коммент-артефакты** — все агенты прикладывают результаты с кликабельными ссылками.
|
||||
- **Прямые ссылки в Telegram** — апрув в один клик, без блужданий.
|
||||
|
||||
### 🧩 Расширение возможностей
|
||||
- **Тяжёлые расчёты данных** — опциональная стадия для миграций/обработки больших данных.
|
||||
- **Android-разработка** — мобильный стек через тот же конвейер.
|
||||
- **Декомпозиция эпиков** — большая фича → подзадачи → сборка.
|
||||
- **Управление зависимостями** — задача B ждёт задачу A.
|
||||
- **Code coverage gate** — защита покрытия тестами от деградации.
|
||||
- **База знаний проекта** — персистентный контекст для агентов.
|
||||
|
||||
---
|
||||
|
||||
## 5. Принципы (что для нас неизменно)
|
||||
|
||||
1. **Автономность по умолчанию, человек — на ключевых развилках.** Машина делает, человек ставит и принимает.
|
||||
2. **Качество не приносится в жертву скорости/цене.** Удешевляем аналитику — гейты качества остаются. Урок дорого выученный: срезанная проверка = недоделка на проде.
|
||||
3. **Машинные вердикты, а не угадывание.** Гейты читают структурированные поля, а не ищут слова в тексте.
|
||||
4. **Самоизменение — только через PR + ревью + апрув.** Агент, меняющий агентов, всегда под контролем человека.
|
||||
5. **Документация — сразу, не потом.** Изменил функционал → обновил доки.
|
||||
6. **Прод — источник правды.** «Деплой прошёл» ≠ «работает». Проверяем реальный результат.
|
||||
|
||||
---
|
||||
|
||||
## 6. Видение в одну фразу
|
||||
|
||||
> **Самодостаточная фабрика разработки, которая размножается, учится на ошибках, оценивает себя, бережёт бюджет и не ломает прод — превращая намерение человека в работающий продукт почти без его участия.**
|
||||
|
||||
---
|
||||
|
||||
*Документ поддерживается в репозитории orchestrator. Источник дорожной карты — задачи проекта ORCH в Plane (ORCH-7…ORCH-28).*
|
||||
BIN
docs/PRODUCT_VISION.pptx
Normal file
BIN
docs/PRODUCT_VISION.pptx
Normal file
Binary file not shown.
77
docs/architecture/README.md
Normal file
77
docs/architecture/README.md
Normal file
@@ -0,0 +1,77 @@
|
||||
# Архитектура Orchestrator
|
||||
|
||||
## Обзор
|
||||
Мульти-агентный оркестратор разработки. Принимает webhooks от Plane (управление задачами) и Gitea (git-события), ведёт задачи по конвейеру стадий через Quality Gates, на каждой стадии запускает Claude CLI агента. Поддерживает несколько проектов (multi-repo) и self-hosting (дорабатывает сам себя).
|
||||
|
||||
## Компоненты
|
||||
- **Webhook Receivers** (`src/webhooks/plane.py`, `gitea.py`) — приём событий, HMAC-проверка, дедупликация (`_dedup.py`). Роуты: `POST /webhook/plane`, `POST /webhook/gitea`.
|
||||
- **State Machine** (`src/stages.py`) — `STAGE_TRANSITIONS`: переходы, агент и QG каждой стадии. Хелперы: `get_next_stage`, `get_agent_for_stage`, `get_qg_for_stage`, `get_previous_stage`.
|
||||
- **Stage Engine** (`src/stage_engine.py`) — исполнение переходов, диспетчеризация QG (`_run_qg`), откаты, синхронизация с Plane.
|
||||
- **Quality Gates** (`src/qg/checks.py`) — проверки выхода со стадии, реестр `QG_CHECKS`.
|
||||
- **Agent Launcher** (`src/agents/launcher.py`) — запуск Claude CLI агентов в изолированном git worktree, мониторинг, auto-advance.
|
||||
- **Queue** (`src/queue_worker.py`, ORCH-1) — персистентная очередь задач (SQLite `jobs`), atomic claim, max_concurrency, ретраи, restart-safe.
|
||||
- **Project Registry** (`src/projects.py`, ORCH-6) — Plane project id → repo + prefix; фильтрация вебхуков по проекту.
|
||||
- **Plane Sync** (`src/plane_sync.py`) — синхронизация статусов/комментариев в Plane.
|
||||
|
||||
## Конвейер и Quality Gates
|
||||
|
||||
```
|
||||
created → analysis → architecture → development → review → testing → deploy-staging → deploy → done
|
||||
↑ │
|
||||
└──── REQUEST_CHANGES ──────┘ (откат на development, max 3 retries)
|
||||
```
|
||||
|
||||
| Стадия | Агент (выход) | Quality Gate | Артефакт |
|
||||
|--------|---------------|--------------|----------|
|
||||
| created | analyst | — | — |
|
||||
| analysis | architect | `check_analysis_approved` | 01-brd / 02-trz / 03-acceptance-criteria / 04-test-plan.yaml |
|
||||
| architecture | developer | `check_architecture_done` | 06-adr/ |
|
||||
| development | reviewer | `check_ci_green` | код + PR |
|
||||
| review | tester | `check_reviewer_verdict` | 12-review.md (`verdict:`) |
|
||||
| testing | deployer | `check_tests_passed` | 13-test-report.md |
|
||||
| deploy-staging | deployer | `check_staging_status` | 15-staging-log.md (`staging_status:`) |
|
||||
| deploy | — | `check_deploy_status` | 14-deploy-log.md (`deploy_status:`) |
|
||||
| done | — | — | — |
|
||||
|
||||
**Реестр QG** (`QG_CHECKS`): check_analysis_approved, check_analysis_complete, check_architecture_done, check_ci_green, check_review_approved, check_tests_passed, check_reviewer_verdict, check_tests_local, check_deploy_status, check_staging_status.
|
||||
|
||||
**Канон гейтов:** машинные вердикты читаются ТОЛЬКО из YAML-frontmatter, никогда из прозы. Лог-файлы мержатся в `origin/main` отдельным PR; гейт читает из `origin/main`.
|
||||
|
||||
### Условный staging-гейт (ORCH-35)
|
||||
`check_staging_status` реален только для self-hosting (`is_self_hosting_repo(repo)` → `orchestrator`); для остальных проектов → no-op `(True, "Staging gate N/A")`. Для orchestrator парсит `staging_status:` из `15-staging-log.md`; FAILED → откат на `development`. Подробнее: [ADR-0003](adr/adr-0003-staging-gate.md).
|
||||
|
||||
## Откаты
|
||||
- Reviewer REQUEST_CHANGES → откат на `development` + retry (`MAX_DEVELOPER_RETRIES = 3`).
|
||||
- Tester `check_tests_passed` FAIL → откат на `development` + retry.
|
||||
- Deploy / deploy-staging FAILED → откат на `development`.
|
||||
- `get_previous_stage` использует порядок ключей `STAGE_TRANSITIONS`.
|
||||
|
||||
## База данных (SQLite)
|
||||
- `events` — входящие вебхуки (дедуп)
|
||||
- `tasks` — задачи и их стадии
|
||||
- `agent_runs` — запуски агентов (run_id, usage, cost)
|
||||
- `jobs` — очередь задач (ORCH-1)
|
||||
|
||||
## Изоляция (git worktree, ORCH-2)
|
||||
Каждая задача исполняется в отдельном git worktree, ветки не пересекаются. Репозитории проектов разделены под `/repos/<project>`.
|
||||
|
||||
## API
|
||||
| Method | Path | Описание |
|
||||
|--------|------|----------|
|
||||
| GET | `/health` | health check |
|
||||
| GET | `/status` | активные задачи (stage != done) |
|
||||
| GET | `/queue` | очередь: counts + max_concurrency + последние jobs |
|
||||
| POST | `/webhook/plane` | Plane webhook |
|
||||
| POST | `/webhook/gitea` | Gitea webhook (push, PR, CI status) |
|
||||
|
||||
## Деплой и эксплуатация
|
||||
Топология, контейнеры, порты, env-карта, self-hosting риски — [docs/operations/INFRA.md](../operations/INFRA.md). Деплой-хук — [DEPLOY_HOOK.md](../operations/DEPLOY_HOOK.md). Staging — [STAGING.md](../operations/STAGING.md).
|
||||
|
||||
## ADR
|
||||
Сквозные архитектурные решения — [adr/](adr/). Per-work-item решения — `docs/work-items/<id>/06-adr/`.
|
||||
|
||||
## Детали реализации
|
||||
Схема БД, потоки данных, resilience-слой, детали Dockerfile — [internals.md](internals.md).
|
||||
|
||||
---
|
||||
*Актуально на 2026-06-05 (main `f1b3146`). Обновлять при изменении src/stages.py, src/qg/checks.py, src/main.py.*
|
||||
15
docs/architecture/adr/README.md
Normal file
15
docs/architecture/adr/README.md
Normal file
@@ -0,0 +1,15 @@
|
||||
# Architecture Decision Records
|
||||
|
||||
Индекс сквозных (cross-cutting) ADR проекта orchestrator.
|
||||
Per-work-item решения живут в `docs/work-items/<id>/06-adr/ADR-NNN-slug.md`.
|
||||
|
||||
| # | Решение | Статус | Дата | Источник |
|
||||
|---|---------|--------|------|----------|
|
||||
| adr-0001 | Реестр проектов (multi-repo) | accepted | 2026-06-02 | ORCH-6 |
|
||||
| adr-0002 | Очередь задач вместо in-process потоков | accepted | 2026-06-03 | ORCH-1 |
|
||||
| adr-0003 | Условный staging-гейт перед прод-деплоем | accepted | 2026-06-05 | ORCH-35 |
|
||||
|
||||
## Формат
|
||||
**Контекст → Решение → Альтернативы → Последствия → Связи.** Статус: proposed / accepted / superseded.
|
||||
Принятый ADR не меняется — новое решение заводится отдельным файлом со ссылкой `supersedes adr-XXXX`.
|
||||
Новые ADR добавляет архитектор при принятии решения (см. `CLAUDE.md` → Конвенции).
|
||||
23
docs/architecture/adr/adr-0001-multi-repo-registry.md
Normal file
23
docs/architecture/adr/adr-0001-multi-repo-registry.md
Normal file
@@ -0,0 +1,23 @@
|
||||
# adr-0001: Реестр проектов (multi-repo)
|
||||
|
||||
- **Статус:** accepted
|
||||
- **Дата:** 2026-06-02
|
||||
- **Задача:** ORCH-6
|
||||
|
||||
## Контекст
|
||||
Инцидент 2026-06-02: Plane-вебхук слушал весь воркспейс и хардкодил `repo = settings.default_repo` (enduro-trails). Задачи ЛЮБОГО проекта сливались в один репо с одним префиксом (ET). Нужна изоляция по проектам.
|
||||
|
||||
## Решение
|
||||
Введён реестр `src/projects.py`: `ProjectConfig` (frozen dataclass) связывает `plane_project_id` → `repo` + `work_item_prefix` + `name`. Источник правды — env `ORCH_PROJECTS_JSON`; при пустом/невалидном — встроенный дефолт (`enduro-trails`/ET, `orchestrator`/ORCH). Позволяет: фильтровать вебхуки по проекту (неизвестный → ignore), резолвить gitea-репо + префикс, роутить Plane-синк в свой проект задачи.
|
||||
|
||||
## Альтернативы
|
||||
- Один репо на всё — отклонён (источник инцидента).
|
||||
- Хардкод маппинга в коде — отклонён в пользу env-конфигурируемого реестра с безопасным дефолтом.
|
||||
|
||||
## Последствия
|
||||
- Изоляция проектов на уровне вебхуков и роутинга.
|
||||
- Парсер устойчив: битый элемент скипается, пустой результат → дефолт.
|
||||
- Основа для `is_self_hosting_repo` (adr-0003).
|
||||
|
||||
## Связи
|
||||
adr-0003 (условный гейт опирается на repo из реестра).
|
||||
23
docs/architecture/adr/adr-0002-job-queue.md
Normal file
23
docs/architecture/adr/adr-0002-job-queue.md
Normal file
@@ -0,0 +1,23 @@
|
||||
# adr-0002: Очередь задач вместо in-process потоков
|
||||
|
||||
- **Статус:** accepted
|
||||
- **Дата:** 2026-06-03
|
||||
- **Задача:** ORCH-1 (F-2b)
|
||||
|
||||
## Контекст
|
||||
Ранняя версия запускала стадии конвейера в in-process daemon-потоках. Проблемы: не переживало рестарт (задачи терялись), нет контроля параллелизма, нет ретраев, нет наблюдаемости.
|
||||
|
||||
## Решение
|
||||
Введена персистентная очередь задач (`src/queue_worker.py` + таблица `jobs` в SQLite): atomic claim задачи воркером, `max_concurrency`, ретраи при сбое, restart-safe (running-задачи реквестятся при старте), эндпоинт `GET /queue`.
|
||||
|
||||
## Альтернативы
|
||||
- In-process потоки — отклонены (не restart-safe).
|
||||
- Внешний брокер (Redis/RabbitMQ) — избыточно для текущего масштаба; SQLite-очередь проще и без новых зависимостей.
|
||||
|
||||
## Последствия
|
||||
- Конвейер переживает рестарт контейнера.
|
||||
- Контроль параллелизма и наблюдаемость через `/queue`.
|
||||
- ⚠️ Очередь общая на все проекты прод-инстанса — фактор группового риска при self-hosting (см. `docs/operations/INFRA.md`).
|
||||
|
||||
## Связи
|
||||
adr-0001 (реестр проектов), INFRA.md (общая очередь при self-hosting).
|
||||
27
docs/architecture/adr/adr-0003-staging-gate.md
Normal file
27
docs/architecture/adr/adr-0003-staging-gate.md
Normal file
@@ -0,0 +1,27 @@
|
||||
# adr-0003: Условный staging-гейт перед прод-деплоем
|
||||
|
||||
- **Статус:** accepted
|
||||
- **Дата:** 2026-06-05
|
||||
- **Задача:** ORCH-35
|
||||
|
||||
## Контекст
|
||||
Оркестратор дорабатывает сам себя (self-hosting). Раньше стадия `deploy` имела «бумажный» вердикт: deployer-агент писал `deploy_status: SUCCESS`, но реального прогона на изолированной среде не было. Нужен предохранитель: прод-деплой орка не должен происходить, пока изменения не проверены на живой staging-среде. При этом другие проекты (enduro-trails) staging-инфры не имеют.
|
||||
|
||||
## Решение
|
||||
Добавлена промежуточная стадия `deploy-staging` между `testing` и `deploy`: `testing → deploy-staging → deploy → done`.
|
||||
- deployer гоняет `scripts/staging_check.py --base-url http://localhost:8501` и пишет `staging_status: SUCCESS|FAILED` в `15-staging-log.md`.
|
||||
- Quality Gate `check_staging_status` парсит вердикт (только YAML-frontmatter).
|
||||
- **Гейт условный:** `is_self_hosting_repo(repo)` → реальная проверка только для `orchestrator`; для остальных проектов гейт = no-op `(True, "Staging gate N/A")`.
|
||||
- FAILED → откат на `development`.
|
||||
|
||||
## Альтернативы
|
||||
- Глобальный гейт для всех проектов — отклонён: у enduro нет staging-инстанса, задачи застревали бы на откате.
|
||||
- Деплой реально дёргает хост-хук прямо здесь — отложен в ORCH-36 (Вариант B).
|
||||
|
||||
## Последствия
|
||||
- Прод-деплой орка недостижим, пока staging-гейт не зелёный.
|
||||
- Другие проекты не затронуты (no-op).
|
||||
- Реальный docker-деплой через хук пока НЕ выполняется (вердикт «бумажный», но подкреплён прогоном сьюта). Исполняемый деплой — ORCH-36.
|
||||
|
||||
## Связи
|
||||
adr-0001 (реестр проектов — основа `is_self_hosting_repo`), ORCH-34 (deploy-hook + rollback), ORCH-36 (исполняемый самодеплой).
|
||||
@@ -58,7 +58,8 @@ STAGE_TRANSITIONS = {
|
||||
architecture: → development (agent: developer, QG: check_architecture_done)
|
||||
development: → review (agent: reviewer, QG: check_tests_local)
|
||||
review: → testing (agent: tester, QG: check_reviewer_verdict)
|
||||
testing: → deploy (agent: deployer, QG: check_tests_passed)
|
||||
testing: → deploy-staging (agent: deployer, QG: check_tests_passed)
|
||||
deploy-staging: → deploy (agent: deployer, QG: check_staging_status)
|
||||
deploy: → done (agent: None, QG: None)
|
||||
}
|
||||
```
|
||||
@@ -189,8 +190,10 @@ services:
|
||||
12. Gitea PR webhook: review event → QG check_review_approved → PASS
|
||||
13. Advance: review → testing, tester launched
|
||||
14. Tester: прогоняет тесты, пишет test-report.md → git push
|
||||
15. Auto-advance: testing → deploy (QG check_tests_passed → PASS)
|
||||
16. PR merge → Gitea PR webhook: action=closed, merged=true → done
|
||||
15. Auto-advance: testing → deploy-staging (QG check_tests_passed → PASS)
|
||||
16. Deployer: runs staging checks → writes 15-staging-log.md (staging_status: SUCCESS)
|
||||
17. Auto-advance: deploy-staging → deploy (QG check_staging_status → PASS)
|
||||
18. PR merge → Gitea PR webhook: action=closed, merged=true → done
|
||||
```
|
||||
|
||||
### Review bounce path
|
||||
90
docs/operations/DEPLOY_HOOK.md
Normal file
90
docs/operations/DEPLOY_HOOK.md
Normal file
@@ -0,0 +1,90 @@
|
||||
# Orchestrator Deploy Hook
|
||||
|
||||
`scripts/orchestrator-deploy-hook.sh` — хост-скрипт деплоя orchestrator с health-чеком и авто-rollback.
|
||||
|
||||
## Как работает
|
||||
|
||||
### Режим `--deploy` (по умолчанию)
|
||||
|
||||
1. **Захват текущего образа** — до рестарта записывает ID образа работающего контейнера в `$PREV_IMAGE_FILE` (best-effort, не падает если сервис не запущен).
|
||||
2. **git pull** — обновляет код репозитория.
|
||||
3. **Рестарт контейнера** — `docker compose --profile $COMPOSE_PROFILE up -d --no-build $TARGET_SERVICE`.
|
||||
4. **Health-цикл** — 10 попыток × 6с = до 60с. Критерий: HTTP 200 + тело содержит `"status":"ok"`.
|
||||
- **Успех** → `exit 0`, лог "Deploy SUCCESS".
|
||||
- **Провал** → авто-rollback (шаг 5).
|
||||
5. **Авто-rollback** — восстанавливает образ из `$PREV_IMAGE_FILE`, рестарт, повторный health 5×3с.
|
||||
- Если восстановился → `exit 1` (деплой провалился, откат успешен).
|
||||
- Если и откат не помог → `exit 2` (критично).
|
||||
|
||||
### Режим `--rollback`
|
||||
|
||||
Вручную откатывает сервис на предыдущий образ из `$PREV_IMAGE_FILE`.
|
||||
|
||||
## Переменные окружения
|
||||
|
||||
| Переменная | Дефолт | Описание |
|
||||
|------------------|-----------------------------------|-----------------------------------------------|
|
||||
| `TARGET_SERVICE` | `orchestrator-staging` | Имя docker-compose сервиса |
|
||||
| `TARGET_PORT` | `8501` | Порт health-check |
|
||||
| `TARGET_IMAGE` | `orchestrator-orchestrator-staging` | Имя образа для retag при rollback |
|
||||
| `COMPOSE_PROFILE`| `staging` | Docker compose profile (пусто = без профиля) |
|
||||
| `PREV_IMAGE_FILE`| `$REPO/.deploy-prev-image-staging`| Файл для сохранения предыдущего образа |
|
||||
| `LOG` | `/var/log/orchestrator/deploy-hook.log` | Лог-файл (fallback: `$REPO/deploy-hook.log`) |
|
||||
|
||||
> ⚠️ **Дефолт — всегда STAGING**. Прод активируется только явным переопределением env.
|
||||
|
||||
## Примеры запуска
|
||||
|
||||
### Staging (дефолт, безопасно)
|
||||
|
||||
```bash
|
||||
cd /home/slin/repos/orchestrator
|
||||
bash scripts/orchestrator-deploy-hook.sh --deploy
|
||||
# или просто:
|
||||
bash scripts/orchestrator-deploy-hook.sh
|
||||
```
|
||||
|
||||
### Прод (осознанный шаг, Этап 5)
|
||||
|
||||
```bash
|
||||
TARGET_SERVICE=orchestrator \
|
||||
TARGET_PORT=8500 \
|
||||
TARGET_IMAGE=orchestrator-orchestrator \
|
||||
COMPOSE_PROFILE="" \
|
||||
PREV_IMAGE_FILE=/home/slin/repos/orchestrator/.deploy-prev-image-prod \
|
||||
bash scripts/orchestrator-deploy-hook.sh --deploy
|
||||
```
|
||||
|
||||
### Ручной rollback staging
|
||||
|
||||
```bash
|
||||
bash scripts/orchestrator-deploy-hook.sh --rollback
|
||||
```
|
||||
|
||||
## Коды выхода
|
||||
|
||||
| Код | Значение |
|
||||
|-----|------------------------------------------------------|
|
||||
| `0` | Деплой успешен, сервис здоров |
|
||||
| `1` | Деплой провалился; откат выполнен (или пропущен) |
|
||||
| `2` | Деплой провалился И откат тоже провалился (критично) |
|
||||
|
||||
## Логи
|
||||
|
||||
```
|
||||
/var/log/orchestrator/deploy-hook.log
|
||||
```
|
||||
|
||||
Каждая строка с UTC-таймстампом в формате `[2026-06-05T06:30:00Z]`.
|
||||
|
||||
## Разница с enduro-deploy-hook.sh
|
||||
|
||||
| Функция | enduro-deploy-hook.sh | orchestrator-deploy-hook.sh |
|
||||
|----------------------|-----------------------|-----------------------------|
|
||||
| Захват PREV_IMG | ✅ | ✅ |
|
||||
| git pull | ✅ | ✅ |
|
||||
| Рестарт | ✅ | ✅ |
|
||||
| Health-цикл (60с) | ❌ | ✅ 10×6с |
|
||||
| Авто-rollback | ❌ | ✅ |
|
||||
| Параметризация (env) | ❌ хардкод | ✅ дефолт=staging |
|
||||
| Compose profile | ❌ | ✅ --profile staging |
|
||||
96
docs/operations/INFRA.md
Normal file
96
docs/operations/INFRA.md
Normal file
@@ -0,0 +1,96 @@
|
||||
# INFRA.md — инфраструктура и эксплуатация оркестратора
|
||||
|
||||
> RUNBOOK. Топология, контейнеры, порты, переменные окружения, границы.
|
||||
> **Секреты тут НЕ хранятся** — только дескрипторы. Реальные значения — в `.env` на хосте.
|
||||
|
||||
## Топология
|
||||
|
||||
```
|
||||
host: mva154 (slin@82.22.50.71), network_mode: host
|
||||
┌──────────────────────────────────────────────────────────────────────┐
|
||||
│ orchestrator (PROD) :8500 env_file .env │
|
||||
│ БД: ./data/orchestrator.db (обслуживает ВСЕ прод-проекты) │
|
||||
│ │
|
||||
│ orchestrator-staging (STAGING) :8501 env_file .env.staging │
|
||||
│ БД: ./data/staging/orchestrator.db (изолирована, только sandbox) │
|
||||
│ profile: staging — НЕ стартует обычным `docker compose up` │
|
||||
└──────────────────────────────────────────────────────────────────────┘
|
||||
│ webhooks │ git
|
||||
▼ ▼
|
||||
Plane (ag_proj) Gitea (localhost:3000)
|
||||
/repos/<project> ← общий каталог репозиториев (host: /home/slin/repos)
|
||||
```
|
||||
|
||||
## Контейнеры
|
||||
|
||||
| Контейнер | Роль | Порт | env_file | БД (хост) | Старт |
|
||||
|-----------|------|------|----------|-----------|-------|
|
||||
| `orchestrator` | прод | 8500 | `.env` | `./data/orchestrator.db` | `docker compose up -d` |
|
||||
| `orchestrator-staging` | staging / песочница | 8501 | `.env.staging` | `./data/staging/orchestrator.db` | `docker compose --profile staging up -d orchestrator-staging` |
|
||||
|
||||
Оба: `network_mode: host`, `init: true` (tini как PID 1 — reaping зомби, B-2), `restart: unless-stopped`.
|
||||
|
||||
### Тома (volumes)
|
||||
- `./data` → `/app/data` (БД; у staging — `./data/staging`)
|
||||
- `/home/slin/repos` → `/repos` (рабочие репозитории проектов)
|
||||
- `/var/run/docker.sock` (для docker-операций деплоя)
|
||||
- claude-code, node, `~/.claude*` (CLI агентов, ro)
|
||||
- `~/.orchestrator-ssh` → `/root/.ssh` (ro, деплой по ssh)
|
||||
|
||||
## Переменные окружения (карта; значения — в `.env`)
|
||||
|
||||
| Переменная | Назначение |
|
||||
|-----------|-----------|
|
||||
| `ORCH_PLANE_API_URL` / `_TOKEN` / `_WORKSPACE_SLUG` | доступ к Plane API |
|
||||
| `ORCH_PLANE_WEBHOOK_SECRET` | HMAC-проверка вебхуков Plane |
|
||||
| `ORCH_GITEA_URL` / `_TOKEN` / `_WEBHOOK_SECRET` | доступ к Gitea + HMAC |
|
||||
| `ORCH_CLAUDE_BIN` | путь к claude CLI |
|
||||
| `ORCH_REPOS_DIR` / `ORCH_HOST_REPOS_DIR` | каталог репозиториев (в контейнере / на хосте) |
|
||||
| `ORCH_DB_PATH` | путь к SQLite БД |
|
||||
| `ORCH_PROJECTS_JSON` | реестр проектов (Plane id → repo + prefix); пусто → дефолт из `src/projects.py` |
|
||||
| `DEPLOY_SSH_USER` / `_HOST` / `DEPLOY_HOOK_SCRIPT` | параметры деплой-хука |
|
||||
|
||||
**Секреты — только в `.env` / `.env.staging` на хосте, в гит НЕ коммитятся.** Канон — `.env.example`, `.env.staging.example`.
|
||||
|
||||
## Реестр проектов (`src/projects.py`, ORCH-6)
|
||||
Связывает Plane project id → gitea repo + work-item prefix. Источник: `ORCH_PROJECTS_JSON`, fallback — встроенный дефолт. Прод видит: `enduro-trails` (ET), `orchestrator` (ORCH). Staging видит ТОЛЬКО `orchestrator-sandbox` (SANDBOX) — изоляция.
|
||||
|
||||
## ⚠️ Self-hosting — оркестратор дорабатывает САМ СЕБЯ
|
||||
|
||||
**Факт:** прод-инстанс `orchestrator` (8500) — ОДИН на ВСЕ прод-проекты (enduro-trails + orchestrator), с ОБЩЕЙ БД `./data/orchestrator.db` и общей очередью задач (ORCH-1).
|
||||
|
||||
**Следствие — групповой риск:** когда орк выполняет задачу из проекта ORCH (дорабатывает себя), он бежит в том же инстансе, что обслуживает enduro-trails.
|
||||
- Рестарт / падение прод-контейнера орк-задачей → конвейер ВСЕХ проектов встаёт.
|
||||
- Кривой self-деплой (ORCH-36, Вариант B) → лежат все проекты сразу.
|
||||
- Общая очередь → орк-задача занимает concurrency-слоты других проектов.
|
||||
|
||||
**Что изолировано (безопасно):**
|
||||
- Staging (8501) — отдельная БД (`./data/staging`), отдельный реестр (`ORCH_PROJECTS_JSON` = только sandbox). Прод-проекты не видит.
|
||||
- Репозитории разделены, изоляция веток через git worktree (ORCH-2).
|
||||
|
||||
**Страховки:**
|
||||
- Стадия `deploy-staging` (порт 8501) — обязательный гейт перед прод-деплоем орка. Прод-деплой недостижим, пока staging-гейт не зелёный (см. `STAGING.md`, ORCH-35). Гейт условный: реален только для self-hosting (repo=orchestrator), для остальных проектов — no-op.
|
||||
|
||||
**Правила для агентов при задачах ORCH:**
|
||||
1. НЕ перезапускать / не ронять прод-контейнер `orchestrator` в рамках задачи.
|
||||
2. Все проверки деплоя — на staging (8501), боевой 8500 не трогать.
|
||||
3. Деплой self — только через хук с health-check + авто-rollback (`DEPLOY_HOOK.md`).
|
||||
|
||||
## Эксплуатация (быстрые команды)
|
||||
```bash
|
||||
# статус
|
||||
docker ps --filter name=orchestrator
|
||||
curl -s http://localhost:8500/health
|
||||
curl -s http://localhost:8500/status # активные задачи
|
||||
curl -s http://localhost:8500/queue # очередь
|
||||
|
||||
# поднять staging-песочницу
|
||||
docker compose --profile staging up -d orchestrator-staging
|
||||
curl -s http://localhost:8501/health
|
||||
|
||||
# логи
|
||||
docker logs --tail 100 orchestrator
|
||||
```
|
||||
|
||||
---
|
||||
*RUNBOOK 2026-06-05. Обновлять при изменении топологии/портов/переменных. См. CONTRIBUTING.md §8.*
|
||||
85
docs/operations/STAGING.md
Normal file
85
docs/operations/STAGING.md
Normal file
@@ -0,0 +1,85 @@
|
||||
# Staging Environment (ORCH-31)
|
||||
|
||||
Orchestrator supports a permanent **staging instance** running on port **8501** with a
|
||||
fully-isolated SQLite database. The staging instance shares the same codebase and
|
||||
Dockerfile as production but is started under the `staging` Docker Compose profile so it
|
||||
**never starts accidentally** during a normal `docker compose up -d`.
|
||||
|
||||
## Architecture
|
||||
|
||||
| | Production | Staging |
|
||||
|---|---|---|
|
||||
| Port | 8500 | 8501 |
|
||||
| Container name | `orchestrator` | `orchestrator-staging` |
|
||||
| DB (host path) | `./data/orchestrator.db` | `./data/staging/orchestrator.db` |
|
||||
| DB (container path) | `/app/data/orchestrator.db` | `/app/data/orchestrator.db` |
|
||||
| env file | `.env` | `.env.staging` |
|
||||
| Compose profile | *(default)* | `staging` |
|
||||
|
||||
DB isolation is achieved via a separate volume mount (`./data/staging:/app/data`), not by
|
||||
changing `ORCH_DB_PATH` — the container path stays identical while the host path is a
|
||||
different directory.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
1. **`.env.staging`** — create from the template (see below). This file is **not committed**
|
||||
to the repo (it contains secrets). Copy and fill in values before first start.
|
||||
2. **`./data/staging/`** directory — created automatically on first container start.
|
||||
|
||||
### Create `.env.staging`
|
||||
|
||||
```bash
|
||||
cd /home/slin/repos/orchestrator
|
||||
cp .env.staging.example .env.staging
|
||||
# Edit .env.staging — fill in real tokens / secrets.
|
||||
# At Stage 1 (ORCH-31) you can reuse prod values; sandbox Plane project
|
||||
# and isolated Gitea webhook will be wired in ORCH-32.
|
||||
nano .env.staging
|
||||
```
|
||||
|
||||
## Starting Staging
|
||||
|
||||
```bash
|
||||
cd /home/slin/repos/orchestrator
|
||||
docker compose --profile staging up -d orchestrator-staging
|
||||
```
|
||||
|
||||
Check it is running:
|
||||
|
||||
```bash
|
||||
docker ps | grep orchestrator-staging
|
||||
curl -s http://localhost:8501/health | python3 -m json.tool
|
||||
```
|
||||
|
||||
## Stopping Staging
|
||||
|
||||
```bash
|
||||
docker compose --profile staging stop orchestrator-staging
|
||||
# or remove the container entirely:
|
||||
docker compose --profile staging down orchestrator-staging
|
||||
```
|
||||
|
||||
## Normal `up -d` does NOT start staging
|
||||
|
||||
```bash
|
||||
# This starts ONLY the prod orchestrator (port 8500). Staging is NOT affected.
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
The `profiles: [staging]` directive in `docker-compose.yml` ensures staging is
|
||||
completely invisible to commands that do not pass `--profile staging`.
|
||||
|
||||
## Logs
|
||||
|
||||
```bash
|
||||
docker logs -f orchestrator-staging
|
||||
```
|
||||
|
||||
## Roadmap
|
||||
|
||||
| Task | Description |
|
||||
|---|---|
|
||||
| **ORCH-31** *(this PR)* | Infra: compose service, .env template, gitignore, docs |
|
||||
| **ORCH-32** | Sandbox: isolated Plane project + Gitea repo for staging |
|
||||
| **ORCH-33** | Test suite running against staging endpoint |
|
||||
| **ORCH-34** | Deploy hook: promote `orchestrator:candidate` image to staging |
|
||||
136
docs/operations/STAGING_CHECK.md
Normal file
136
docs/operations/STAGING_CHECK.md
Normal file
@@ -0,0 +1,136 @@
|
||||
# STAGING_CHECK.md — Инструкция по запуску staging check suite (ORCH-33)
|
||||
|
||||
## Что это
|
||||
|
||||
`scripts/staging_check.py` — самостоятельный скрипт проверки **живого** staging-стенда orchestrator (порт 8501). Не unit-тесты — реальные HTTP-вызовы против работающих сервисов.
|
||||
|
||||
Три блока проверок:
|
||||
|
||||
| Блок | Название | Что проверяет |
|
||||
|------|----------|---------------|
|
||||
| A | SMOKE | `/health`, `/queue`, `ORCH_STAGING=true` |
|
||||
| B | ACCESS | Plane sandbox (R), Gitea sandbox (R+push), реестр проектов |
|
||||
| C | E2E | Создать задачу → триггер конвейера → ветка + коммент → cleanup |
|
||||
|
||||
Exit code: **0** = все PASS, **non-zero** = есть FAIL.
|
||||
|
||||
---
|
||||
|
||||
## Требования к окружению
|
||||
|
||||
Скрипт читает токены/URL из env (те же переменные, что использует orchestrator):
|
||||
|
||||
| Переменная | Описание |
|
||||
|-----------|----------|
|
||||
| `ORCH_STAGING` | Должна быть `true` — защита от случайного запуска на проде |
|
||||
| `ORCH_PLANE_API_TOKEN` | Plane API token (`X-API-Key`) |
|
||||
| `ORCH_PLANE_API_URL` | Plane base URL **без** `/api/v1` (скрипт добавляет сам) |
|
||||
| `ORCH_PLANE_WORKSPACE_SLUG` | Workspace slug (`ag_proj`) |
|
||||
| `ORCH_GITEA_TOKEN` | Gitea token (`Authorization: token …`) |
|
||||
| `ORCH_GITEA_URL` | Gitea base URL (`http://localhost:3000`) |
|
||||
| `ORCH_PLANE_WEBHOOK_SECRET` | HMAC-секрет для подписи `/webhook/plane` (если пустой — без подписи) |
|
||||
|
||||
Все эти переменные **уже есть** внутри контейнера `orchestrator-staging`.
|
||||
|
||||
---
|
||||
|
||||
## Способы запуска
|
||||
|
||||
### 1. Внутри контейнера (рекомендуемый)
|
||||
|
||||
```bash
|
||||
docker exec orchestrator-staging \
|
||||
python3 /repos/orchestrator/scripts/staging_check.py --mode stub
|
||||
```
|
||||
|
||||
### 2. С хоста (если есть токены в env)
|
||||
|
||||
```bash
|
||||
export ORCH_STAGING=true
|
||||
export ORCH_PLANE_API_TOKEN=...
|
||||
# ... остальные переменные ...
|
||||
|
||||
python3 scripts/staging_check.py \
|
||||
--base-url http://localhost:8501 \
|
||||
--mode stub
|
||||
```
|
||||
|
||||
### 3. Из docker exec с передачей URL
|
||||
|
||||
```bash
|
||||
docker exec orchestrator-staging \
|
||||
python3 /repos/orchestrator/scripts/staging_check.py \
|
||||
--base-url http://localhost:8501 \
|
||||
--mode stub
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Режимы (`--mode`)
|
||||
|
||||
| Режим | Описание | Скорость |
|
||||
|-------|----------|----------|
|
||||
| `stub` (дефолт) | Проверяет **ранние артефакты** конвейера: ветка + QG-0-коммент. Создаются ДО запуска Claude CLI → быстро, детерминированно, без расхода LLM-кредитов. | ~30-90 сек |
|
||||
| `full-real` | Дополнительно ждёт реального завершения аналитика. Долго, расходует LLM-кредиты. | 5-15+ мин |
|
||||
|
||||
**Текущий дефолт: `stub`** — достаточен для проверки работоспособности стенда.
|
||||
|
||||
---
|
||||
|
||||
## Что проверяет блок C (E2E) и почему это безопасно
|
||||
|
||||
Порядок `start_pipeline` в коде orchestrator:
|
||||
1. Resolve проекта из реестра
|
||||
2. Получить name/description из Plane API (если в webhook пустые)
|
||||
3. **QG-0 гейт** (name ≥ 5 симв, description ≥ 20 симв)
|
||||
4. **Создать work_item_id + ветку в Gitea + начальные доки**
|
||||
5. **Записать строку задачи в БД**
|
||||
6. Поставить аналитика в очередь (вот тут Claude CLI)
|
||||
|
||||
Блок C проверяет **шаги 4-5**, аналитика (шаг 6) **не ждёт**.
|
||||
Тест-задача создаётся ТОЛЬКО в **SANDBOX** (`project_id 8c5a3025-...`),
|
||||
ветка создаётся ТОЛЬКО в **orchestrator-sandbox**.
|
||||
|
||||
### CLEANUP (обязателен)
|
||||
|
||||
`try/finally` гарантирует удаление тестовых артефактов:
|
||||
- Удаляет ветку из `orchestrator-sandbox`
|
||||
- Удаляет задачу из Plane SANDBOX
|
||||
|
||||
Cleanup отрабатывает даже при падении e2e.
|
||||
|
||||
---
|
||||
|
||||
## Принцип HMAC-подписи
|
||||
|
||||
Скрипт читает `ORCH_PLANE_WEBHOOK_SECRET` из env и формирует подпись:
|
||||
```python
|
||||
hmac.new(secret.encode(), body, hashlib.sha256).hexdigest()
|
||||
```
|
||||
Передаёт как заголовок `X-Plane-Signature`. Алгоритм совпадает с `verify_plane_signature` в `src/webhooks/plane.py`.
|
||||
|
||||
---
|
||||
|
||||
## Изолированность от прода
|
||||
|
||||
| Проверка | Гарантия |
|
||||
|---------|---------|
|
||||
| A3 `ORCH_STAGING=true` | При false — abort до деструктивных блоков |
|
||||
| B6 Реестр без боевых | ET/ORCH project_id absent в `known_plane_project_ids()` |
|
||||
| C: only SANDBOX project_id | Webhook payload указывает только `8c5a3025-...` |
|
||||
| C: only orchestrator-sandbox repo | Gitea operations на `admin/orchestrator-sandbox` |
|
||||
| C: cleanup в finally | Артефакты удаляются даже при ошибке |
|
||||
|
||||
---
|
||||
|
||||
## Добавление в деплой-хук
|
||||
|
||||
```bash
|
||||
# В deploy.sh, после docker-compose up -d orchestrator-staging
|
||||
docker exec orchestrator-staging \
|
||||
python3 /repos/orchestrator/scripts/staging_check.py --mode stub
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "Staging check FAILED — rolling back"
|
||||
exit 1
|
||||
fi
|
||||
```
|
||||
176
scripts/orchestrator-deploy-hook.sh
Executable file
176
scripts/orchestrator-deploy-hook.sh
Executable file
@@ -0,0 +1,176 @@
|
||||
#!/bin/bash
|
||||
# Deploy hook for orchestrator
|
||||
# Supports --deploy (default) and --rollback modes.
|
||||
# Adds health-check loop + automatic rollback if new deploy is unhealthy.
|
||||
#
|
||||
# Parametrised via env vars (defaults are STAGING — never prod):
|
||||
# TARGET_SERVICE - docker-compose service name (default: orchestrator-staging)
|
||||
# TARGET_PORT - health check port (default: 8501)
|
||||
# TARGET_IMAGE - image name for retag (default: orchestrator-orchestrator-staging)
|
||||
# COMPOSE_PROFILE - docker compose profile (default: staging)
|
||||
# PREV_IMAGE_FILE - path to prev-image snapshot (default: $REPO/.deploy-prev-image-staging)
|
||||
# LOG - log file path (default: /var/log/orchestrator/deploy-hook.log)
|
||||
#
|
||||
# Usage:
|
||||
# ./orchestrator-deploy-hook.sh [--deploy] # normal deploy (default)
|
||||
# ./orchestrator-deploy-hook.sh --rollback # manual rollback
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
REPO=/home/slin/repos/orchestrator
|
||||
|
||||
# ---- Defaults (STAGING — safe) ---------------------------------------------
|
||||
TARGET_SERVICE="${TARGET_SERVICE:-orchestrator-staging}"
|
||||
TARGET_PORT="${TARGET_PORT:-8501}"
|
||||
TARGET_IMAGE="${TARGET_IMAGE:-orchestrator-orchestrator-staging}"
|
||||
COMPOSE_PROFILE="${COMPOSE_PROFILE:-staging}"
|
||||
PREV_IMAGE_FILE="${PREV_IMAGE_FILE:-$REPO/.deploy-prev-image-staging}"
|
||||
|
||||
# ---- Log setup -------------------------------------------------------------
|
||||
LOG_DIR=/var/log/orchestrator
|
||||
if mkdir -p "$LOG_DIR" 2>/dev/null; then
|
||||
LOG="${LOG:-$LOG_DIR/deploy-hook.log}"
|
||||
else
|
||||
LOG="${LOG:-$REPO/deploy-hook.log}"
|
||||
fi
|
||||
|
||||
log() {
|
||||
echo "[$(date -u +%Y-%m-%dT%H:%M:%SZ)] $*" | tee -a "$LOG"
|
||||
}
|
||||
|
||||
log "Deploy hook called: target=$TARGET_SERVICE port=$TARGET_PORT args=$*"
|
||||
|
||||
cd "$REPO"
|
||||
|
||||
# ============================================================================
|
||||
# HEALTH CHECK helper
|
||||
# Args: max_attempts sleep_sec label
|
||||
# Returns 0 if healthy within attempts, 1 otherwise
|
||||
# ============================================================================
|
||||
health_check() {
|
||||
local max_attempts="$1"
|
||||
local sleep_sec="$2"
|
||||
local label="${3:-health-check}"
|
||||
local attempt=0
|
||||
while [[ $attempt -lt $max_attempts ]]; do
|
||||
attempt=$(( attempt + 1 ))
|
||||
log "$label: attempt $attempt/$max_attempts - GET http://localhost:$TARGET_PORT/health"
|
||||
local http_code body
|
||||
body=$(curl -s --max-time 5 "http://localhost:$TARGET_PORT/health" 2>/dev/null || true)
|
||||
http_code=$(curl -s -o /dev/null -w '%{http_code}' --max-time 5 "http://localhost:$TARGET_PORT/health" 2>/dev/null || echo "000")
|
||||
if [[ "$http_code" == "200" ]] && echo "$body" | grep -q '"status":"ok"'; then
|
||||
log "$label: OK (HTTP $http_code, body=$body)"
|
||||
return 0
|
||||
fi
|
||||
log "$label: not ready yet (HTTP $http_code, body=$body)"
|
||||
if [[ $attempt -lt $max_attempts ]]; then
|
||||
sleep "$sleep_sec"
|
||||
fi
|
||||
done
|
||||
log "$label: FAILED after $max_attempts attempts"
|
||||
return 1
|
||||
}
|
||||
|
||||
# ============================================================================
|
||||
# ROLLBACK helper (also called for auto-rollback after bad deploy)
|
||||
# ============================================================================
|
||||
do_rollback() {
|
||||
log "ROLLBACK: checking $PREV_IMAGE_FILE"
|
||||
if [[ ! -s "$PREV_IMAGE_FILE" ]]; then
|
||||
log "ROLLBACK: no previous image recorded - rollback skipped (exit 1)"
|
||||
return 1
|
||||
fi
|
||||
local prev_img
|
||||
prev_img=$(cat "$PREV_IMAGE_FILE")
|
||||
if [[ -z "$prev_img" ]]; then
|
||||
log "ROLLBACK: PREV_IMAGE_FILE is empty - rollback skipped (exit 1)"
|
||||
return 1
|
||||
fi
|
||||
if ! docker image inspect "$prev_img" >/dev/null 2>&1; then
|
||||
log "ROLLBACK: recorded image '$prev_img' not found locally - rollback skipped (exit 1)"
|
||||
return 1
|
||||
fi
|
||||
log "ROLLBACK: retagging $prev_img -> $TARGET_IMAGE"
|
||||
docker tag "$prev_img" "$TARGET_IMAGE" >> "$LOG" 2>&1
|
||||
log "ROLLBACK: restarting $TARGET_SERVICE on previous image"
|
||||
if [[ -n "$COMPOSE_PROFILE" ]]; then
|
||||
docker compose --profile "$COMPOSE_PROFILE" up -d --no-build "$TARGET_SERVICE" >> "$LOG" 2>&1
|
||||
else
|
||||
docker compose up -d --no-build "$TARGET_SERVICE" >> "$LOG" 2>&1
|
||||
fi
|
||||
log "ROLLBACK: container restarted, running post-rollback health check (5x3s)"
|
||||
if health_check 5 3 "ROLLBACK-health"; then
|
||||
log "ROLLBACK: service is healthy on previous image ($prev_img)"
|
||||
return 0
|
||||
else
|
||||
log "ROLLBACK: ROLLBACK ALSO FAILED - service still unhealthy after restoring $prev_img"
|
||||
return 2
|
||||
fi
|
||||
}
|
||||
|
||||
# ============================================================================
|
||||
# MANUAL --rollback mode
|
||||
# ============================================================================
|
||||
if [[ "${1:-}" == "--rollback" ]]; then
|
||||
log "Manual ROLLBACK requested"
|
||||
if do_rollback; then
|
||||
log "Manual ROLLBACK succeeded"
|
||||
exit 0
|
||||
else
|
||||
log "Manual ROLLBACK failed"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
# ============================================================================
|
||||
# NORMAL DEPLOY mode (--deploy or no argument)
|
||||
# ============================================================================
|
||||
|
||||
# 1. Capture currently running image BEFORE restart (best-effort)
|
||||
PREV_IMG=""
|
||||
SVC_CID=$(docker compose --profile "$COMPOSE_PROFILE" ps -q "$TARGET_SERVICE" 2>/dev/null || true)
|
||||
if [[ -n "$SVC_CID" ]]; then
|
||||
PREV_IMG=$(docker inspect --format '{{.Image}}' "$SVC_CID" 2>/dev/null || true)
|
||||
fi
|
||||
if [[ -n "$PREV_IMG" ]]; then
|
||||
echo "$PREV_IMG" > "$PREV_IMAGE_FILE"
|
||||
log "Saved previous image: $PREV_IMG -> $PREV_IMAGE_FILE"
|
||||
else
|
||||
log "No previous image captured (first deploy or service not running?)"
|
||||
fi
|
||||
|
||||
# 2. Pull latest code
|
||||
log "git pull origin main"
|
||||
git pull origin main >> "$LOG" 2>&1
|
||||
|
||||
# 3. Restart service
|
||||
log "Starting $TARGET_SERVICE (profile=$COMPOSE_PROFILE)"
|
||||
if [[ -n "$COMPOSE_PROFILE" ]]; then
|
||||
docker compose --profile "$COMPOSE_PROFILE" up -d --no-build "$TARGET_SERVICE" >> "$LOG" 2>&1
|
||||
else
|
||||
docker compose up -d --no-build "$TARGET_SERVICE" >> "$LOG" 2>&1
|
||||
fi
|
||||
log "$TARGET_SERVICE restarted"
|
||||
|
||||
# 4. Health-check loop: 10 attempts x 6 seconds = up to 60s
|
||||
log "Starting health-check: 10 attempts x 6s (max 60s)"
|
||||
if health_check 10 6 "deploy-health"; then
|
||||
log "Deploy SUCCESS: $TARGET_SERVICE healthy on port $TARGET_PORT"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# 5. Health failed -> AUTO ROLLBACK
|
||||
log "deploy FAILED: health not ok after 60s - initiating AUTO ROLLBACK"
|
||||
rollback_rc=0
|
||||
do_rollback || rollback_rc=$?
|
||||
|
||||
if [[ $rollback_rc -eq 0 ]]; then
|
||||
log "deploy FAILED, rolled back to previous image successfully - exit 1"
|
||||
exit 1
|
||||
elif [[ $rollback_rc -eq 2 ]]; then
|
||||
log "deploy FAILED, ROLLBACK ALSO FAILED - service may be down - exit 2"
|
||||
exit 2
|
||||
else
|
||||
log "deploy FAILED, rollback skipped (no previous image) - exit 1"
|
||||
exit 1
|
||||
fi
|
||||
639
scripts/staging_check.py
Normal file
639
scripts/staging_check.py
Normal file
@@ -0,0 +1,639 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
staging_check.py — Live staging-stand health & e2e check suite (ORCH-33).
|
||||
|
||||
Checks:
|
||||
Block A — SMOKE (health/queue, correct env)
|
||||
Block B — ACCESS (read-only calls to Plane sandbox + Gitea sandbox + registry)
|
||||
Block C — E2E (create task in SANDBOX → trigger pipeline via /webhook/plane
|
||||
→ verify branch + job enqueued → CLEANUP in finally)
|
||||
|
||||
Usage (inside the container or with correct env set):
|
||||
python3 scripts/staging_check.py [--base-url http://localhost:8501] [--mode stub|full-real]
|
||||
|
||||
Exit code: 0 = all PASS, non-zero = at least one FAIL.
|
||||
|
||||
NOTE on modes:
|
||||
stub — default; checks early pipeline artifacts (branch + analyst job
|
||||
enqueued) created BEFORE Claude CLI is invoked.
|
||||
Fast, deterministic, no LLM spend.
|
||||
full-real — additionally waits for the analyst agent to finish (long, costs
|
||||
credits). Not the default.
|
||||
|
||||
NOTE on Plane comments (403):
|
||||
The orchestrator posts the "🔍 Analyst запущен" comment using per-agent bot
|
||||
tokens (ORCH_PLANE_BOT_ANALYST). These bot accounts must be added as members
|
||||
of every Plane project they comment on. In staging the sandbox project was
|
||||
created after the bots were provisioned → the bots are not yet members of
|
||||
SANDBOX → add_comment returns 403 Forbidden.
|
||||
|
||||
This is a known infrastructure limitation of the staging sandbox, NOT a bug
|
||||
in the pipeline itself. C9b therefore verifies pipeline success via the
|
||||
staging job queue (/queue → recent) instead of Plane comments: the analyst
|
||||
job is enqueued BEFORE the add_comment call and its presence in the queue
|
||||
proves the pipeline ran through correctly.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import hashlib
|
||||
import hmac
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import datetime
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Colour helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
_BOLD = "\033[1m"
|
||||
_GREEN = "\033[32m"
|
||||
_RED = "\033[31m"
|
||||
_YELLOW = "\033[33m"
|
||||
_RESET = "\033[0m"
|
||||
|
||||
|
||||
def _ok(msg: str) -> str:
|
||||
return f" {_GREEN}✓ PASS{_RESET} {msg}"
|
||||
|
||||
|
||||
def _fail(msg: str) -> str:
|
||||
return f" {_RED}✗ FAIL{_RESET} {msg}"
|
||||
|
||||
|
||||
def _info(msg: str) -> str:
|
||||
return f" {_YELLOW}·{_RESET} {msg}"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Low-level HTTP helpers (stdlib only — no requests/httpx in scripts/)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _http(method: str, url: str, headers: dict | None = None,
|
||||
body: bytes | None = None, timeout: int = 15) -> tuple[int, bytes]:
|
||||
"""Simple HTTP wrapper. Returns (status_code, response_body)."""
|
||||
req = urllib.request.Request(url, data=body, headers=headers or {}, method=method)
|
||||
try:
|
||||
with urllib.request.urlopen(req, timeout=timeout) as resp:
|
||||
return resp.status, resp.read()
|
||||
except urllib.error.HTTPError as e:
|
||||
return e.code, e.read()
|
||||
except Exception as e:
|
||||
raise RuntimeError(f"{method} {url} → {e}") from e
|
||||
|
||||
|
||||
def _get(url: str, headers: dict | None = None, timeout: int = 15) -> tuple[int, dict]:
|
||||
status, body = _http("GET", url, headers=headers, timeout=timeout)
|
||||
try:
|
||||
data = json.loads(body)
|
||||
except Exception:
|
||||
data = {"_raw": body.decode(errors="replace")}
|
||||
return status, data
|
||||
|
||||
|
||||
def _post(url: str, headers: dict | None = None, payload: dict | None = None,
|
||||
raw_body: bytes | None = None, timeout: int = 15) -> tuple[int, dict]:
|
||||
if raw_body is not None:
|
||||
body = raw_body
|
||||
h = dict(headers or {})
|
||||
if "Content-Type" not in h:
|
||||
h["Content-Type"] = "application/json"
|
||||
else:
|
||||
body = json.dumps(payload or {}).encode()
|
||||
h = dict(headers or {})
|
||||
h["Content-Type"] = "application/json"
|
||||
status, resp_body = _http("POST", url, headers=h, body=body, timeout=timeout)
|
||||
try:
|
||||
data = json.loads(resp_body)
|
||||
except Exception:
|
||||
data = {"_raw": resp_body.decode(errors="replace")}
|
||||
return status, data
|
||||
|
||||
|
||||
def _patch(url: str, headers: dict | None = None, payload: dict | None = None,
|
||||
timeout: int = 15) -> tuple[int, dict]:
|
||||
body = json.dumps(payload or {}).encode()
|
||||
h = dict(headers or {})
|
||||
h["Content-Type"] = "application/json"
|
||||
status, resp_body = _http("PATCH", url, headers=h, body=body, timeout=timeout)
|
||||
try:
|
||||
data = json.loads(resp_body)
|
||||
except Exception:
|
||||
data = {"_raw": resp_body.decode(errors="replace")}
|
||||
return status, data
|
||||
|
||||
|
||||
def _delete(url: str, headers: dict | None = None, timeout: int = 15) -> int:
|
||||
status, _ = _http("DELETE", url, headers=headers, timeout=timeout)
|
||||
return status
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# HMAC helper for /webhook/plane
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _sign_payload(secret: str, body: bytes) -> str:
|
||||
"""Compute HMAC-SHA256 signature — matches verify_plane_signature in plane.py."""
|
||||
return hmac.new(secret.encode(), body, hashlib.sha256).hexdigest()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Result tracking
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class Results:
|
||||
def __init__(self):
|
||||
self._items: list[tuple[str, bool, str]] = [] # (label, passed, detail)
|
||||
|
||||
def add(self, label: str, passed: bool, detail: str = ""):
|
||||
self._items.append((label, passed, detail))
|
||||
line = _ok(label) if passed else _fail(label)
|
||||
if detail:
|
||||
line += f" [{detail}]"
|
||||
print(line)
|
||||
|
||||
def summary(self) -> bool:
|
||||
passed = sum(1 for _, ok, _ in self._items if ok)
|
||||
total = len(self._items)
|
||||
all_ok = passed == total
|
||||
colour = _GREEN if all_ok else _RED
|
||||
print()
|
||||
print(f"{_BOLD}{'='*60}{_RESET}")
|
||||
print(f"{colour}{_BOLD} RESULT: {passed}/{total} checks PASS{_RESET}")
|
||||
print(f"{_BOLD}{'='*60}{_RESET}")
|
||||
return all_ok
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Block A — SMOKE
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def block_a(base: str, results: Results):
|
||||
print(f"\n{_BOLD}[Block A] SMOKE{_RESET}")
|
||||
|
||||
# A1 — /health
|
||||
try:
|
||||
status, data = _get(f"{base}/health")
|
||||
ok = status == 200 and data.get("status") == "ok"
|
||||
results.add("A1 GET /health → 200 status=ok", ok,
|
||||
f"HTTP {status}, body={data}")
|
||||
except Exception as e:
|
||||
results.add("A1 GET /health → 200 status=ok", False, str(e))
|
||||
|
||||
# A2 — /queue
|
||||
try:
|
||||
status, data = _get(f"{base}/queue")
|
||||
ok = (status == 200
|
||||
and "counts" in data
|
||||
and "max_concurrency" in data
|
||||
and "resilience" in data)
|
||||
results.add("A2 GET /queue → 200 with counts/max_concurrency/resilience", ok,
|
||||
f"HTTP {status}, keys={list(data.keys())}")
|
||||
except Exception as e:
|
||||
results.add("A2 GET /queue → 200 with counts/max_concurrency/resilience", False, str(e))
|
||||
|
||||
# A3 — ORCH_STAGING=true in env (guard against hitting prod)
|
||||
staging_flag = os.environ.get("ORCH_STAGING", "").lower()
|
||||
ok = staging_flag == "true"
|
||||
results.add("A3 ORCH_STAGING=true (not prod)", ok,
|
||||
f"ORCH_STAGING={os.environ.get('ORCH_STAGING', '<unset>')}")
|
||||
if not ok:
|
||||
print(_fail(" ⛔ Safety abort: ORCH_STAGING is not 'true'. "
|
||||
"This might be prod. Skipping destructive blocks B/C."))
|
||||
sys.exit(2)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Block B — ACCESS
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
SANDBOX_PROJECT_ID = "8c5a3025-4f9d-4190-b79f-fa06276bb27e"
|
||||
PROD_ET_PROJECT_ID = "7a79f0a9-5278-49cd-9007-9a338f238f9c"
|
||||
PROD_ORCH_PROJECT_ID = "8da6aa25-a60e-44d6-a1e2-d8ae59aa7d6a"
|
||||
|
||||
|
||||
def block_b(results: Results):
|
||||
print(f"\n{_BOLD}[Block B] ACCESS{_RESET}")
|
||||
|
||||
plane_token = os.environ.get("ORCH_PLANE_API_TOKEN", "")
|
||||
plane_base_env = os.environ.get("ORCH_PLANE_API_URL", "http://localhost:8091")
|
||||
# env stores URL WITHOUT /api/v1 — add it ourselves
|
||||
plane_base = plane_base_env.rstrip("/") + "/api/v1"
|
||||
workspace = os.environ.get("ORCH_PLANE_WORKSPACE_SLUG", "ag_proj")
|
||||
gitea_token = os.environ.get("ORCH_GITEA_TOKEN", "")
|
||||
gitea_base = os.environ.get("ORCH_GITEA_URL", "http://localhost:3000")
|
||||
|
||||
plane_headers = {"X-API-Key": plane_token}
|
||||
gitea_headers = {"Authorization": f"token {gitea_token}"}
|
||||
|
||||
# B4 — Plane: list projects, sandbox id present
|
||||
try:
|
||||
url = f"{plane_base}/workspaces/{workspace}/projects/"
|
||||
status, data = _get(url, headers=plane_headers)
|
||||
if status == 200:
|
||||
# API may return a list or {"results": [...]}
|
||||
projects = data.get("results", data) if isinstance(data, dict) else data
|
||||
if isinstance(projects, list):
|
||||
ids = {p.get("id", "") for p in projects}
|
||||
else:
|
||||
ids = set()
|
||||
ok = SANDBOX_PROJECT_ID in ids
|
||||
results.add("B4 Plane: sandbox project accessible", ok,
|
||||
f"HTTP {status}, found {len(ids)} project(s), sandbox={'YES' if ok else 'NO'}")
|
||||
else:
|
||||
results.add("B4 Plane: sandbox project accessible", False,
|
||||
f"HTTP {status}")
|
||||
except Exception as e:
|
||||
results.add("B4 Plane: sandbox project accessible", False, str(e))
|
||||
|
||||
# B5 — Gitea: sandbox repo accessible, push=true
|
||||
try:
|
||||
url = f"{gitea_base}/api/v1/repos/admin/orchestrator-sandbox"
|
||||
status, data = _get(url, headers=gitea_headers)
|
||||
push_ok = data.get("permissions", {}).get("push", False) if status == 200 else False
|
||||
ok = status == 200 and push_ok
|
||||
results.add("B5 Gitea: orchestrator-sandbox accessible, push=true", ok,
|
||||
f"HTTP {status}, permissions={data.get('permissions')}")
|
||||
except Exception as e:
|
||||
results.add("B5 Gitea: orchestrator-sandbox accessible, push=true", False, str(e))
|
||||
|
||||
# B6 — Registry: sandbox in known IDs, prod ET/ORCH NOT in known IDs
|
||||
try:
|
||||
# Import from inside the container (script runs in /repos/orchestrator context)
|
||||
sys.path.insert(0, "/repos/orchestrator")
|
||||
# Force reload to pick up container env
|
||||
import importlib
|
||||
if "src.projects" in sys.modules:
|
||||
importlib.reload(sys.modules["src.projects"])
|
||||
from src.projects import known_plane_project_ids
|
||||
known = known_plane_project_ids()
|
||||
sandbox_present = SANDBOX_PROJECT_ID in known
|
||||
et_absent = PROD_ET_PROJECT_ID not in known
|
||||
orch_absent = PROD_ORCH_PROJECT_ID not in known
|
||||
ok = sandbox_present and et_absent and orch_absent
|
||||
detail = (
|
||||
f"sandbox={'YES' if sandbox_present else 'NO'}, "
|
||||
f"prod-ET={'NO(good)' if et_absent else 'YES(BAD!)'}, "
|
||||
f"prod-ORCH={'NO(good)' if orch_absent else 'YES(BAD!)'}"
|
||||
)
|
||||
results.add("B6 Registry: sandbox present, prod ET/ORCH absent", ok, detail)
|
||||
except Exception as e:
|
||||
results.add("B6 Registry: sandbox present, prod ET/ORCH absent", False, str(e))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Block C — E2E
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
IN_PROGRESS_STATE_ID = "b873d9eb-993c-48cd-97ac-99a9b1623967"
|
||||
|
||||
# Path to staging SQLite DB inside the container
|
||||
STAGING_DB_PATH = os.environ.get("ORCH_DB_PATH", "/app/data/orchestrator.db")
|
||||
|
||||
|
||||
def _make_webhook_payload(issue_id: str, issue_name: str, issue_desc: str) -> dict:
|
||||
"""Build the minimal webhook payload that triggers start_pipeline."""
|
||||
return {
|
||||
"event": "issue",
|
||||
"action": "updated",
|
||||
"data": {
|
||||
"id": issue_id,
|
||||
"name": issue_name,
|
||||
"description_stripped": issue_desc,
|
||||
"project": SANDBOX_PROJECT_ID,
|
||||
"state": {
|
||||
"id": IN_PROGRESS_STATE_ID,
|
||||
"name": "In Progress",
|
||||
"group": "started",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def _poll(fn, timeout: int = 60, interval: int = 3, label: str = ""):
|
||||
"""Poll fn() until it returns truthy or timeout expires."""
|
||||
deadline = time.time() + timeout
|
||||
while time.time() < deadline:
|
||||
result = fn()
|
||||
if result:
|
||||
return result
|
||||
if label:
|
||||
print(_info(f" waiting... ({label})"))
|
||||
time.sleep(interval)
|
||||
return None
|
||||
|
||||
|
||||
def _cleanup_staging_db(plane_issue_id: str):
|
||||
"""Delete the test task row from staging SQLite DB."""
|
||||
if not plane_issue_id:
|
||||
print(_info("CLEANUP DB: no issue_id to clean"))
|
||||
return
|
||||
try:
|
||||
import sqlite3
|
||||
conn = sqlite3.connect(STAGING_DB_PATH)
|
||||
cur = conn.execute(
|
||||
"DELETE FROM tasks WHERE plane_id = ?", (plane_issue_id,)
|
||||
)
|
||||
deleted = cur.rowcount
|
||||
conn.commit()
|
||||
conn.close()
|
||||
if deleted:
|
||||
print(_ok(f"CLEANUP DB: deleted {deleted} task row(s) for plane_id={plane_issue_id}"))
|
||||
else:
|
||||
print(_info(f"CLEANUP DB: no task row found for plane_id={plane_issue_id}"))
|
||||
except Exception as e:
|
||||
print(_fail(f"CLEANUP DB: error: {e}"))
|
||||
|
||||
|
||||
def _cleanup_staging_jobs(plane_issue_id: str):
|
||||
"""Delete job queue rows for the test task from staging SQLite DB."""
|
||||
if not plane_issue_id:
|
||||
return
|
||||
try:
|
||||
import sqlite3
|
||||
conn = sqlite3.connect(STAGING_DB_PATH)
|
||||
# Find task ids for this plane_id first
|
||||
task_rows = conn.execute(
|
||||
"SELECT id FROM tasks WHERE plane_id = ?", (plane_issue_id,)
|
||||
).fetchall()
|
||||
if task_rows:
|
||||
task_ids = [r[0] for r in task_rows]
|
||||
placeholders = ",".join("?" * len(task_ids))
|
||||
cur = conn.execute(
|
||||
f"DELETE FROM jobs WHERE task_id IN ({placeholders})", task_ids
|
||||
)
|
||||
deleted = cur.rowcount
|
||||
conn.commit()
|
||||
if deleted:
|
||||
print(_ok(f"CLEANUP DB: deleted {deleted} job row(s) for task_ids={task_ids}"))
|
||||
conn.close()
|
||||
except Exception as e:
|
||||
print(_fail(f"CLEANUP DB jobs: error: {e}"))
|
||||
|
||||
|
||||
def _cleanup_dedup(plane_issue_id: str, wh_body_sha: str | None = None):
|
||||
"""Remove dedup event entries for the test webhook delivery."""
|
||||
if not wh_body_sha:
|
||||
return
|
||||
try:
|
||||
import sqlite3
|
||||
conn = sqlite3.connect(STAGING_DB_PATH)
|
||||
cur = conn.execute(
|
||||
"DELETE FROM events_dedup WHERE delivery_id = ?", (wh_body_sha,)
|
||||
)
|
||||
deleted = cur.rowcount
|
||||
conn.commit()
|
||||
conn.close()
|
||||
if deleted:
|
||||
print(_ok(f"CLEANUP DB: removed {deleted} dedup entry"))
|
||||
except Exception as e:
|
||||
# dedup table might not exist or different schema — not critical
|
||||
print(_info(f"CLEANUP DB dedup: {e}"))
|
||||
|
||||
|
||||
def block_c(base: str, results: Results, mode: str):
|
||||
print(f"\n{_BOLD}[Block C] E2E (mode={mode}){_RESET}")
|
||||
|
||||
plane_token = os.environ.get("ORCH_PLANE_API_TOKEN", "")
|
||||
plane_base_env = os.environ.get("ORCH_PLANE_API_URL", "http://localhost:8091")
|
||||
plane_base = plane_base_env.rstrip("/") + "/api/v1"
|
||||
workspace = os.environ.get("ORCH_PLANE_WORKSPACE_SLUG", "ag_proj")
|
||||
gitea_token = os.environ.get("ORCH_GITEA_TOKEN", "")
|
||||
gitea_base = os.environ.get("ORCH_GITEA_URL", "http://localhost:3000")
|
||||
webhook_secret = os.environ.get("ORCH_PLANE_WEBHOOK_SECRET", "")
|
||||
|
||||
plane_headers = {"X-API-Key": plane_token}
|
||||
gitea_headers = {"Authorization": f"token {gitea_token}"}
|
||||
|
||||
ts = datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%dT%H%M%S")
|
||||
issue_name = f"[staging-check] e2e {ts}"
|
||||
issue_desc = (
|
||||
"Automated e2e check created by staging_check.py. "
|
||||
"This task tests the live staging pipeline end-to-end. "
|
||||
"Safe to delete — cleanup runs in finally block."
|
||||
)
|
||||
|
||||
issue_id = None
|
||||
branch_name = None
|
||||
wh_body_bytes = None
|
||||
|
||||
try:
|
||||
# C7 — Create task in Plane SANDBOX
|
||||
print(_info(f"C7: Creating issue in SANDBOX project..."))
|
||||
url = f"{plane_base}/workspaces/{workspace}/projects/{SANDBOX_PROJECT_ID}/issues/"
|
||||
status, data = _post(url, headers=plane_headers, payload={
|
||||
"name": issue_name,
|
||||
"description_html": f"<p>{issue_desc}</p>",
|
||||
"description_stripped": issue_desc,
|
||||
})
|
||||
issue_id = data.get("id")
|
||||
ok = status in (200, 201) and bool(issue_id)
|
||||
results.add("C7 Create issue in Plane SANDBOX", ok,
|
||||
f"HTTP {status}, issue_id={issue_id}")
|
||||
if not ok:
|
||||
print(_fail(f" Cannot continue C8-C9 without issue. body={data}"))
|
||||
results.add("C8 Trigger pipeline via /webhook/plane", False, "skipped: C7 failed")
|
||||
results.add("C9a Branch appears in orchestrator-sandbox", False, "skipped")
|
||||
results.add("C9b Analyst job enqueued in staging queue", False, "skipped")
|
||||
return
|
||||
|
||||
# Small delay to let Plane finish persisting the issue
|
||||
time.sleep(2)
|
||||
|
||||
# C8 — Trigger pipeline via direct POST to /webhook/plane
|
||||
print(_info(f"C8: Triggering pipeline via POST /webhook/plane ..."))
|
||||
wh_payload = _make_webhook_payload(issue_id, issue_name, issue_desc)
|
||||
wh_body_bytes = json.dumps(wh_payload).encode()
|
||||
|
||||
wh_headers = {"Content-Type": "application/json"}
|
||||
if webhook_secret:
|
||||
sig = _sign_payload(webhook_secret, wh_body_bytes)
|
||||
wh_headers["X-Plane-Signature"] = sig
|
||||
print(_info(f" Using HMAC signature (secret len={len(webhook_secret)})"))
|
||||
else:
|
||||
print(_info(" No webhook secret configured, sending without signature"))
|
||||
|
||||
status, resp = _post(f"{base}/webhook/plane",
|
||||
headers=wh_headers,
|
||||
raw_body=wh_body_bytes)
|
||||
ok = status == 200 and resp.get("status") in ("accepted",)
|
||||
results.add("C8 Trigger pipeline via /webhook/plane", ok,
|
||||
f"HTTP {status}, resp={resp}")
|
||||
if not ok:
|
||||
print(_fail(f" Pipeline trigger failed. Cannot verify C9."))
|
||||
results.add("C9a Branch appears in orchestrator-sandbox", False, "skipped: C8 failed")
|
||||
results.add("C9b Analyst job enqueued in staging queue", False, "skipped: C8 failed")
|
||||
return
|
||||
|
||||
# C9a — Poll for branch in Gitea orchestrator-sandbox
|
||||
print(_info("C9a: Polling for branch in orchestrator-sandbox (up to 60s)..."))
|
||||
|
||||
def _check_branch():
|
||||
try:
|
||||
burl = f"{gitea_base}/api/v1/repos/admin/orchestrator-sandbox/branches"
|
||||
s, bdata = _get(burl, headers=gitea_headers)
|
||||
if s != 200:
|
||||
return None
|
||||
branches = bdata if isinstance(bdata, list) else bdata.get("results", [])
|
||||
for b in branches:
|
||||
bname = b.get("name", "")
|
||||
# Branch name: feature/SANDBOX-NNN-staging-check-...
|
||||
if "feature/" in bname and "staging-check" in bname:
|
||||
return bname
|
||||
return None
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
branch_name = _poll(_check_branch, timeout=60, interval=3,
|
||||
label="waiting for branch")
|
||||
ok = bool(branch_name)
|
||||
results.add("C9a Branch appears in orchestrator-sandbox", ok,
|
||||
f"branch={branch_name or 'not found'}")
|
||||
|
||||
# C9b — Verify analyst job was enqueued via staging /queue
|
||||
# NOTE: The orchestrator posts a "🔍 Analyst запущен" comment to Plane using
|
||||
# per-agent bot tokens (ORCH_PLANE_BOT_ANALYST). In staging, the sandbox
|
||||
# project was created after the bot accounts were provisioned, so the bots are
|
||||
# not yet members of the SANDBOX project → add_comment returns 403 Forbidden.
|
||||
# This is a known staging infrastructure limitation (not a pipeline bug).
|
||||
# We therefore verify pipeline success via /queue (recent jobs): the analyst
|
||||
# job is enqueued BEFORE the add_comment call, so its presence in the queue
|
||||
# confirms the pipeline ran through to job dispatch.
|
||||
print(_info("C9b: Checking staging job queue for analyst job (up to 30s)..."))
|
||||
print(_info(" (Plane comment check skipped: bot-tokens not added to SANDBOX project)"))
|
||||
|
||||
def _check_queue():
|
||||
try:
|
||||
s, qdata = _get(f"{base}/queue")
|
||||
if s != 200:
|
||||
return None
|
||||
recent = qdata.get("recent", [])
|
||||
for job in recent:
|
||||
if (job.get("agent") == "analyst"
|
||||
and job.get("repo") == "orchestrator-sandbox"
|
||||
and issue_name in (job.get("task_content") or "")):
|
||||
return job
|
||||
return None
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
analyst_job = _poll(_check_queue, timeout=30, interval=2,
|
||||
label="waiting for analyst job in queue")
|
||||
ok = bool(analyst_job)
|
||||
detail = ""
|
||||
if analyst_job:
|
||||
detail = (f"job_id={analyst_job.get('id')}, "
|
||||
f"status={analyst_job.get('status')}, "
|
||||
f"agent={analyst_job.get('agent')}")
|
||||
results.add("C9b Analyst job enqueued in staging queue", ok, detail)
|
||||
|
||||
finally:
|
||||
# C10 — CLEANUP (always runs)
|
||||
print(f"\n{_BOLD}[CLEANUP]{_RESET}")
|
||||
_cleanup(
|
||||
plane_base=plane_base,
|
||||
workspace=workspace,
|
||||
gitea_base=gitea_base,
|
||||
plane_headers=plane_headers,
|
||||
gitea_headers=gitea_headers,
|
||||
issue_id=issue_id,
|
||||
branch_name=branch_name,
|
||||
wh_body_bytes=wh_body_bytes,
|
||||
)
|
||||
|
||||
|
||||
def _cleanup(plane_base, workspace, gitea_base, plane_headers, gitea_headers,
|
||||
issue_id, branch_name, wh_body_bytes=None):
|
||||
"""Delete test branch in Gitea, test issue in Plane SANDBOX, and DB rows."""
|
||||
|
||||
# Delete branch in Gitea
|
||||
if branch_name:
|
||||
try:
|
||||
burl = (f"{gitea_base}/api/v1/repos/admin/orchestrator-sandbox"
|
||||
f"/branches/{urllib.parse.quote(branch_name, safe='')}")
|
||||
s = _delete(burl, headers=gitea_headers)
|
||||
if s in (200, 204, 404):
|
||||
print(_ok(f"CLEANUP: deleted branch {branch_name!r} (HTTP {s})"))
|
||||
else:
|
||||
print(_fail(f"CLEANUP: delete branch returned HTTP {s}"))
|
||||
except Exception as e:
|
||||
print(_fail(f"CLEANUP: delete branch error: {e}"))
|
||||
else:
|
||||
print(_info("CLEANUP: no branch to delete"))
|
||||
|
||||
# Delete issue in Plane SANDBOX
|
||||
if issue_id:
|
||||
try:
|
||||
iurl = (f"{plane_base}/workspaces/{workspace}/projects/"
|
||||
f"{SANDBOX_PROJECT_ID}/issues/{issue_id}/")
|
||||
s = _delete(iurl, headers=plane_headers)
|
||||
if s in (200, 204, 404):
|
||||
print(_ok(f"CLEANUP: deleted Plane issue {issue_id} (HTTP {s})"))
|
||||
else:
|
||||
print(_fail(f"CLEANUP: delete Plane issue returned HTTP {s}"))
|
||||
except Exception as e:
|
||||
print(_fail(f"CLEANUP: delete Plane issue error: {e}"))
|
||||
else:
|
||||
print(_info("CLEANUP: no issue to delete"))
|
||||
|
||||
# Delete task + jobs from staging DB
|
||||
if issue_id:
|
||||
_cleanup_staging_jobs(issue_id)
|
||||
_cleanup_staging_db(issue_id)
|
||||
|
||||
# Remove dedup entry so future re-runs with same body don't get "duplicate"
|
||||
if wh_body_bytes is not None:
|
||||
import hashlib as _hl
|
||||
dedup_id = "plane" + _hl.sha256(b"plane" + wh_body_bytes).hexdigest()
|
||||
_cleanup_dedup(issue_id, dedup_id)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Live staging-stand check suite (ORCH-33)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--base-url",
|
||||
default="http://localhost:8501",
|
||||
help="Base URL of the staging orchestrator (default: http://localhost:8501)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--mode",
|
||||
choices=["stub", "full-real"],
|
||||
default="stub",
|
||||
help=(
|
||||
"stub (default): check early pipeline artifacts only (branch+job), "
|
||||
"no LLM spend. "
|
||||
"full-real: also wait for the analyst agent (slow, costs credits)."
|
||||
),
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
base = args.base_url.rstrip("/")
|
||||
|
||||
print(f"{_BOLD}{'='*60}{_RESET}")
|
||||
print(f"{_BOLD} ORCH-33 Staging Check Suite{_RESET}")
|
||||
print(f" base_url : {base}")
|
||||
print(f" mode : {args.mode}")
|
||||
print(f" utc_time : {datetime.datetime.now(datetime.timezone.utc).isoformat()}")
|
||||
print(f"{_BOLD}{'='*60}{_RESET}")
|
||||
|
||||
results = Results()
|
||||
|
||||
block_a(base, results)
|
||||
block_b(results)
|
||||
block_c(base, results, args.mode)
|
||||
|
||||
all_ok = results.summary()
|
||||
sys.exit(0 if all_ok else 1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -16,6 +16,62 @@ from ..plane_sync import notify_stage_change as plane_notify_stage, add_comment
|
||||
logger = logging.getLogger("orchestrator.launcher")
|
||||
|
||||
|
||||
def prune_run_logs(runs_dir, keep_days=30, keep_max=500, active_paths=None):
|
||||
"""L-2: best-effort rotation of per-run logs (<runs_dir>/*.log).
|
||||
|
||||
A log file is removed if it is older than keep_days OR it is not within the
|
||||
keep_max most-recent logs (whichever condition is met first). Only *.log
|
||||
files directly inside runs_dir are considered; non-.log files and
|
||||
subdirectories are never touched. Files whose path is in active_paths (the
|
||||
currently running log) are always kept.
|
||||
|
||||
Returns the number of files removed. Never raises: any error is logged and
|
||||
swallowed so log rotation can never bring the app down.
|
||||
"""
|
||||
removed = 0
|
||||
try:
|
||||
active = set()
|
||||
for ap in (active_paths or []):
|
||||
try:
|
||||
active.add(os.path.realpath(ap))
|
||||
except Exception:
|
||||
active.add(ap)
|
||||
|
||||
if not os.path.isdir(runs_dir):
|
||||
return 0
|
||||
|
||||
logs = []
|
||||
for name in os.listdir(runs_dir):
|
||||
if not name.endswith(".log"):
|
||||
continue
|
||||
path = os.path.join(runs_dir, name)
|
||||
if not os.path.isfile(path):
|
||||
continue
|
||||
if os.path.realpath(path) in active:
|
||||
continue
|
||||
try:
|
||||
mtime = os.path.getmtime(path)
|
||||
except OSError:
|
||||
continue
|
||||
logs.append((path, mtime))
|
||||
|
||||
logs.sort(key=lambda t: t[1], reverse=True)
|
||||
|
||||
cutoff = time.time() - keep_days * 86400
|
||||
for idx, (path, mtime) in enumerate(logs):
|
||||
too_old = mtime < cutoff
|
||||
over_max = idx >= keep_max
|
||||
if too_old or over_max:
|
||||
try:
|
||||
os.remove(path)
|
||||
removed += 1
|
||||
except OSError as e:
|
||||
logger.warning(f"prune_run_logs: failed to remove {path}: {e}")
|
||||
except Exception as e:
|
||||
logger.warning(f"prune_run_logs failed for {runs_dir}: {e}")
|
||||
return removed
|
||||
|
||||
|
||||
class AgentLauncher:
|
||||
"""Launch Claude CLI agents directly (binary mounted into container)."""
|
||||
|
||||
@@ -153,9 +209,15 @@ class AgentLauncher:
|
||||
|
||||
# No git fetch/checkout here: ensure_worktree() already put the worktree on
|
||||
# the right branch. The agent simply runs inside its isolated work_path.
|
||||
# Feature 4 (token usage): --output-format json makes claude emit a single
|
||||
# result JSON (with usage + total_cost_usd) at the end of stdout. The log
|
||||
# still captures it; _monitor_agent parses the trailing JSON after the run
|
||||
# to record per-agent tokens/cost. _monitor_agent's failure handling keys
|
||||
# off the process exit_code (not stdout shape), so this is safe.
|
||||
cmd = (
|
||||
f'cd {work_path} && '
|
||||
f'{self.CLAUDE_BIN} --print '
|
||||
f'--output-format json '
|
||||
f'{model_flag}'
|
||||
f'"$(cat {task_file})" '
|
||||
f'--system-prompt "$(cat {system_prompt})" '
|
||||
@@ -344,6 +406,17 @@ class AgentLauncher:
|
||||
|
||||
notify_agent_finished(run_id, agent, exit_code, task_id=_task_id, duration_s=_duration_s)
|
||||
|
||||
# Feature 4: parse token usage / cost from the (json) run log and record
|
||||
# it on the agent_runs row. Never fatal — a garbled/missing JSON records
|
||||
# NULLs and logs a warning so a broken run can't crash the monitor.
|
||||
try:
|
||||
from ..usage import parse_usage_from_log, record_usage
|
||||
_usage = parse_usage_from_log(output_path) if output_path else None
|
||||
record_usage(run_id, _usage)
|
||||
except Exception as e:
|
||||
logger.warning(f"run_id={run_id}: usage accounting failed: {e}")
|
||||
_usage = None
|
||||
|
||||
# Commit and push any changes — in the per-branch worktree (ORCH-2 / S-4),
|
||||
# NOT in the shared /repos/<repo>. The worktree is already on `branch`
|
||||
# (ensure_worktree did the checkout), so no checkout is needed here.
|
||||
@@ -415,7 +488,8 @@ class AgentLauncher:
|
||||
set_issue_blocked(_wid)
|
||||
plane_add_comment(
|
||||
_wid,
|
||||
"\u274c Deploy FAILED (smoke/healthcheck). Rolled back. Developer \u043d\u0443\u0436\u0435\u043d \u0434\u043b\u044f \u0444\u0438\u043a\u0441\u0430."
|
||||
"\u274c Deploy FAILED (smoke/healthcheck). Rolled back. Developer \u043d\u0443\u0436\u0435\u043d \u0434\u043b\u044f \u0444\u0438\u043a\u0441\u0430.",
|
||||
author="deployer",
|
||||
)
|
||||
from ..notifications import send_telegram
|
||||
send_telegram(f"\U0001f6a8 {_wid}: Deploy failed! Rolled back. Needs fix.")
|
||||
@@ -433,6 +507,14 @@ class AgentLauncher:
|
||||
from ..notifications import send_telegram
|
||||
send_telegram(f"\u26a0\ufe0f {_wid}: Agent {agent} failed (exit_code={exit_code}). Check logs: /app/data/runs/{run_id}.log")
|
||||
|
||||
# Feature 4: post the per-agent usage comment under that agent's bot, and
|
||||
# — for the deployer finishing the task — the per-task usage summary.
|
||||
if exit_code == 0:
|
||||
try:
|
||||
self._post_usage_comments(run_id, agent, repo, branch, _usage)
|
||||
except Exception as e:
|
||||
logger.warning(f"run_id={run_id}: usage comment failed: {e}")
|
||||
|
||||
# Auto-advance stage if agent finished successfully and QG passes
|
||||
if exit_code == 0:
|
||||
self._try_advance_stage(run_id, agent, repo, branch)
|
||||
@@ -597,6 +679,69 @@ class AgentLauncher:
|
||||
logger.error(f"Auto-advance failed for run_id={run_id}: {e}")
|
||||
|
||||
|
||||
def _post_usage_comments(self, run_id, agent, repo, branch, usage):
|
||||
"""Feature 4: post the per-agent usage comment (and Deployer summary).
|
||||
|
||||
- Always (on success, with a work_item_id): a per-agent finish comment
|
||||
with token/cost, authored by the finishing agent's Plane bot.
|
||||
- When the deployer finishes: also a per-task summary (SUM over
|
||||
agent_runs GROUP BY agent), authored by the deployer.
|
||||
"""
|
||||
from ..usage import usage_comment, task_summary_comment
|
||||
conn = get_db()
|
||||
row = conn.execute(
|
||||
"SELECT id, work_item_id FROM tasks WHERE repo=? AND branch=?",
|
||||
(repo, branch),
|
||||
).fetchone()
|
||||
conn.close()
|
||||
if not row:
|
||||
return
|
||||
task_id, work_item_id = row[0], row[1]
|
||||
if not work_item_id:
|
||||
return
|
||||
# Observability: every agent's finish comment links its artifact(s)
|
||||
# (reviewer->12-review, tester->13-test-report, deployer->14-deploy-log,
|
||||
# architect->ADR, developer->PR/branch). For the developer we resolve the
|
||||
# open PR number so the link points straight at it.
|
||||
pr_number = None
|
||||
if agent == "developer":
|
||||
pr_number = self._open_pr_number(repo, branch)
|
||||
plane_add_comment(
|
||||
work_item_id,
|
||||
usage_comment(
|
||||
agent,
|
||||
usage,
|
||||
repo=repo,
|
||||
branch=branch,
|
||||
work_item_id=work_item_id,
|
||||
pr_number=pr_number,
|
||||
),
|
||||
author=agent,
|
||||
)
|
||||
if agent == "deployer":
|
||||
plane_add_comment(
|
||||
work_item_id, task_summary_comment(task_id), author="deployer"
|
||||
)
|
||||
|
||||
def _open_pr_number(self, repo: str, branch: str):
|
||||
"""Return the open PR number for `branch`, or None. Never raises."""
|
||||
try:
|
||||
import httpx
|
||||
owner = settings.gitea_owner
|
||||
headers = {"Authorization": f"token {settings.gitea_token}"}
|
||||
resp = httpx.get(
|
||||
f"{settings.gitea_url}/api/v1/repos/{owner}/{repo}/pulls",
|
||||
params={"state": "open", "head": branch},
|
||||
headers=headers, timeout=5,
|
||||
)
|
||||
if resp.status_code == 200:
|
||||
prs = resp.json()
|
||||
if prs:
|
||||
return prs[0].get("number")
|
||||
except Exception:
|
||||
pass
|
||||
return None
|
||||
|
||||
def _ensure_pr(self, repo: str, branch: str, run_id: int):
|
||||
import httpx
|
||||
owner = settings.gitea_owner
|
||||
|
||||
@@ -9,8 +9,20 @@ class Settings(BaseSettings):
|
||||
plane_webhook_secret: str = ""
|
||||
plane_project_id: str = ""
|
||||
|
||||
# Per-agent Plane bot tokens (feat: per-agent comment authorship).
|
||||
# When set, add_comment posts under the matching bot so Plane shows the
|
||||
# real author (Analyst/Architect/...). Empty -> fallback to plane_api_token.
|
||||
plane_bot_analyst: str = ""
|
||||
plane_bot_architect: str = ""
|
||||
plane_bot_developer: str = ""
|
||||
plane_bot_reviewer: str = ""
|
||||
plane_bot_tester: str = ""
|
||||
plane_bot_deployer: str = ""
|
||||
plane_bot_stream: str = ""
|
||||
|
||||
# Gitea
|
||||
gitea_url: str = "http://localhost:3000"
|
||||
gitea_public_url: str = "" # external URL for clickable links in comments; falls back to gitea_url
|
||||
gitea_token: str = ""
|
||||
gitea_webhook_secret: str = ""
|
||||
gitea_owner: str = "admin"
|
||||
@@ -66,6 +78,15 @@ class Settings(BaseSettings):
|
||||
agent_kill_grace_seconds: int = 20
|
||||
agent_timeout_overrides_json: str = ""
|
||||
|
||||
# L-2: run-log rotation. Old per-run logs in <data>/runs/*.log are pruned at
|
||||
# app startup (best-effort). A *.log is removed if it is older than
|
||||
# log_keep_days OR not within the log_keep_max most-recent logs (whichever
|
||||
# hits first). Only *.log files are touched; the active run log is skipped.
|
||||
# log_keep_days -> max age in days (env ORCH_LOG_KEEP_DAYS).
|
||||
# log_keep_max -> max number of newest logs to retain (env ORCH_LOG_KEEP_MAX).
|
||||
log_keep_days: int = 30
|
||||
log_keep_max: int = 500
|
||||
|
||||
|
||||
# Telegram notifications
|
||||
telegram_bot_token: str = ""
|
||||
|
||||
152
src/db.py
152
src/db.py
@@ -77,6 +77,38 @@ def init_db():
|
||||
"CREATE UNIQUE INDEX IF NOT EXISTS idx_events_delivery "
|
||||
"ON events(delivery_id) WHERE delivery_id IS NOT NULL"
|
||||
)
|
||||
# Feature 4 (token usage): per-run token / cost accounting. Parsed from the
|
||||
# claude --output-format json result by the launcher monitor. Idempotent
|
||||
# ALTERs (no-op once the columns exist) so this is safe on the live prod DB.
|
||||
_ensure_column(conn, "agent_runs", "input_tokens", "INTEGER")
|
||||
_ensure_column(conn, "agent_runs", "output_tokens", "INTEGER")
|
||||
_ensure_column(conn, "agent_runs", "cache_read_tokens", "INTEGER")
|
||||
# Observability fix: also persist cache-CREATION input tokens. Claude CLI
|
||||
# reports the real input split across input_tokens (fresh, ~tens) +
|
||||
# cache_read_input_tokens (cache hit, millions) + cache_creation_input_tokens
|
||||
# (writing new cache). Without this column the cache_creation slice is lost
|
||||
# and the "X in" figure understates the true prompt size. Idempotent ALTER.
|
||||
_ensure_column(conn, "agent_runs", "cache_creation_tokens", "INTEGER")
|
||||
_ensure_column(conn, "agent_runs", "cost_usd", "REAL")
|
||||
# Telegram live tracker (feat/telegram-live-tracker): persist the FULL model
|
||||
# name (e.g. "tokenator/claude-opus-4-8") per agent_runs row so the tracker
|
||||
# can render a short model tag per stage. Parsed from the run-log result JSON
|
||||
# (modelUsage key) by the launcher monitor; NULL when unknown. Idempotent ALTER.
|
||||
_ensure_column(conn, "agent_runs", "model", "TEXT")
|
||||
# Telegram live tracker: one editable Telegram message per task. We store its
|
||||
# message_id so each stage transition can editMessageText the same message
|
||||
# instead of spamming a new one. Idempotent ALTER (safe on the live prod DB).
|
||||
_ensure_column(conn, "tasks", "tracker_message_id", "INTEGER")
|
||||
# Telegram live tracker: human-readable task title for the tracker header
|
||||
# ("🛠️ ET-012 · <title>"). Populated from the Plane work-item name at task
|
||||
# creation; falls back to the work_item_id when absent. Idempotent ALTER.
|
||||
_ensure_column(conn, "tasks", "title", "TEXT")
|
||||
# Telegram live tracker: "BRD review" is the only HUMAN gate time — the delta
|
||||
# between "BRD ready / approve requested" and the analysis->architecture
|
||||
# advance (human flipped Plane to Approved). Persisted on the task so the
|
||||
# tracker can show "твоё время" without recomputing from activity history.
|
||||
_ensure_column(conn, "tasks", "brd_review_started_at", "TEXT")
|
||||
_ensure_column(conn, "tasks", "brd_review_ended_at", "TEXT")
|
||||
conn.commit()
|
||||
conn.close()
|
||||
|
||||
@@ -124,6 +156,71 @@ def update_task_stage(task_id: int, stage: str):
|
||||
conn.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Telegram live tracker helpers (feat/telegram-live-tracker)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def get_tracker_message_id(task_id: int) -> int | None:
|
||||
"""Return the stored Telegram tracker message_id for a task, or None."""
|
||||
conn = get_db()
|
||||
try:
|
||||
row = conn.execute(
|
||||
"SELECT tracker_message_id FROM tasks WHERE id=?", (task_id,)
|
||||
).fetchone()
|
||||
finally:
|
||||
conn.close()
|
||||
return row[0] if row and row[0] is not None else None
|
||||
|
||||
|
||||
def set_tracker_message_id(task_id: int, message_id: int) -> None:
|
||||
"""Persist the Telegram tracker message_id for a task (idempotent overwrite)."""
|
||||
conn = get_db()
|
||||
try:
|
||||
conn.execute(
|
||||
"UPDATE tasks SET tracker_message_id=? WHERE id=?",
|
||||
(message_id, task_id),
|
||||
)
|
||||
conn.commit()
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
|
||||
def mark_brd_review_started(task_id: int) -> None:
|
||||
"""Stamp when BRD review (the human approve gate) started, if not already set.
|
||||
|
||||
Idempotent: only sets it the first time (a retried analyst run must not reset
|
||||
the clock). The delta to brd_review_ended_at is the only "твоё время".
|
||||
"""
|
||||
conn = get_db()
|
||||
try:
|
||||
conn.execute(
|
||||
"UPDATE tasks SET brd_review_started_at=datetime('now') "
|
||||
"WHERE id=? AND brd_review_started_at IS NULL",
|
||||
(task_id,),
|
||||
)
|
||||
conn.commit()
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
|
||||
def mark_brd_review_ended(task_id: int) -> None:
|
||||
"""Stamp when BRD review ended (analysis->architecture advance / Approved).
|
||||
|
||||
Idempotent: only sets it the first time and only if a start exists.
|
||||
"""
|
||||
conn = get_db()
|
||||
try:
|
||||
conn.execute(
|
||||
"UPDATE tasks SET brd_review_ended_at=datetime('now') "
|
||||
"WHERE id=? AND brd_review_started_at IS NOT NULL "
|
||||
"AND brd_review_ended_at IS NULL",
|
||||
(task_id,),
|
||||
)
|
||||
conn.commit()
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
|
||||
def get_next_work_item_id(repo: str, prefix: str = "ET") -> str:
|
||||
"""Generate next work item ID (e.g., ET-003 / ORCH-001).
|
||||
|
||||
@@ -152,6 +249,44 @@ def get_next_work_item_id(repo: str, prefix: str = "ET") -> str:
|
||||
return f"{prefix}-{next_num:03d}"
|
||||
|
||||
|
||||
def ensure_unique_work_item_id(work_item_id: str, repo: str) -> str:
|
||||
"""BUG 2a: guarantee work_item_id uniqueness within (repo) over M-6 derive.
|
||||
|
||||
M-6 derives the work_item_id from the Plane sequence_id. That number can
|
||||
collide (e.g. an issue was deleted and the sequence reused, or two issues
|
||||
map to the same number) -> the SAME ET-NNN gets handed to two different
|
||||
tasks, which then physically share a branch/worktree slug prefix and step on
|
||||
each other (see ET-006: task 8 and task 25).
|
||||
|
||||
This is a guard LAYERED ON TOP of the M-6 derive (it does NOT replace it):
|
||||
given the derived id, if that exact <PREFIX>-NNN already exists in the tasks
|
||||
table for this repo, walk forward (ET-007, ET-008, ...) until a free number
|
||||
is found and return that instead. If the derived id is free, it is returned
|
||||
unchanged.
|
||||
"""
|
||||
if not work_item_id or "-" not in work_item_id:
|
||||
return work_item_id
|
||||
prefix, num_str = work_item_id.rsplit("-", 1)
|
||||
try:
|
||||
num = int(num_str)
|
||||
except ValueError:
|
||||
return work_item_id
|
||||
width = len(num_str)
|
||||
|
||||
conn = get_db()
|
||||
try:
|
||||
candidate = work_item_id
|
||||
while conn.execute(
|
||||
"SELECT 1 FROM tasks WHERE repo = ? AND work_item_id = ? LIMIT 1",
|
||||
(repo, candidate),
|
||||
).fetchone() is not None:
|
||||
num += 1
|
||||
candidate = f"{prefix}-{num:0{width}d}"
|
||||
return candidate
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ORCH-5 (M-7): idempotent webhook event logging
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -306,6 +441,23 @@ def mark_job(
|
||||
conn.close()
|
||||
|
||||
|
||||
def has_active_job_for_task(task_id: int) -> bool:
|
||||
"""True if the task already has a queued or running job.
|
||||
|
||||
Used by the status-only verdict model (handle_status_start) to guard against
|
||||
double-launching an agent when a duplicate In Progress webhook arrives or a
|
||||
job is still in flight. The events de-dup absorbs identical webhook bodies;
|
||||
this guards against distinct webhooks while a job is pending/running.
|
||||
"""
|
||||
conn = get_db()
|
||||
row = conn.execute(
|
||||
"SELECT 1 FROM jobs WHERE task_id = ? AND status IN ('queued','running') LIMIT 1",
|
||||
(task_id,),
|
||||
).fetchone()
|
||||
conn.close()
|
||||
return row is not None
|
||||
|
||||
|
||||
def count_running_jobs() -> int:
|
||||
"""Number of jobs currently in 'running' status (for max_concurrency)."""
|
||||
conn = get_db()
|
||||
|
||||
16
src/main.py
16
src/main.py
@@ -60,6 +60,22 @@ async def lifespan(app: FastAPI):
|
||||
if requeued:
|
||||
log.warning(f"Queue-recovery: requeued {requeued} running job(s) after restart")
|
||||
|
||||
# L-2: rotate old per-run logs at startup (best-effort; never fatal).
|
||||
try:
|
||||
import os as _os
|
||||
from .config import settings as _settings
|
||||
from .agents.launcher import prune_run_logs
|
||||
_runs_dir = _os.path.join(_os.path.dirname(_settings.db_path), "runs")
|
||||
_removed = prune_run_logs(
|
||||
_runs_dir,
|
||||
keep_days=_settings.log_keep_days,
|
||||
keep_max=_settings.log_keep_max,
|
||||
)
|
||||
if _removed:
|
||||
log.info(f"Log rotation: pruned {_removed} old run log(s) from {_runs_dir}")
|
||||
except Exception as e:
|
||||
log.warning(f"Log rotation skipped: {e}")
|
||||
|
||||
# Start the background job-queue worker (ORCH-1).
|
||||
from .queue_worker import worker
|
||||
worker.start()
|
||||
|
||||
@@ -1,6 +1,24 @@
|
||||
"""Notifications and logging for orchestrator events."""
|
||||
"""Notifications and logging for orchestrator events.
|
||||
|
||||
feat/telegram-live-tracker (Variant B+): instead of ~15 separate Telegram
|
||||
messages per task (agent start / finish / stage transition / QG-pending / tech
|
||||
noise), the orchestrator now maintains ONE live tracker message per task that is
|
||||
edited in place (editMessageText) on every stage transition. Only events that
|
||||
NEED Slava's attention are sent as SEPARATE, notifying messages:
|
||||
|
||||
* approve-gate (notify_approve_requested) — BRD/TZ/AC ready, flip to Approved
|
||||
* deploy failed / rolled back — send_telegram from launcher/engine
|
||||
* agent failed (exit_code != 0) — send_telegram from launcher
|
||||
* task error (notify_error)
|
||||
|
||||
The tracker itself is edited SILENTLY (disable_notification: true). Stage-change,
|
||||
agent-start, agent-finish and QG-pending no longer emit their own messages — they
|
||||
just refresh the tracker (or are log-only).
|
||||
"""
|
||||
|
||||
import html
|
||||
import logging
|
||||
|
||||
import httpx
|
||||
|
||||
logger = logging.getLogger("orchestrator")
|
||||
@@ -17,25 +35,115 @@ def _get_settings():
|
||||
return _settings
|
||||
|
||||
|
||||
def send_telegram(text: str):
|
||||
"""Send notification to Telegram. Fire-and-forget, never raises."""
|
||||
# --------------------------------------------------------------------------- #
|
||||
# Low-level Telegram primitives
|
||||
# --------------------------------------------------------------------------- #
|
||||
|
||||
def send_telegram(text: str, disable_notification: bool = False):
|
||||
"""Send a notification to Telegram. Fire-and-forget, never raises.
|
||||
|
||||
Returns the Telegram message_id on success, else None (so callers that want
|
||||
to track the message — the tracker — can store it; legacy callers ignore it).
|
||||
"""
|
||||
s = _get_settings()
|
||||
if not s.telegram_bot_token or not s.telegram_chat_id:
|
||||
return
|
||||
return None
|
||||
try:
|
||||
url = f"https://api.telegram.org/bot{s.telegram_bot_token}/sendMessage"
|
||||
httpx.post(
|
||||
resp = httpx.post(
|
||||
url,
|
||||
json={
|
||||
"chat_id": s.telegram_chat_id,
|
||||
"text": text,
|
||||
"parse_mode": "HTML",
|
||||
"disable_notification": False,
|
||||
"disable_notification": disable_notification,
|
||||
},
|
||||
timeout=5,
|
||||
)
|
||||
data = resp.json()
|
||||
if data.get("ok"):
|
||||
return data["result"]["message_id"]
|
||||
except Exception:
|
||||
pass # Never crash orchestrator due to notification failure
|
||||
return None
|
||||
|
||||
|
||||
# edit_telegram outcome codes -> let update_task_tracker decide what to do:
|
||||
# "ok" edit applied -> nothing else to do
|
||||
# "not_modified" Telegram says text is identical (400 "message is not
|
||||
# modified" / "exactly the same") -> success, NO new message
|
||||
# "gone" original message can't be edited (deleted / too old /
|
||||
# invalid id) -> caller must fall back to a NEW message
|
||||
# "failed" transient failure (network / timeout / 5xx / unknown 400)
|
||||
# -> caller must NOT send a new message (avoid duplicates)
|
||||
EDIT_OK = "ok"
|
||||
EDIT_NOT_MODIFIED = "not_modified"
|
||||
EDIT_GONE = "gone"
|
||||
EDIT_FAILED = "failed"
|
||||
|
||||
# Telegram error descriptions that mean the message is permanently un-editable
|
||||
# (it is gone / orphaned) -> fall back to a fresh message.
|
||||
_GONE_MARKERS = (
|
||||
"message to edit not found",
|
||||
"message can't be edited",
|
||||
"message_id_invalid",
|
||||
)
|
||||
# Telegram "nothing changed" -> treat as success, never a duplicate.
|
||||
_NOT_MODIFIED_MARKERS = (
|
||||
"message is not modified",
|
||||
"exactly the same",
|
||||
)
|
||||
|
||||
|
||||
def edit_telegram(message_id: int, text: str) -> str:
|
||||
"""Edit an existing Telegram message. Never raises.
|
||||
|
||||
Returns a distinguishable outcome (see EDIT_* constants) so the caller can
|
||||
tell apart "all good" / "nothing changed" / "message gone" / "transient
|
||||
failure" and only fall back to a NEW message when the original is truly gone.
|
||||
"""
|
||||
s = _get_settings()
|
||||
if not s.telegram_bot_token or not s.telegram_chat_id:
|
||||
return EDIT_FAILED
|
||||
try:
|
||||
url = f"https://api.telegram.org/bot{s.telegram_bot_token}/editMessageText"
|
||||
resp = httpx.post(
|
||||
url,
|
||||
json={
|
||||
"chat_id": s.telegram_chat_id,
|
||||
"message_id": message_id,
|
||||
"text": text,
|
||||
"parse_mode": "HTML",
|
||||
},
|
||||
timeout=5,
|
||||
)
|
||||
data = resp.json()
|
||||
if data.get("ok"):
|
||||
return EDIT_OK
|
||||
# ok:false -> inspect the description to classify the 400.
|
||||
desc = str(data.get("description") or "").lower()
|
||||
if any(m in desc for m in _NOT_MODIFIED_MARKERS):
|
||||
# Text is identical between transitions (e.g. repeat review cycle
|
||||
# renders the same line). Nothing to do, NOT a duplicate.
|
||||
logger.debug(
|
||||
f"edit_telegram(mid={message_id}): not modified, skipping"
|
||||
)
|
||||
return EDIT_NOT_MODIFIED
|
||||
if any(m in desc for m in _GONE_MARKERS):
|
||||
logger.warning(
|
||||
f"edit_telegram(mid={message_id}): message gone ({desc!r}), "
|
||||
f"will fall back to a new message"
|
||||
)
|
||||
return EDIT_GONE
|
||||
# Unknown 400 / other non-ok -> transient/unknown, do NOT duplicate.
|
||||
logger.warning(
|
||||
f"edit_telegram(mid={message_id}): edit failed ({desc!r})"
|
||||
)
|
||||
return EDIT_FAILED
|
||||
except Exception as e:
|
||||
# Network / timeout / 5xx -> transient, do NOT duplicate.
|
||||
logger.warning(f"edit_telegram(mid={message_id}): transient error: {e}")
|
||||
return EDIT_FAILED
|
||||
|
||||
|
||||
def _get_work_item_id(task_id: int) -> str:
|
||||
@@ -50,26 +158,355 @@ def _get_work_item_id(task_id: int) -> str:
|
||||
return f"task-{task_id}"
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# Live task tracker
|
||||
# --------------------------------------------------------------------------- #
|
||||
|
||||
# Pipeline stages shown in the tracker, in order, with their display label and
|
||||
# the agent whose agent_runs rows describe that stage's work. "Ревью БРД" is NOT
|
||||
# an agent stage — it is the human approve gate rendered between Analysis and
|
||||
# Architecture from the task's brd_review_* timestamps.
|
||||
_TRACKER_STAGES = [
|
||||
("analysis", "Analysis", "analyst"),
|
||||
("architecture", "Architecture", "architect"),
|
||||
("development", "Development", "developer"),
|
||||
("review", "Review", "reviewer"),
|
||||
("testing", "Testing", "tester"),
|
||||
("deploy", "Deploy", "deployer"),
|
||||
]
|
||||
|
||||
# Map a pipeline stage -> the agent that is RUNNING while the task sits in it.
|
||||
# (development is entered after architecture finishes, etc.) Used to render the
|
||||
# "🔄 <Stage> … идёт" line for the currently-active stage.
|
||||
_BRD_LABEL = "\u0420\u0435\u0432\u044c\u044e \u0411\u0420\u0414" # "Ревью БРД"
|
||||
|
||||
_STAGE_ACTIVE_AGENT = {
|
||||
"analysis": "analyst",
|
||||
"architecture": "architect",
|
||||
"development": "developer",
|
||||
"review": "reviewer",
|
||||
"testing": "tester",
|
||||
"deploy": "deployer",
|
||||
}
|
||||
|
||||
|
||||
def _fmt_minutes(seconds) -> str:
|
||||
"""Render a duration in whole minutes: 0..59s -> '<1м', else '<n>м'."""
|
||||
try:
|
||||
seconds = int(seconds or 0)
|
||||
except (TypeError, ValueError):
|
||||
seconds = 0
|
||||
if seconds <= 0:
|
||||
return "0м"
|
||||
if seconds < 60:
|
||||
return "<1м"
|
||||
return f"{seconds // 60}\u043c"
|
||||
|
||||
|
||||
def _parse_sql_ts(ts):
|
||||
"""Parse a SQLite 'YYYY-MM-DD HH:MM:SS' UTC timestamp -> aware datetime/None."""
|
||||
if not ts:
|
||||
return None
|
||||
from datetime import datetime, timezone
|
||||
for fmt in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S"):
|
||||
try:
|
||||
return datetime.strptime(str(ts)[:19], fmt).replace(tzinfo=timezone.utc)
|
||||
except (ValueError, TypeError):
|
||||
continue
|
||||
return None
|
||||
|
||||
|
||||
def _duration_seconds(started, finished):
|
||||
"""Seconds between two SQL timestamps; None if either is missing/unparseable."""
|
||||
a = _parse_sql_ts(started)
|
||||
b = _parse_sql_ts(finished)
|
||||
if a is None or b is None:
|
||||
return None
|
||||
return max(int((b - a).total_seconds()), 0)
|
||||
|
||||
|
||||
def render_task_tracker(task_id: int) -> str:
|
||||
"""Build the full live-tracker text for a task from the DB (stateless render).
|
||||
|
||||
Pulls the task header (work_item_id, title, stage), every agent_runs row, and
|
||||
the BRD-review timestamps, then renders:
|
||||
- one '✅ <Stage> <dur> · <in>↓/<out>↑ · <cost> · <model>' line per finished
|
||||
stage (latest run per stage),
|
||||
- the '⏸️ Ревью БРД <dur> · твоё время[ ⏳]' line between Analysis/Architecture,
|
||||
- a '🔄 <Stage> … идёт' line for the active (in-progress) stage,
|
||||
- the '💰 <in>↓ / <out>↑ · <cost>' totals,
|
||||
- on done: '⏱️ Всего .. · агенты .. · твоё ..' and a '🔗 PR / 📦' line.
|
||||
|
||||
Never raises (returns a minimal fallback string on error).
|
||||
"""
|
||||
from .db import get_db
|
||||
from .usage import fmt_tokens, fmt_cost, _input_total, short_model_name
|
||||
|
||||
try:
|
||||
conn = get_db()
|
||||
task = conn.execute(
|
||||
"SELECT id, work_item_id, title, stage, created_at, updated_at, "
|
||||
"brd_review_started_at, brd_review_ended_at "
|
||||
"FROM tasks WHERE id=?",
|
||||
(task_id,),
|
||||
).fetchone()
|
||||
if not task:
|
||||
conn.close()
|
||||
return f"task-{task_id}"
|
||||
runs = conn.execute(
|
||||
"SELECT agent, started_at, finished_at, exit_code, input_tokens, "
|
||||
"output_tokens, cache_read_tokens, cache_creation_tokens, cost_usd, model "
|
||||
"FROM agent_runs WHERE task_id=? ORDER BY id ASC",
|
||||
(task_id,),
|
||||
).fetchall()
|
||||
conn.close()
|
||||
except Exception as e:
|
||||
logger.warning(f"render_task_tracker({task_id}) DB error: {e}")
|
||||
return f"task-{task_id}"
|
||||
|
||||
work_item_id = task["work_item_id"] or f"task-{task_id}"
|
||||
title = task["title"] or work_item_id
|
||||
stage = task["stage"] or "created"
|
||||
done = stage == "done"
|
||||
|
||||
# Latest completed run per agent (a stage may have multiple runs on retry;
|
||||
# we show the most recent FINISHED, successful run for the stage line).
|
||||
last_done = {}
|
||||
agent_runs_by_agent = {}
|
||||
for r in runs:
|
||||
agent_runs_by_agent.setdefault(r["agent"], []).append(r)
|
||||
if r["finished_at"] and (r["exit_code"] == 0 or r["exit_code"] is None):
|
||||
last_done[r["agent"]] = r
|
||||
|
||||
# Totals across ALL runs (every input/output token + cost counts).
|
||||
total_in = 0
|
||||
total_out = 0
|
||||
total_cost = 0.0
|
||||
agent_seconds = 0
|
||||
for r in runs:
|
||||
usage = {
|
||||
"input_tokens": r["input_tokens"],
|
||||
"cache_read_tokens": r["cache_read_tokens"],
|
||||
"cache_creation_tokens": r["cache_creation_tokens"],
|
||||
}
|
||||
total_in += _input_total(usage)
|
||||
total_out += int(r["output_tokens"] or 0)
|
||||
total_cost += float(r["cost_usd"] or 0.0)
|
||||
d = _duration_seconds(r["started_at"], r["finished_at"])
|
||||
if d is not None:
|
||||
agent_seconds += d
|
||||
|
||||
esc_title = html.escape(title)
|
||||
header = (
|
||||
f"\U0001f389 {html.escape(work_item_id)} \u00b7 {esc_title} \u2014 \u0413\u041e\u0422\u041e\u0412\u041e"
|
||||
if done
|
||||
else f"\U0001f6e0\ufe0f {html.escape(work_item_id)} \u00b7 {esc_title}"
|
||||
)
|
||||
bar = "\u2501" * 22
|
||||
lines = [header, bar]
|
||||
|
||||
def _stage_line(label, run):
|
||||
usage = {
|
||||
"input_tokens": run["input_tokens"],
|
||||
"cache_read_tokens": run["cache_read_tokens"],
|
||||
"cache_creation_tokens": run["cache_creation_tokens"],
|
||||
}
|
||||
in_tok = fmt_tokens(_input_total(usage))
|
||||
out_tok = fmt_tokens(run["output_tokens"])
|
||||
cost = fmt_cost(run["cost_usd"])
|
||||
dur = _fmt_minutes(_duration_seconds(run["started_at"], run["finished_at"]))
|
||||
model = short_model_name(run["model"])
|
||||
model_suffix = f" \u00b7 {model}" if model else ""
|
||||
return (
|
||||
f"\u2705 {label:<13} {dur} \u00b7 "
|
||||
f"{in_tok}\u2193/{out_tok}\u2191 \u00b7 {cost}{model_suffix}"
|
||||
)
|
||||
|
||||
# BRD review line: between Analysis and Architecture, only once Analysis has
|
||||
# produced a run (i.e. the gate is live). Time = human review delta.
|
||||
brd_started = task["brd_review_started_at"]
|
||||
brd_ended = task["brd_review_ended_at"]
|
||||
review_seconds = _duration_seconds(brd_started, brd_ended)
|
||||
|
||||
for stage_key, label, agent in _TRACKER_STAGES:
|
||||
run = last_done.get(agent)
|
||||
# The stage is "in progress" only when it is the task's current stage AND
|
||||
# there is an unfinished run for its agent (the agent is actually still
|
||||
# working). A finished run with no in-flight run -> show the \u2705 result,
|
||||
# even if the task still sits in that stage (just-finished snapshot).
|
||||
agent_runs = agent_runs_by_agent.get(agent, [])
|
||||
has_inflight = any(ar["finished_at"] is None for ar in agent_runs)
|
||||
is_active_stage = (
|
||||
_STAGE_ACTIVE_AGENT.get(stage) == agent
|
||||
and stage == stage_key
|
||||
and (has_inflight or run is None)
|
||||
)
|
||||
if is_active_stage:
|
||||
# Live "\U0001f504 ... \u0438\u0434\u0451\u0442" line. Count how many times THIS stage's
|
||||
# agent has run for this task; a 2nd+ run means we're re-doing the
|
||||
# stage (e.g. review->development->review), so show "\u043f\u043e\u043f\u044b\u0442\u043a\u0430 N"
|
||||
# to make the text change between cycles and to honestly show Slava
|
||||
# the stage is being re-worked.
|
||||
attempt = len(agent_runs)
|
||||
if attempt >= 2:
|
||||
lines.append(
|
||||
f"\U0001f504 {label} \u00b7 \u043f\u043e\u043f\u044b\u0442\u043a\u0430 {attempt} "
|
||||
f"\u2026 \u0438\u0434\u0451\u0442"
|
||||
)
|
||||
else:
|
||||
lines.append(
|
||||
f"\U0001f504 {label:<13} \u2026 \u00b7 \u0438\u0434\u0451\u0442"
|
||||
)
|
||||
elif run is not None:
|
||||
lines.append(_stage_line(label, run))
|
||||
# else: not started yet -> not shown.
|
||||
|
||||
# Insert the BRD review line right after Analysis.
|
||||
if stage_key == "analysis" and brd_started:
|
||||
brd_label = f"{_BRD_LABEL:<13}"
|
||||
if review_seconds is not None:
|
||||
dur = _fmt_minutes(review_seconds)
|
||||
lines.append(
|
||||
f"\u23f8\ufe0f {brd_label} {dur} \u00b7 \u0442\u0432\u043e\u0451 \u0432\u0440\u0435\u043c\u044f"
|
||||
)
|
||||
else:
|
||||
# Still waiting on the human (ended not stamped yet).
|
||||
from datetime import datetime, timezone
|
||||
start_dt = _parse_sql_ts(brd_started)
|
||||
waited = None
|
||||
if start_dt is not None:
|
||||
waited = int(
|
||||
(datetime.now(timezone.utc) - start_dt).total_seconds()
|
||||
)
|
||||
dur = _fmt_minutes(waited) if waited is not None else "\u2026"
|
||||
lines.append(
|
||||
f"\u23f8\ufe0f {brd_label} {dur} \u00b7 \u0442\u0432\u043e\u0451 \u0432\u0440\u0435\u043c\u044f \u23f3"
|
||||
)
|
||||
|
||||
lines.append(bar)
|
||||
lines.append(
|
||||
f"\U0001f4b0 {fmt_tokens(total_in)}\u2193 / {fmt_tokens(total_out)}\u2191 \u00b7 "
|
||||
f"{fmt_cost(total_cost)}"
|
||||
)
|
||||
|
||||
if done:
|
||||
wall = _duration_seconds(task["created_at"], task["updated_at"])
|
||||
wall_str = _fmt_minutes(wall) if wall is not None else "?"
|
||||
review_str = _fmt_minutes(review_seconds) if review_seconds else "0м"
|
||||
lines.append(
|
||||
f"\u23f1\ufe0f \u0412\u0441\u0435\u0433\u043e {wall_str} \u00b7 "
|
||||
f"\u0430\u0433\u0435\u043d\u0442\u044b {_fmt_minutes(agent_seconds)} \u00b7 "
|
||||
f"\u0442\u0432\u043e\u0451 {review_str}"
|
||||
)
|
||||
link = _done_link(task_id, task["work_item_id"])
|
||||
if link:
|
||||
lines.append(link)
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _done_link(task_id: int, work_item_id) -> str | None:
|
||||
"""Build the final '🔗 PR #n · 📦 deployed' line. Never raises -> None."""
|
||||
try:
|
||||
from .config import settings
|
||||
from .db import get_db
|
||||
conn = get_db()
|
||||
row = conn.execute(
|
||||
"SELECT repo, branch FROM tasks WHERE id=?", (task_id,)
|
||||
).fetchone()
|
||||
conn.close()
|
||||
if not row:
|
||||
return None
|
||||
repo, branch = row["repo"], row["branch"]
|
||||
pr_part = None
|
||||
try:
|
||||
owner = settings.gitea_owner
|
||||
headers = {"Authorization": f"token {settings.gitea_token}"}
|
||||
resp = httpx.get(
|
||||
f"{settings.gitea_url}/api/v1/repos/{owner}/{repo}/pulls",
|
||||
params={"state": "all", "head": branch},
|
||||
headers=headers, timeout=5,
|
||||
)
|
||||
if resp.status_code == 200:
|
||||
prs = resp.json()
|
||||
if prs:
|
||||
pr_part = f"\U0001f517 PR #{prs[0].get('number')}"
|
||||
except Exception:
|
||||
pr_part = None
|
||||
parts = []
|
||||
if pr_part:
|
||||
parts.append(pr_part)
|
||||
parts.append("\U0001f4e6 deployed")
|
||||
return " \u00b7 ".join(parts)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def update_task_tracker(task_id: int):
|
||||
"""Render + push the live tracker for a task. Never raises.
|
||||
|
||||
First call (no stored tracker_message_id): sendMessage (silent) and store the
|
||||
returned message_id. Subsequent calls: editMessageText the stored message.
|
||||
A NEW message is sent ONLY when the original is truly gone (deleted / too old
|
||||
/ invalid id). On "not modified" (text unchanged) or transient failures
|
||||
(network / timeout / 5xx / unknown 400) we do NOT send a new message — that
|
||||
is exactly what produced duplicate trackers and orphaned (lagging) messages.
|
||||
The tracker is always sent with disable_notification so it never pings —
|
||||
only the dedicated alert helpers ping.
|
||||
"""
|
||||
try:
|
||||
from .db import get_tracker_message_id, set_tracker_message_id
|
||||
text = render_task_tracker(task_id)
|
||||
mid = get_tracker_message_id(task_id)
|
||||
if mid is not None:
|
||||
result = edit_telegram(mid, text)
|
||||
if result in (EDIT_OK, EDIT_NOT_MODIFIED):
|
||||
# Edited in place (or nothing to change) -> done, no duplicate.
|
||||
return
|
||||
if result == EDIT_FAILED:
|
||||
# Transient -> don't duplicate; tracker redraws next transition.
|
||||
logger.debug(
|
||||
f"update_task_tracker({task_id}): edit failed transiently, "
|
||||
f"keeping message {mid}"
|
||||
)
|
||||
return
|
||||
# result == EDIT_GONE -> the stored message is gone; fall through
|
||||
# to send a fresh one and re-point tracker_message_id at it.
|
||||
new_mid = send_telegram(text, disable_notification=True)
|
||||
if new_mid is not None:
|
||||
set_tracker_message_id(task_id, new_mid)
|
||||
except Exception as e:
|
||||
logger.warning(f"update_task_tracker({task_id}) failed: {e}")
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# Stage / agent lifecycle notifications (now tracker-only, no separate message)
|
||||
# --------------------------------------------------------------------------- #
|
||||
|
||||
def notify_stage_change(task_id: int, old_stage: str, new_stage: str, agent: str = None):
|
||||
"""Log and notify stage transition."""
|
||||
"""Log a stage transition and refresh the live tracker (no separate message)."""
|
||||
work_item_id = _get_work_item_id(task_id)
|
||||
msg = f"\U0001f504 {work_item_id}: {old_stage} \u2192 {new_stage}"
|
||||
if agent:
|
||||
msg += f" (\u0437\u0430\u043f\u0443\u0449\u0435\u043d {agent})"
|
||||
logger.info(msg)
|
||||
send_telegram(msg)
|
||||
update_task_tracker(task_id)
|
||||
|
||||
|
||||
def notify_agent_started(run_id: int, agent: str, task_id: int):
|
||||
"""Notify agent launch."""
|
||||
"""Log an agent launch and refresh the tracker (no separate message)."""
|
||||
work_item_id = _get_work_item_id(task_id)
|
||||
msg = f"\U0001f680 {work_item_id}: {agent} \u0437\u0430\u043f\u0443\u0449\u0435\u043d (run_id={run_id})"
|
||||
logger.info(msg)
|
||||
send_telegram(msg)
|
||||
logger.info(f"\U0001f680 {work_item_id}: {agent} \u0437\u0430\u043f\u0443\u0449\u0435\u043d (run_id={run_id})")
|
||||
if task_id:
|
||||
update_task_tracker(task_id)
|
||||
|
||||
|
||||
def notify_agent_finished(run_id: int, agent: str, exit_code: int, task_id: int = None, duration_s: int = None):
|
||||
"""Notify agent completion."""
|
||||
"""Log agent completion and refresh the tracker (no separate message).
|
||||
|
||||
The agent-FAILED alert (exit_code != 0) is still sent separately by the
|
||||
launcher via send_telegram; this helper itself only logs + refreshes.
|
||||
"""
|
||||
work_item_id = _get_work_item_id(task_id) if task_id else "?"
|
||||
if exit_code == 0:
|
||||
dur = f" ({duration_s // 60} \u043c\u0438\u043d)" if duration_s else ""
|
||||
@@ -79,47 +516,66 @@ def notify_agent_finished(run_id: int, agent: str, exit_code: int, task_id: int
|
||||
else:
|
||||
msg = f"\u274c {work_item_id}: {agent} \u0443\u043f\u0430\u043b (exit_code={exit_code})"
|
||||
logger.info(msg)
|
||||
send_telegram(msg)
|
||||
if task_id:
|
||||
update_task_tracker(task_id)
|
||||
|
||||
|
||||
def notify_qg_result(task_id: int, check: str, passed: bool, reason: str = None):
|
||||
"""Notify QG check result."""
|
||||
"""Log a QG check result (NO separate Telegram message: QG-pending is noise).
|
||||
|
||||
Kept for callers; QG outcomes are log-only now and reflected by the tracker
|
||||
through the resulting stage transition.
|
||||
"""
|
||||
work_item_id = _get_work_item_id(task_id)
|
||||
if passed:
|
||||
msg = f"\u2705 {work_item_id}: QG {check} \u2014 passed"
|
||||
logger.info(f"\u2705 {work_item_id}: QG {check} \u2014 passed")
|
||||
else:
|
||||
msg = f"\u26a0\ufe0f {work_item_id}: QG {check} \u2014 failed: {reason}"
|
||||
logger.info(msg)
|
||||
send_telegram(msg)
|
||||
logger.warning(f"\u26a0\ufe0f {work_item_id}: QG {check} \u2014 failed: {reason}")
|
||||
|
||||
|
||||
def notify_qg_failure(task_id: int, stage: str, check: str, reason: str):
|
||||
"""Log and notify QG check failure."""
|
||||
"""Log a QG check failure (log-only).
|
||||
|
||||
QG-pending / QG-failed are NOT pinged as separate messages anymore (they are
|
||||
not actionable for Slava). Real rollbacks/deploy-fails are alerted by their
|
||||
own dedicated send_telegram calls in the engine/launcher.
|
||||
"""
|
||||
work_item_id = _get_work_item_id(task_id)
|
||||
msg = f"\u26a0\ufe0f {work_item_id}: QG {check} \u2014 failed: {reason}"
|
||||
logger.warning(msg)
|
||||
send_telegram(msg)
|
||||
logger.warning(f"\u26a0\ufe0f {work_item_id}: QG {check} \u2014 failed: {reason}")
|
||||
|
||||
|
||||
def notify_approve_requested(task_id: int):
|
||||
"""Notify that analyst requests :approved:."""
|
||||
"""ALERT (separate, notifying): BRD/TZ/AC ready -> flip Plane to Approved.
|
||||
|
||||
Also starts the BRD-review clock and refreshes the tracker so the
|
||||
'⏸️ Ревью БРД · твоё время ⏳' line appears.
|
||||
"""
|
||||
work_item_id = _get_work_item_id(task_id)
|
||||
msg = f"\U0001f4cb {work_item_id}: BRD/\u0422\u0417/AC \u0433\u043e\u0442\u043e\u0432\u044b. \u0416\u0434\u0443 :approved: \u0432 Plane"
|
||||
try:
|
||||
from .db import mark_brd_review_started
|
||||
mark_brd_review_started(task_id)
|
||||
except Exception as e:
|
||||
logger.warning(f"notify_approve_requested: brd clock start failed: {e}")
|
||||
msg = (
|
||||
f"\U0001f4cb {work_item_id}: BRD/\u0422\u0417/AC \u0433\u043e\u0442\u043e\u0432\u044b. "
|
||||
f"\u041f\u0435\u0440\u0435\u0432\u0435\u0434\u0438\u0442\u0435 \u0437\u0430\u0434\u0430\u0447\u0443 \u0432 \u0441\u0442\u0430\u0442\u0443\u0441 Approved "
|
||||
f"\u0432 Plane \u0434\u043b\u044f \u043f\u0440\u043e\u0434\u043e\u043b\u0436\u0435\u043d\u0438\u044f."
|
||||
)
|
||||
logger.info(msg)
|
||||
send_telegram(msg)
|
||||
update_task_tracker(task_id)
|
||||
send_telegram(msg) # separate, notifying
|
||||
|
||||
|
||||
def notify_done(task_id: int):
|
||||
"""Notify task completion."""
|
||||
"""Task completion: refresh the tracker to its final ГОТОВО form (no separate ping)."""
|
||||
work_item_id = _get_work_item_id(task_id)
|
||||
msg = f"\U0001f389 {work_item_id}: \u0437\u0430\u0434\u0430\u0447\u0430 \u0437\u0430\u0432\u0435\u0440\u0448\u0435\u043d\u0430!"
|
||||
logger.info(msg)
|
||||
send_telegram(msg)
|
||||
logger.info(f"\U0001f389 {work_item_id}: \u0437\u0430\u0434\u0430\u0447\u0430 \u0437\u0430\u0432\u0435\u0440\u0448\u0435\u043d\u0430!")
|
||||
update_task_tracker(task_id)
|
||||
|
||||
|
||||
def notify_error(task_id: int, error: str):
|
||||
"""Log and notify error for a task."""
|
||||
"""ALERT (separate, notifying): task error."""
|
||||
work_item_id = _get_work_item_id(task_id) if task_id else "system"
|
||||
msg = f"\U0001f534 {work_item_id}: ERROR \u2014 {error}"
|
||||
logger.error(msg)
|
||||
send_telegram(msg)
|
||||
send_telegram(msg) # separate, notifying
|
||||
|
||||
@@ -6,9 +6,53 @@ from .config import settings
|
||||
|
||||
logger = logging.getLogger("orchestrator.plane_sync")
|
||||
|
||||
# L-3: emoji literals used in Plane comment bodies, named for readability.
|
||||
# Message text stays byte-for-byte identical to the previous output.
|
||||
EMOJI_STAGE = "\U0001F504" # stage transition
|
||||
EMOJI_QG_FAIL = "\u26A0\uFE0F" # quality-gate failure
|
||||
EMOJI_DONE = "\u2705" # task completed
|
||||
|
||||
PLANE_BASE = f"{settings.plane_api_url}/api/v1"
|
||||
PLANE_HEADERS = {"X-API-Key": settings.plane_api_token}
|
||||
WORKSPACE = settings.plane_workspace_slug
|
||||
|
||||
# feat(plane): per-agent comment authorship.
|
||||
# Map an agent role -> its dedicated Plane bot token (read from config / env).
|
||||
# When the token is present, add_comment() POSTs under that bot so Plane shows
|
||||
# the real author. Empty/unknown role -> fallback to the shared orchestrator
|
||||
# token (PLANE_HEADERS), so commenting stays autonomous.
|
||||
PLANE_BOT_TOKENS = {
|
||||
"analyst": settings.plane_bot_analyst,
|
||||
"architect": settings.plane_bot_architect,
|
||||
"developer": settings.plane_bot_developer,
|
||||
"reviewer": settings.plane_bot_reviewer,
|
||||
"tester": settings.plane_bot_tester,
|
||||
"deployer": settings.plane_bot_deployer,
|
||||
"stream": settings.plane_bot_stream,
|
||||
}
|
||||
|
||||
# Map a pipeline stage -> the agent role that owns work in that stage. Used to
|
||||
# pick an author for rollback/stage notifications targeting a specific stage.
|
||||
STAGE_AUTHORS = {
|
||||
"analysis": "analyst",
|
||||
"architecture": "architect",
|
||||
"development": "developer",
|
||||
"review": "reviewer",
|
||||
"testing": "tester",
|
||||
"deploy": "deployer",
|
||||
}
|
||||
|
||||
|
||||
def _headers_for(author: str | None) -> dict:
|
||||
"""Return X-API-Key headers for the given agent role.
|
||||
|
||||
Falls back to the shared orchestrator token (PLANE_HEADERS /
|
||||
settings.plane_api_token) when the role is None, unknown, or its bot token
|
||||
is not configured. This keeps comment posting autonomous: a comment is
|
||||
always written, just attributed to the orchestrator if no bot is set.
|
||||
"""
|
||||
tok = PLANE_BOT_TOKENS.get(author or "") if author else None
|
||||
return {"X-API-Key": tok} if tok else PLANE_HEADERS
|
||||
PROJECT_ID = settings.plane_project_id or "7a79f0a9-5278-49cd-9007-9a338f238f9c"
|
||||
|
||||
|
||||
@@ -40,30 +84,277 @@ def _resolve_project_id(work_item_id: str = None, project_id: str = None) -> str
|
||||
logger.debug(f"_resolve_project_id fallback for {work_item_id}: {e}")
|
||||
return PROJECT_ID
|
||||
|
||||
# Plane state IDs
|
||||
PLANE_STATES = {
|
||||
"backlog": "113b24f6-cce8-4be9-9a22-a359b9cf0122",
|
||||
"todo": "2c7d3df3-9eb9-419b-92b7-d7d560bcdd10",
|
||||
"in_progress": "b873d9eb-993c-48cd-97ac-99a9b1623967",
|
||||
"needs_input": "babf08a3-ff4d-41f3-a821-5491aa29a8ac",
|
||||
"in_review": "38fb1f64-aa1e-48a3-92e0-0b109679046b",
|
||||
"blocked": "6c4543f9-ac47-4ef7-ae0f-070020dc9920",
|
||||
"done": "381a2833-3c4e-4be5-bd0f-be84cb946ad8",
|
||||
"cancelled": "b1cae7f9-961d-4889-a179-f3acea697d17",
|
||||
# ORCH-10: per-project state resolution.
|
||||
#
|
||||
# _DEFAULT_STATES keeps the original enduro-trails UUIDs as a safe fallback
|
||||
# (used when the Plane API is unreachable and for backward compat).
|
||||
# PLANE_STATES is preserved as an alias so existing call sites that reference
|
||||
# it directly (QG-0 fast-path in webhooks/plane.py, tests) continue to work.
|
||||
_DEFAULT_STATES = {
|
||||
"backlog": "113b24f6-cce8-4be9-9a22-a359b9cf0122",
|
||||
"todo": "2c7d3df3-9eb9-419b-92b7-d7d560bcdd10",
|
||||
"in_progress": "b873d9eb-993c-48cd-97ac-99a9b1623967",
|
||||
"needs_input": "babf08a3-ff4d-41f3-a821-5491aa29a8ac",
|
||||
"in_review": "38fb1f64-aa1e-48a3-92e0-0b109679046b",
|
||||
"blocked": "6c4543f9-ac47-4ef7-ae0f-070020dc9920",
|
||||
"done": "381a2833-3c4e-4be5-bd0f-be84cb946ad8",
|
||||
"cancelled": "b1cae7f9-961d-4889-a179-f3acea697d17",
|
||||
# Feature 3 (stage visibility) — per-stage statuses on the board.
|
||||
"architecture": "3020bbb7-6122-4663-930c-0315ba8dfa3d",
|
||||
"development": "9920609b-f140-4e46-ab95-89acda8412c8",
|
||||
"review": "ba0d802c-5218-41d4-ab43-978b0ea123ed",
|
||||
"testing": "7855d807-b1bf-42ef-8dae-6cde0df92d02",
|
||||
# Feature 2 (verdict statuses) — Approved / Rejected.
|
||||
"approved": "a519a341-dada-4a91-8910-7604f82b79c5",
|
||||
"rejected": "ba958f3c-5db5-461d-8f82-89425e413b97",
|
||||
}
|
||||
|
||||
# Map orchestrator stages to Plane states
|
||||
STAGE_TO_STATE = {
|
||||
"created": PLANE_STATES["todo"],
|
||||
"analysis": PLANE_STATES["in_progress"],
|
||||
"architecture": PLANE_STATES["in_progress"],
|
||||
"development": PLANE_STATES["in_progress"],
|
||||
"review": PLANE_STATES["in_progress"],
|
||||
"testing": PLANE_STATES["in_progress"],
|
||||
"deploy": PLANE_STATES["in_progress"],
|
||||
"done": PLANE_STATES["done"],
|
||||
# Backward-compat alias — do NOT remove (tests + webhooks/plane.py import it).
|
||||
PLANE_STATES = _DEFAULT_STATES
|
||||
|
||||
# Mapping: Plane state *name* (as returned by the API) -> logical key.
|
||||
_PLANE_NAME_TO_KEY: dict[str, str] = {
|
||||
"Backlog": "backlog",
|
||||
"Todo": "todo",
|
||||
"In Progress": "in_progress",
|
||||
"Architecture": "architecture",
|
||||
"Development": "development",
|
||||
"Review": "review",
|
||||
"Testing": "testing",
|
||||
"Approved": "approved",
|
||||
"Rejected": "rejected",
|
||||
"Done": "done",
|
||||
"Cancelled": "cancelled",
|
||||
"Needs Input": "needs_input",
|
||||
"In Review": "in_review",
|
||||
"Blocked": "blocked",
|
||||
}
|
||||
|
||||
# Per-project state cache: {project_id: {logical_key: state_uuid}}
|
||||
_STATES_CACHE: dict[str, dict[str, str]] = {}
|
||||
|
||||
|
||||
def get_project_states(project_id: str) -> dict[str, str]:
|
||||
"""ORCH-10: resolve {logical_key -> state_uuid} for a specific Plane project.
|
||||
|
||||
Source of truth: Plane API GET /projects/<project_id>/states/.
|
||||
Results are cached per project_id for the lifetime of the process.
|
||||
Falls back to _DEFAULT_STATES (enduro-trails values) if:
|
||||
* project_id is empty/None,
|
||||
* the API call fails (network error, non-2xx),
|
||||
* the response contains no recognisable states.
|
||||
|
||||
The enduro-trails project therefore returns the same UUIDs as before
|
||||
(backward compatible). The orchestrator project returns its own UUIDs,
|
||||
fixing the ORCH-10 blocker.
|
||||
"""
|
||||
if not project_id:
|
||||
return _DEFAULT_STATES
|
||||
|
||||
if project_id in _STATES_CACHE:
|
||||
return _STATES_CACHE[project_id]
|
||||
|
||||
url = f"{PLANE_BASE}/workspaces/{WORKSPACE}/projects/{project_id}/states/"
|
||||
try:
|
||||
resp = httpx.get(url, headers=PLANE_HEADERS, timeout=10)
|
||||
resp.raise_for_status()
|
||||
body = resp.json()
|
||||
# Plane returns {"results": [...]} or a bare list.
|
||||
items = body.get("results", body) if isinstance(body, dict) else body
|
||||
if not isinstance(items, list):
|
||||
raise ValueError(f"unexpected states response shape: {type(items)}")
|
||||
|
||||
resolved: dict[str, str] = {}
|
||||
for item in items:
|
||||
name = item.get("name", "")
|
||||
uid = item.get("id", "")
|
||||
key = _PLANE_NAME_TO_KEY.get(name)
|
||||
if key and uid:
|
||||
resolved[key] = uid
|
||||
|
||||
if not resolved:
|
||||
raise ValueError("no recognisable states in API response")
|
||||
|
||||
# Fill any missing keys from _DEFAULT_STATES so callers always get a
|
||||
# complete mapping (defensive against partial Plane configs).
|
||||
for k, v in _DEFAULT_STATES.items():
|
||||
resolved.setdefault(k, v)
|
||||
|
||||
_STATES_CACHE[project_id] = resolved
|
||||
logger.debug(
|
||||
f"get_project_states: cached {len(resolved)} states for project {project_id[:8]}..."
|
||||
)
|
||||
return resolved
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"get_project_states: API failed for project {project_id[:8]}..., "
|
||||
f"falling back to _DEFAULT_STATES. Error: {e}"
|
||||
)
|
||||
return _DEFAULT_STATES
|
||||
|
||||
|
||||
def reload_project_states(project_id: str = None) -> None:
|
||||
"""ORCH-10: clear the per-project states cache.
|
||||
|
||||
If project_id is given, evict only that project.
|
||||
If None, flush the entire cache (useful in tests and after config reload).
|
||||
"""
|
||||
global _STATES_CACHE
|
||||
if project_id is None:
|
||||
_STATES_CACHE = {}
|
||||
logger.debug("reload_project_states: full cache cleared")
|
||||
else:
|
||||
_STATES_CACHE.pop(project_id, None)
|
||||
logger.debug(f"reload_project_states: evicted project {project_id[:8]}...")
|
||||
|
||||
|
||||
# Feature 3: map an orchestrator stage -> the Plane status to show on the board
|
||||
# when the pipeline ENTERS that stage. analysis stays driven by the existing
|
||||
# in_progress/in_review/needs_input logic (no dedicated status). deploy keeps
|
||||
# in_progress until done. Needs Input / In Review / Blocked remain higher
|
||||
# priority and are set explicitly elsewhere — do NOT override them from here.
|
||||
STAGE_VISIBILITY_STATE = {
|
||||
"architecture": "architecture",
|
||||
"development": "development",
|
||||
"review": "review",
|
||||
"testing": "testing",
|
||||
}
|
||||
|
||||
# STAGE_TO_STATE kept for backward compat (used by tests that patch it).
|
||||
# update_issue_state now calls stage_to_state() instead of looking up here.
|
||||
STAGE_TO_STATE = {
|
||||
"created": _DEFAULT_STATES["todo"],
|
||||
"analysis": _DEFAULT_STATES["in_progress"],
|
||||
"architecture": _DEFAULT_STATES["architecture"],
|
||||
"development": _DEFAULT_STATES["development"],
|
||||
"review": _DEFAULT_STATES["review"],
|
||||
"testing": _DEFAULT_STATES["testing"],
|
||||
"deploy": _DEFAULT_STATES["in_progress"],
|
||||
"done": _DEFAULT_STATES["done"],
|
||||
}
|
||||
|
||||
# Map orchestrator stage -> logical state key (project-independent).
|
||||
_STAGE_TO_STATE_KEY = {
|
||||
"created": "todo",
|
||||
"analysis": "in_progress",
|
||||
"architecture": "architecture",
|
||||
"development": "development",
|
||||
"review": "review",
|
||||
"testing": "testing",
|
||||
"deploy": "in_progress",
|
||||
"done": "done",
|
||||
}
|
||||
|
||||
|
||||
def stage_to_state(stage: str, project_id: str) -> str | None:
|
||||
"""ORCH-10: return the Plane state UUID for a pipeline stage in a project.
|
||||
|
||||
Resolves via get_project_states so the correct per-project UUID is used.
|
||||
Returns None for unknown stages (same behaviour as the old STAGE_TO_STATE
|
||||
dict lookup returning None).
|
||||
"""
|
||||
key = _STAGE_TO_STATE_KEY.get(stage)
|
||||
if not key:
|
||||
return None
|
||||
return get_project_states(project_id).get(key)
|
||||
|
||||
|
||||
def fetch_issue_sequence_id(issue_id: str, project_id: str) -> int | None:
|
||||
"""M-6: GET the Plane issue by UUID and return its sequence_id (the
|
||||
authoritative per-project number), or None if unavailable.
|
||||
|
||||
Returns None on network error, non-2xx, or a missing field - never raises,
|
||||
so the webhook handler can fall back to DB increment and stay autonomous.
|
||||
"""
|
||||
url = f"{PLANE_BASE}/workspaces/{WORKSPACE}/projects/{project_id}/issues/{issue_id}/"
|
||||
try:
|
||||
resp = httpx.get(url, headers=PLANE_HEADERS, timeout=10)
|
||||
resp.raise_for_status()
|
||||
seq = resp.json().get("sequence_id")
|
||||
return int(seq) if seq is not None else None
|
||||
except Exception as e:
|
||||
logger.warning(f"fetch_issue_sequence_id failed for {issue_id}: {e}")
|
||||
return None
|
||||
|
||||
|
||||
import re as _re
|
||||
|
||||
|
||||
def _strip_html(html: str) -> str:
|
||||
"""Crude HTML -> text: drop tags and collapse whitespace. Good enough to
|
||||
feed QG-0's length check when Plane only gives us description_html."""
|
||||
if not html:
|
||||
return ""
|
||||
text = _re.sub(r"<[^>]+>", " ", html)
|
||||
return _re.sub(r"\s+", " ", text).strip()
|
||||
|
||||
|
||||
def fetch_issue_description(issue_id: str, project_id: str) -> str:
|
||||
"""BUG 1: GET the Plane issue by UUID and return its description text.
|
||||
|
||||
Plane's ``issue.updated`` webhook (e.g. a status change) only carries the
|
||||
CHANGED fields, so ``description``/``description_stripped`` are usually
|
||||
absent there. start_pipeline calls this to pull the full description from the
|
||||
issue detail endpoint so QG-0 does not blow up on an empty payload field.
|
||||
|
||||
Reuses the exact GET issue detail endpoint / shared token already used by
|
||||
``fetch_issue_sequence_id`` (same URL, same PLANE_HEADERS). Prefers
|
||||
``description_stripped``; falls back to stripping ``description_html``.
|
||||
|
||||
Returns "" on network error, non-2xx, or a missing field - never raises, so
|
||||
a Plane outage degrades to the honest "empty description" QG-0 path instead
|
||||
of crashing the webhook.
|
||||
"""
|
||||
url = f"{PLANE_BASE}/workspaces/{WORKSPACE}/projects/{project_id}/issues/{issue_id}/"
|
||||
try:
|
||||
resp = httpx.get(url, headers=PLANE_HEADERS, timeout=10)
|
||||
resp.raise_for_status()
|
||||
body = resp.json()
|
||||
desc = body.get("description_stripped")
|
||||
if desc and desc.strip():
|
||||
return desc
|
||||
return _strip_html(body.get("description_html") or "")
|
||||
except Exception as e:
|
||||
logger.warning(f"fetch_issue_description failed for {issue_id}: {e}")
|
||||
return ""
|
||||
|
||||
|
||||
def fetch_issue_fields(issue_id: str, project_id: str) -> tuple[str, str]:
|
||||
"""BUG B: GET the Plane issue by UUID ONCE and return (name, description).
|
||||
|
||||
Plane's ``issue.updated`` webhook (e.g. a status change) only carries the
|
||||
CHANGED fields, so BOTH ``name`` and ``description`` are usually absent in
|
||||
the payload. start_pipeline needs the real title (for the branch slug) and
|
||||
the real description (for the analyst .task.md). To avoid issuing two
|
||||
separate issue-detail GETs (one for name, one for description), this single
|
||||
request returns both.
|
||||
|
||||
Reuses the exact GET issue detail endpoint / shared token already used by
|
||||
``fetch_issue_sequence_id`` / ``fetch_issue_description``. For the
|
||||
description it applies the same logic as ``fetch_issue_description``
|
||||
(prefer ``description_stripped``, fall back to stripping
|
||||
``description_html``).
|
||||
|
||||
Returns ("", "") on network error, non-2xx, or missing body - never raises,
|
||||
so a Plane outage degrades gracefully (caller keeps its payload fallbacks).
|
||||
"""
|
||||
url = f"{PLANE_BASE}/workspaces/{WORKSPACE}/projects/{project_id}/issues/{issue_id}/"
|
||||
try:
|
||||
resp = httpx.get(url, headers=PLANE_HEADERS, timeout=10)
|
||||
resp.raise_for_status()
|
||||
body = resp.json()
|
||||
name = (body.get("name") or "").strip()
|
||||
desc = body.get("description_stripped")
|
||||
if desc and desc.strip():
|
||||
description = desc
|
||||
else:
|
||||
description = _strip_html(body.get("description_html") or "")
|
||||
return name, description
|
||||
except Exception as e:
|
||||
logger.warning(f"fetch_issue_fields failed for {issue_id}: {e}")
|
||||
return "", ""
|
||||
|
||||
|
||||
def find_issue_id(work_item_id: str, project_id: str = None) -> str | None:
|
||||
"""Find Plane issue UUID by work_item_id (e.g. 'ET-002')."""
|
||||
@@ -89,25 +380,26 @@ def find_issue_id(work_item_id: str, project_id: str = None) -> str | None:
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
results = data.get("results", data if isinstance(data, list) else [])
|
||||
# M-6: match by sequence_id directly (the authoritative per-project
|
||||
# number), parsed from the work_item_id suffix - no hardcoded prefix.
|
||||
try:
|
||||
target_num = int(work_item_id.rsplit("-", 1)[1])
|
||||
except (IndexError, ValueError):
|
||||
target_num = None
|
||||
for issue in results:
|
||||
seq = issue.get("sequence_id")
|
||||
identifier = f"ET-{seq:03d}" if seq else ""
|
||||
if identifier == work_item_id or work_item_id in issue.get("name", ""):
|
||||
if target_num is not None and issue.get("sequence_id") == target_num:
|
||||
return issue["id"]
|
||||
# Fallback: get all issues and match by sequence_id number
|
||||
if work_item_id.startswith("ET-"):
|
||||
try:
|
||||
target_num = int(work_item_id.split("-")[1])
|
||||
except (IndexError, ValueError):
|
||||
target_num = None
|
||||
if target_num:
|
||||
resp2 = httpx.get(url, headers=PLANE_HEADERS, timeout=10)
|
||||
resp2.raise_for_status()
|
||||
data2 = resp2.json()
|
||||
results2 = data2.get("results", data2 if isinstance(data2, list) else [])
|
||||
for issue in results2:
|
||||
if issue.get("sequence_id") == target_num:
|
||||
return issue["id"]
|
||||
if work_item_id in issue.get("name", ""):
|
||||
return issue["id"]
|
||||
# Fallback: get all issues and match by sequence_id number (any prefix)
|
||||
if target_num is not None:
|
||||
resp2 = httpx.get(url, headers=PLANE_HEADERS, timeout=10)
|
||||
resp2.raise_for_status()
|
||||
data2 = resp2.json()
|
||||
results2 = data2.get("results", data2 if isinstance(data2, list) else [])
|
||||
for issue in results2:
|
||||
if issue.get("sequence_id") == target_num:
|
||||
return issue["id"]
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to find issue for {work_item_id}: {e}")
|
||||
return None
|
||||
@@ -115,11 +407,12 @@ def find_issue_id(work_item_id: str, project_id: str = None) -> str | None:
|
||||
|
||||
def update_issue_state(work_item_id: str, stage: str, project_id: str = None):
|
||||
"""Update Plane issue state based on orchestrator stage."""
|
||||
state_id = STAGE_TO_STATE.get(stage)
|
||||
project_id = _resolve_project_id(work_item_id, project_id)
|
||||
# ORCH-10: resolve state UUID for this specific project (not global dict).
|
||||
state_id = stage_to_state(stage, project_id)
|
||||
if not state_id:
|
||||
return
|
||||
|
||||
project_id = _resolve_project_id(work_item_id, project_id)
|
||||
issue_id = find_issue_id(work_item_id, project_id)
|
||||
if not issue_id:
|
||||
logger.warning(f"Issue not found in Plane for {work_item_id}")
|
||||
@@ -134,8 +427,14 @@ def update_issue_state(work_item_id: str, stage: str, project_id: str = None):
|
||||
logger.error(f"Failed to update Plane state for {work_item_id}: {e}")
|
||||
|
||||
|
||||
def add_comment(work_item_id: str, text: str, project_id: str = None):
|
||||
"""Add a comment to Plane issue."""
|
||||
def add_comment(work_item_id: str, text: str, project_id: str = None, author: str = None):
|
||||
"""Add a comment to a Plane issue.
|
||||
|
||||
feat(plane): when ``author`` (an agent role) maps to a configured bot
|
||||
token, the comment is POSTed under that bot so Plane shows the real author.
|
||||
Otherwise it falls back to the shared orchestrator token (see
|
||||
``_headers_for``). GET/PATCH calls elsewhere keep using PLANE_HEADERS.
|
||||
"""
|
||||
project_id = _resolve_project_id(work_item_id, project_id)
|
||||
issue_id = find_issue_id(work_item_id, project_id)
|
||||
if not issue_id:
|
||||
@@ -145,32 +444,70 @@ def add_comment(work_item_id: str, text: str, project_id: str = None):
|
||||
url = f"{PLANE_BASE}/workspaces/{WORKSPACE}/projects/{project_id}/issues/{issue_id}/comments/"
|
||||
html = f"<p>{text}</p>"
|
||||
try:
|
||||
resp = httpx.post(url, headers=PLANE_HEADERS, json={"comment_html": html}, timeout=10)
|
||||
resp = httpx.post(url, headers=_headers_for(author), json={"comment_html": html}, timeout=10)
|
||||
resp.raise_for_status()
|
||||
logger.info(f"Plane: comment added to {work_item_id}")
|
||||
logger.info(f"Plane: comment added to {work_item_id} (author={author or 'orchestrator'})")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to add comment to {work_item_id}: {e}")
|
||||
|
||||
|
||||
|
||||
def set_issue_needs_input(work_item_id: str, project_id: str = None):
|
||||
"""Set issue to 'Needs Input' state — waiting for stakeholder response."""
|
||||
_set_issue_state_direct(work_item_id, PLANE_STATES["needs_input"], project_id)
|
||||
project_id = _resolve_project_id(work_item_id, project_id)
|
||||
state_id = get_project_states(project_id)["needs_input"]
|
||||
_set_issue_state_direct(work_item_id, state_id, project_id)
|
||||
|
||||
|
||||
def set_issue_in_review(work_item_id: str, project_id: str = None):
|
||||
"""Set issue to 'In Review' state — waiting for :approved: or :rejected:."""
|
||||
_set_issue_state_direct(work_item_id, PLANE_STATES["in_review"], project_id)
|
||||
project_id = _resolve_project_id(work_item_id, project_id)
|
||||
state_id = get_project_states(project_id)["in_review"]
|
||||
_set_issue_state_direct(work_item_id, state_id, project_id)
|
||||
|
||||
|
||||
def set_issue_blocked(work_item_id: str, project_id: str = None):
|
||||
"""Set issue to 'Blocked' state — manual intervention needed."""
|
||||
_set_issue_state_direct(work_item_id, PLANE_STATES["blocked"], project_id)
|
||||
project_id = _resolve_project_id(work_item_id, project_id)
|
||||
state_id = get_project_states(project_id)["blocked"]
|
||||
_set_issue_state_direct(work_item_id, state_id, project_id)
|
||||
|
||||
|
||||
def set_issue_done(work_item_id: str, project_id: str = None):
|
||||
"""Observability fix: force the issue into the TERMINAL Done state.
|
||||
|
||||
Used by the deploy->done success path so a completed task always reaches the
|
||||
terminal Plane state (it used to stick on In Progress because the merge
|
||||
webhook bypassed the stage engine). Resolves per-project UUID via
|
||||
get_project_states (ORCH-10).
|
||||
"""
|
||||
project_id = _resolve_project_id(work_item_id, project_id)
|
||||
state_id = get_project_states(project_id)["done"]
|
||||
_set_issue_state_direct(work_item_id, state_id, project_id)
|
||||
|
||||
|
||||
def set_issue_in_progress(work_item_id: str, project_id: str = None):
|
||||
"""Set issue to 'In Progress' state — agent working."""
|
||||
_set_issue_state_direct(work_item_id, PLANE_STATES["in_progress"], project_id)
|
||||
project_id = _resolve_project_id(work_item_id, project_id)
|
||||
state_id = get_project_states(project_id)["in_progress"]
|
||||
_set_issue_state_direct(work_item_id, state_id, project_id)
|
||||
|
||||
|
||||
def set_issue_stage_state(work_item_id: str, stage: str, project_id: str = None):
|
||||
"""Feature 3: move the issue to the board status for a pipeline stage.
|
||||
|
||||
Only the visible-stage statuses (architecture/development/review/testing)
|
||||
are driven here — stages without a dedicated status (analysis/deploy) are a
|
||||
no-op so the existing in_progress/in_review/needs_input logic stays in
|
||||
charge. By design this does NOT touch Needs Input / In Review / Blocked,
|
||||
which are higher priority and set explicitly by their own helpers.
|
||||
"""
|
||||
state_key = STAGE_VISIBILITY_STATE.get(stage)
|
||||
if not state_key:
|
||||
return
|
||||
project_id = _resolve_project_id(work_item_id, project_id)
|
||||
# ORCH-10: resolve per-project UUID.
|
||||
state_id = get_project_states(project_id)[state_key]
|
||||
_set_issue_state_direct(work_item_id, state_id, project_id)
|
||||
|
||||
|
||||
def _set_issue_state_direct(work_item_id: str, state_id: str, project_id: str = None):
|
||||
@@ -194,7 +531,7 @@ def notify_stage_change(work_item_id: str, old_stage: str, new_stage: str, agent
|
||||
project_id = _resolve_project_id(work_item_id, project_id)
|
||||
update_issue_state(work_item_id, new_stage, project_id)
|
||||
|
||||
msg = f"🔄 Stage: {old_stage} → {new_stage}"
|
||||
msg = f"{EMOJI_STAGE} Stage: {old_stage} → {new_stage}"
|
||||
if agent:
|
||||
msg += f" (launching {agent})"
|
||||
|
||||
@@ -227,16 +564,29 @@ def notify_stage_change(work_item_id: str, old_stage: str, new_stage: str, agent
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
add_comment(work_item_id, msg, project_id)
|
||||
# Stage transition is the orchestrator's own voice -> attribute to stream.
|
||||
add_comment(work_item_id, msg, project_id, author="stream")
|
||||
|
||||
|
||||
def notify_qg_failure(work_item_id: str, stage: str, check: str, reason: str, project_id: str = None):
|
||||
"""Notify Plane about QG failure."""
|
||||
add_comment(work_item_id, f"⚠️ QG failed at {stage}: {check} — {reason}", project_id)
|
||||
# QG failure belongs to the agent that owns the failing stage.
|
||||
add_comment(
|
||||
work_item_id,
|
||||
f"{EMOJI_QG_FAIL} QG failed at {stage}: {check} — {reason}",
|
||||
project_id,
|
||||
author=STAGE_AUTHORS.get(stage, "stream"),
|
||||
)
|
||||
|
||||
|
||||
def notify_done(work_item_id: str, project_id: str = None):
|
||||
"""Mark issue as Done in Plane."""
|
||||
project_id = _resolve_project_id(work_item_id, project_id)
|
||||
update_issue_state(work_item_id, "done", project_id)
|
||||
add_comment(work_item_id, "✅ Task completed! PR merged and deployed.", project_id)
|
||||
# Deploy finished the task -> attribute the completion comment to Deployer.
|
||||
add_comment(
|
||||
work_item_id,
|
||||
f"{EMOJI_DONE} Task completed! PR merged and deployed.",
|
||||
project_id,
|
||||
author="deployer",
|
||||
)
|
||||
|
||||
304
src/qg/checks.py
304
src/qg/checks.py
@@ -2,6 +2,7 @@
|
||||
|
||||
import os
|
||||
import logging
|
||||
import subprocess
|
||||
import httpx
|
||||
from ..config import settings
|
||||
|
||||
@@ -137,7 +138,16 @@ def check_review_approved(repo: str, pr_number: int) -> tuple[bool, str]:
|
||||
|
||||
def check_tests_passed(repo: str, work_item_id: str, branch: str | None = None) -> tuple[bool, str]:
|
||||
"""
|
||||
Check if test report exists and contains PASS indicator.
|
||||
Gate the testing -> deploy transition on the tester's MACHINE-READABLE verdict
|
||||
in 13-test-report.md frontmatter, NOT on a naive substring search of the body.
|
||||
|
||||
ET-013 fix: the previous implementation did `if "PASS" in content`, so a report
|
||||
explicitly marked `verdict: BLOCKED` / `status: blocked` but whose prose mentioned
|
||||
"23 passed" / "✅ PASS" / "All checks passed" was treated as a pass, and an
|
||||
unfinished feature reached Done. This mirrors check_reviewer_verdict (S-5) and
|
||||
check_deploy_status (БАГ 8): read ONLY the YAML frontmatter `verdict:` / `status:`
|
||||
fields, never the body.
|
||||
|
||||
File: docs/work-items/<work_item_id>/13-test-report.md
|
||||
"""
|
||||
repo_path = _repo_path(repo, branch)
|
||||
@@ -149,12 +159,67 @@ def check_tests_passed(repo: str, work_item_id: str, branch: str | None = None)
|
||||
try:
|
||||
with open(report_path, "r") as f:
|
||||
content = f.read()
|
||||
if "PASS" in content or "All tests passed" in content:
|
||||
return True, "Test report indicates PASS"
|
||||
return False, "Test report exists but no PASS indicator found"
|
||||
except OSError as e:
|
||||
return False, f"Error reading test report: {e}"
|
||||
|
||||
return _parse_tests_verdict(content)
|
||||
|
||||
|
||||
# Positive / negative verdict tokens, derived from REAL tester reports in
|
||||
# enduro-trails (ET-001..ET-014). The tester is inconsistent: most write
|
||||
# `verdict: PASS`, but ET-006 used `verdict: ready-to-deploy` (with `status: PASSED`),
|
||||
# ET-007 `verdict: PASS — ready-to-deploy`, ET-008 `verdict: stage:ready-to-deploy`
|
||||
# (with `status: pass`). ET-013 (the bug) used `verdict: BLOCKED` / `status: blocked`.
|
||||
# We therefore match known positive/negative TOKENS inside the normalized
|
||||
# verdict/status fields, and treat a negative token as authoritative (a BLOCKED/FAILED
|
||||
# report never passes, even if another field looks positive).
|
||||
_TESTS_NEGATIVE_TOKENS = ("BLOCKED", "FAILED", "FAIL", "REQUEST_CHANGES", "REJECT", "RED")
|
||||
_TESTS_POSITIVE_TOKENS = ("PASSED", "PASS", "READY-TO-DEPLOY", "READY_TO_DEPLOY", "GREEN", "APPROVED")
|
||||
|
||||
|
||||
def _parse_tests_verdict(content: str) -> tuple[bool, str]:
|
||||
"""Map a 13-test-report.md body to a quality-gate verdict by reading ONLY the
|
||||
machine-readable `verdict:` (and corroborating `status:`) YAML frontmatter fields.
|
||||
|
||||
Rules:
|
||||
- No frontmatter / bad YAML / neither field present -> (False, reason).
|
||||
- A negative token (BLOCKED/FAILED/...) in verdict OR status -> (False) and is
|
||||
authoritative (ET-013 main case: verdict BLOCKED wins over any prose PASS).
|
||||
- Otherwise a positive token (PASS/PASSED/READY-TO-DEPLOY/...) in verdict OR
|
||||
status -> (True).
|
||||
- Anything else (unrecognized / empty verdict) -> (False, reason).
|
||||
"""
|
||||
import yaml
|
||||
|
||||
if not content.startswith("---"):
|
||||
return False, "No YAML frontmatter in test report (cannot read machine verdict)"
|
||||
|
||||
parts = content.split("---", 2)
|
||||
if len(parts) < 3:
|
||||
return False, "Malformed YAML frontmatter in test report"
|
||||
|
||||
try:
|
||||
fm = yaml.safe_load(parts[1]) or {}
|
||||
except yaml.YAMLError as e:
|
||||
return False, f"Invalid YAML frontmatter in test report: {e}"
|
||||
if not isinstance(fm, dict):
|
||||
return False, "Malformed YAML frontmatter in test report (not a mapping)"
|
||||
|
||||
verdict = str(fm.get("verdict", "") or "").upper().strip()
|
||||
status = str(fm.get("status", "") or "").upper().strip()
|
||||
|
||||
if not verdict and not status:
|
||||
return False, "No machine-readable verdict/status in test report frontmatter"
|
||||
|
||||
fields = f"{verdict} {status}"
|
||||
for neg in _TESTS_NEGATIVE_TOKENS:
|
||||
if neg in fields:
|
||||
return False, f"Test verdict: {verdict or status} ({neg})"
|
||||
for pos in _TESTS_POSITIVE_TOKENS:
|
||||
if pos in fields:
|
||||
return True, f"Test verdict: {verdict or status} (PASS)"
|
||||
|
||||
return False, f"No recognized PASS verdict in frontmatter (verdict={verdict!r}, status={status!r})"
|
||||
|
||||
|
||||
def check_analysis_approved(repo: str, work_item_id: str, branch: str | None = None) -> tuple[bool, str]:
|
||||
@@ -249,9 +314,17 @@ def check_reviewer_verdict(repo: str, work_item_id: str, branch: str | None = No
|
||||
|
||||
def check_tests_local(repo: str, branch: str) -> tuple[bool, str]:
|
||||
"""
|
||||
DEPRECATED: replaced by check_ci_green on the development stage (CI is now
|
||||
configured). Kept for backward-compat; not wired to any stage.
|
||||
|
||||
S-1 fix: run the project test suite locally and judge by exit code, instead of
|
||||
depending on Gitea CI (which is not configured -> always false).
|
||||
|
||||
БАГ 5 fix: invoke pytest directly instead of make test. make is not installed
|
||||
in the orchestrator container, so the previous ["make", "test"] call raised
|
||||
FileNotFoundError. This reproduces the Makefile test target 1:1
|
||||
(cd src/api && python -m pytest ../../tests/ -v).
|
||||
|
||||
ORCH-2 / S-4: tests run inside the per-branch worktree (ensure_worktree), so this
|
||||
is safe for concurrent active tasks — no shared /repos checkout race.
|
||||
"""
|
||||
@@ -259,7 +332,8 @@ def check_tests_local(repo: str, branch: str) -> tuple[bool, str]:
|
||||
try:
|
||||
repo_path = ensure_worktree(repo, branch)
|
||||
r = subprocess.run(
|
||||
["make", "test"], cwd=repo_path,
|
||||
["python", "-m", "pytest", "../../tests/", "-v"],
|
||||
cwd=os.path.join(repo_path, "src", "api"),
|
||||
capture_output=True, text=True, timeout=600,
|
||||
)
|
||||
if r.returncode == 0:
|
||||
@@ -272,6 +346,224 @@ def check_tests_local(repo: str, branch: str) -> tuple[bool, str]:
|
||||
return False, f"Local test run error: {e}"
|
||||
|
||||
|
||||
def _parse_deploy_status(content: str) -> tuple[bool, str]:
|
||||
"""Parse a 14-deploy-log.md body and map its `deploy_status:` frontmatter to a
|
||||
quality-gate verdict. Reads ONLY the machine-readable YAML field, never prose.
|
||||
|
||||
deploy_status: SUCCESS -> (True, "Deploy status: SUCCESS")
|
||||
deploy_status: FAILED -> (False, "Deploy status: FAILED")
|
||||
missing field / no frontmatter / bad YAML -> (False, <reason>)
|
||||
"""
|
||||
import yaml
|
||||
status = None
|
||||
if content.startswith("---"):
|
||||
parts = content.split("---", 2)
|
||||
if len(parts) >= 3:
|
||||
try:
|
||||
fm = yaml.safe_load(parts[1]) or {}
|
||||
except yaml.YAMLError as e:
|
||||
return False, f"Invalid YAML frontmatter in deploy log: {e}"
|
||||
status = str(fm.get("deploy_status", "")).upper().strip()
|
||||
if status == "SUCCESS":
|
||||
return True, "Deploy status: SUCCESS"
|
||||
if status == "FAILED":
|
||||
return False, "Deploy status: FAILED"
|
||||
return False, f"No machine-readable deploy_status in frontmatter (got: {status!r})"
|
||||
|
||||
|
||||
def _deploy_log_from_main(repo: str, work_item_id: str) -> str | None:
|
||||
"""Best-effort read of 14-deploy-log.md from origin/main on the shared clone.
|
||||
|
||||
The deployer writes 14-deploy-log.md and merges the deploy artifacts into main
|
||||
via a separate PR (see ET-013), so the file lands in origin/main, NOT in the
|
||||
feature branch worktree the gate normally reads. This recovers it from main.
|
||||
|
||||
Degrades gracefully: any git failure (no clone, network/fetch error, file
|
||||
absent in main) returns None instead of raising, so the caller falls back to
|
||||
the plain "not found" verdict. Never raises.
|
||||
"""
|
||||
repo_clone = os.path.join(settings.repos_dir, repo)
|
||||
if not os.path.isdir(os.path.join(repo_clone, ".git")):
|
||||
return None
|
||||
rel = f"docs/work-items/{work_item_id}/14-deploy-log.md"
|
||||
try:
|
||||
# Refresh origin/main so we see freshly-merged deploy artifacts.
|
||||
subprocess.run(
|
||||
["git", "-C", repo_clone, "fetch", "origin", "main"],
|
||||
check=False, capture_output=True, timeout=30,
|
||||
)
|
||||
show = subprocess.run(
|
||||
["git", "-C", repo_clone, "show", f"origin/main:{rel}"],
|
||||
check=False, capture_output=True, text=True, timeout=15,
|
||||
)
|
||||
except (subprocess.SubprocessError, OSError) as e:
|
||||
logger.warning("deploy-log origin/main lookup failed for %s/%s: %s", repo, work_item_id, e)
|
||||
return None
|
||||
if show.returncode != 0:
|
||||
return None
|
||||
return show.stdout
|
||||
|
||||
|
||||
def check_deploy_status(repo: str, work_item_id: str, branch: str | None = None) -> tuple[bool, str]:
|
||||
"""
|
||||
БАГ 8 fix: gate the deploy -> done transition on the deployer's machine-readable
|
||||
verdict in 14-deploy-log.md frontmatter, NOT on the LLM process exit code
|
||||
(which is always 0 on a successful agent session even when the deploy failed).
|
||||
|
||||
Mirrors check_reviewer_verdict (S-5): reads ONLY `deploy_status:` from YAML
|
||||
frontmatter. Returns:
|
||||
(True, ...) -> deploy_status: SUCCESS
|
||||
(False, ...) -> deploy_status: FAILED, missing field, or no frontmatter
|
||||
|
||||
ET-013 path-sync fix: the deployer writes 14-deploy-log.md and merges the deploy
|
||||
artifacts into main via a SEPARATE PR, so the log lands in origin/main, not in
|
||||
the feature-branch worktree this gate reads via _repo_path(repo, branch). If the
|
||||
file is absent in the worktree we fall back to reading it from origin/main on the
|
||||
shared clone. Lookup order: worktree -> origin/main -> not found.
|
||||
"""
|
||||
repo_path = _repo_path(repo, branch)
|
||||
log_path = os.path.join(repo_path, f"docs/work-items/{work_item_id}/14-deploy-log.md")
|
||||
|
||||
if os.path.isfile(log_path):
|
||||
try:
|
||||
with open(log_path, "r") as f:
|
||||
content = f.read()
|
||||
except OSError as e:
|
||||
return False, f"Error reading deploy log: {e}"
|
||||
return _parse_deploy_status(content)
|
||||
|
||||
# Not in the feature worktree — the deployer may have merged it into main.
|
||||
main_content = _deploy_log_from_main(repo, work_item_id)
|
||||
if main_content is not None:
|
||||
return _parse_deploy_status(main_content)
|
||||
|
||||
return False, "Deploy log not found (14-deploy-log.md)"
|
||||
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Self-hosting detection: staging-infra (localhost:8501) exists ONLY for the
|
||||
# orchestrator repo itself (self-hosting). Other repos have no staging instance
|
||||
# and their deployer prompts know nothing about it -- the gate must be a no-op
|
||||
# for them. The repo value is the plain gitea repo name (ProjectConfig.repo),
|
||||
# matching what _run_qg/advance_stage pass in. See ORCH-35 / PR #31.
|
||||
# ---------------------------------------------------------------------------
|
||||
SELF_HOSTING_REPO = "orchestrator"
|
||||
|
||||
|
||||
def is_self_hosting_repo(repo: str) -> bool:
|
||||
"""Return True iff repo is the self-hosted orchestrator (has staging infra).
|
||||
|
||||
Comparison is case-insensitive and strips whitespace for safety, but in
|
||||
practice repo comes from the gitea webhook payload .repository.name which
|
||||
is always lowercase (confirmed via projects.py registry entry).
|
||||
"""
|
||||
return (repo or "").strip().lower() == SELF_HOSTING_REPO.lower()
|
||||
|
||||
|
||||
def _parse_staging_status(content: str) -> tuple[bool, str]:
|
||||
"""Parse a 15-staging-log.md body and map its `staging_status:` frontmatter to a
|
||||
quality-gate verdict. Reads ONLY the machine-readable YAML field, never prose.
|
||||
|
||||
staging_status: SUCCESS -> (True, "Staging status: SUCCESS")
|
||||
staging_status: FAILED -> (False, "Staging status: FAILED")
|
||||
missing field / no frontmatter / bad YAML -> (False, <reason>)
|
||||
"""
|
||||
import yaml
|
||||
status = None
|
||||
if content.startswith("---"):
|
||||
parts = content.split("---", 2)
|
||||
if len(parts) >= 3:
|
||||
try:
|
||||
fm = yaml.safe_load(parts[1]) or {}
|
||||
except yaml.YAMLError as e:
|
||||
return False, f"Invalid YAML frontmatter in staging log: {e}"
|
||||
status = str(fm.get("staging_status", "")).upper().strip()
|
||||
if status == "SUCCESS":
|
||||
return True, "Staging status: SUCCESS"
|
||||
if status == "FAILED":
|
||||
return False, "Staging status: FAILED"
|
||||
return False, f"No machine-readable staging_status in frontmatter (got: {status!r})"
|
||||
|
||||
|
||||
def _staging_log_from_main(repo: str, work_item_id: str) -> str | None:
|
||||
"""Best-effort read of 15-staging-log.md from origin/main on the shared clone.
|
||||
|
||||
The deployer writes 15-staging-log.md and merges the staging artifacts into main
|
||||
via a separate PR (mirroring the deploy-log pattern), so the file lands in
|
||||
origin/main, NOT in the feature branch worktree the gate normally reads.
|
||||
This recovers it from main.
|
||||
|
||||
Degrades gracefully: any git failure (no clone, network/fetch error, file
|
||||
absent in main) returns None instead of raising, so the caller falls back to
|
||||
the plain "not found" verdict. Never raises.
|
||||
"""
|
||||
repo_clone = os.path.join(settings.repos_dir, repo)
|
||||
if not os.path.isdir(os.path.join(repo_clone, ".git")):
|
||||
return None
|
||||
rel = f"docs/work-items/{work_item_id}/15-staging-log.md"
|
||||
try:
|
||||
# Refresh origin/main so we see freshly-merged staging artifacts.
|
||||
subprocess.run(
|
||||
["git", "-C", repo_clone, "fetch", "origin", "main"],
|
||||
check=False, capture_output=True, timeout=30,
|
||||
)
|
||||
show = subprocess.run(
|
||||
["git", "-C", repo_clone, "show", f"origin/main:{rel}"],
|
||||
check=False, capture_output=True, text=True, timeout=15,
|
||||
)
|
||||
except (subprocess.SubprocessError, OSError) as e:
|
||||
logger.warning("staging-log origin/main lookup failed for %s/%s: %s", repo, work_item_id, e)
|
||||
return None
|
||||
if show.returncode != 0:
|
||||
return None
|
||||
return show.stdout
|
||||
|
||||
|
||||
def check_staging_status(repo: str, work_item_id: str, branch: str | None = None) -> tuple[bool, str]:
|
||||
"""
|
||||
Gate the deploy-staging -> deploy transition on the deployer's machine-readable
|
||||
verdict in 15-staging-log.md frontmatter (staging_status: SUCCESS|FAILED).
|
||||
|
||||
ORCH-35 conditional gate (Variant A):
|
||||
- Non-self-hosting repos (anything other than "orchestrator") have no staging
|
||||
instance and no deployer knowledge of it -> gate is an immediate pass.
|
||||
- Self-hosting repo ("orchestrator") -> real check: reads ONLY the machine-
|
||||
readable staging_status: field from YAML frontmatter, never body prose.
|
||||
|
||||
Mirrors check_deploy_status (БАГ 8) for the self-hosting path.
|
||||
|
||||
Lookup order (self-hosting only): worktree -> origin/main -> not found.
|
||||
|
||||
Returns:
|
||||
(True, "Staging gate N/A for <repo>") -> non-self-hosting repo (instant pass)
|
||||
(True, ...) -> staging_status: SUCCESS (self-hosting path)
|
||||
(False, ...) -> staging_status: FAILED, missing field, or no frontmatter
|
||||
"""
|
||||
# Variant A: non-self-hosting repos have no staging infra -- skip entirely.
|
||||
if not is_self_hosting_repo(repo):
|
||||
return True, f"Staging gate N/A for {repo}"
|
||||
|
||||
# Self-hosting (orchestrator) path: real verdict check.
|
||||
repo_path = _repo_path(repo, branch)
|
||||
log_path = os.path.join(repo_path, f"docs/work-items/{work_item_id}/15-staging-log.md")
|
||||
|
||||
if os.path.isfile(log_path):
|
||||
try:
|
||||
with open(log_path, "r") as f:
|
||||
content = f.read()
|
||||
except OSError as e:
|
||||
return False, f"Error reading staging log: {e}"
|
||||
return _parse_staging_status(content)
|
||||
|
||||
# Not in the feature worktree -- the deployer may have merged it into main.
|
||||
main_content = _staging_log_from_main(repo, work_item_id)
|
||||
if main_content is not None:
|
||||
return _parse_staging_status(main_content)
|
||||
|
||||
return False, "Staging log not found (15-staging-log.md)"
|
||||
|
||||
|
||||
# Registry for dynamic lookup by name
|
||||
QG_CHECKS = {
|
||||
"check_analysis_approved": check_analysis_approved,
|
||||
@@ -282,4 +574,6 @@ QG_CHECKS = {
|
||||
"check_tests_passed": check_tests_passed,
|
||||
"check_reviewer_verdict": check_reviewer_verdict,
|
||||
"check_tests_local": check_tests_local,
|
||||
"check_deploy_status": check_deploy_status,
|
||||
"check_staging_status": check_staging_status,
|
||||
}
|
||||
|
||||
@@ -47,6 +47,7 @@ from .plane_sync import (
|
||||
set_issue_needs_input,
|
||||
set_issue_in_progress,
|
||||
set_issue_blocked,
|
||||
set_issue_done,
|
||||
)
|
||||
from .config import settings
|
||||
|
||||
@@ -189,36 +190,48 @@ def advance_stage(
|
||||
|
||||
# --- Quality gate ----------------------------------------------------
|
||||
if qg_name and qg_name in QG_CHECKS:
|
||||
# Human-approval gate: special analyst approved-flow (launcher only).
|
||||
# Human-approval gate: split by path.
|
||||
if qg_name == "check_analysis_approved":
|
||||
_handle_analysis_approved_flow(
|
||||
task_id, current_stage, repo, work_item_id, branch, agent, result
|
||||
)
|
||||
return result
|
||||
# Launcher path (analyst just finished): set In Review + ask for
|
||||
# the Approved status. This gate never advances on its own -- a
|
||||
# human Approved verdict does that.
|
||||
if agent == "analyst":
|
||||
_handle_analysis_approved_flow(
|
||||
task_id, current_stage, repo, work_item_id, branch, agent, result
|
||||
)
|
||||
return result
|
||||
# Webhook Approved-verdict path (agent is None): the human flipped
|
||||
# the Plane status to Approved, which IS the approval. The gate is
|
||||
# satisfied -- do NOT re-run check_analysis_approved (it looks for
|
||||
# an :approved: *comment* and would block on a status-only
|
||||
# approval). Mark it passed and fall through to the Advance block.
|
||||
result.qg_name = qg_name
|
||||
result.qg_passed = True
|
||||
result.qg_reason = "approved-via-status"
|
||||
else:
|
||||
passed, reason = _run_qg(qg_name, repo, work_item_id, branch)
|
||||
result.qg_passed = passed
|
||||
result.qg_reason = reason
|
||||
|
||||
passed, reason = _run_qg(qg_name, repo, work_item_id, branch)
|
||||
result.qg_passed = passed
|
||||
result.qg_reason = reason
|
||||
if not passed:
|
||||
logger.info(
|
||||
f"Task {task_id}: QG '{qg_name}' not passed after {agent}: {reason}"
|
||||
)
|
||||
# Behaviour parity:
|
||||
# - webhook path (finished_agent is None): emit the generic
|
||||
# QG-failure notification, exactly like the old plane handler.
|
||||
# - launcher path (finished_agent set): NO generic notification;
|
||||
# the rollback branches below own their own messaging, exactly
|
||||
# like the old launcher handler.
|
||||
if agent is None:
|
||||
notify_qg_failure(task_id, current_stage, qg_name, reason)
|
||||
plane_notify_qg(work_item_id, current_stage, qg_name, reason)
|
||||
|
||||
if not passed:
|
||||
logger.info(
|
||||
f"Task {task_id}: QG '{qg_name}' not passed after {agent}: {reason}"
|
||||
)
|
||||
# Behaviour parity:
|
||||
# - webhook path (finished_agent is None): emit the generic
|
||||
# QG-failure notification, exactly like the old plane handler.
|
||||
# - launcher path (finished_agent set): NO generic notification;
|
||||
# the rollback branches below own their own messaging, exactly
|
||||
# like the old launcher handler.
|
||||
if agent is None:
|
||||
notify_qg_failure(task_id, current_stage, qg_name, reason)
|
||||
plane_notify_qg(work_item_id, current_stage, qg_name, reason)
|
||||
|
||||
_handle_qg_failure_rollbacks(
|
||||
task_id, current_stage, repo, work_item_id, branch,
|
||||
agent, qg_name, reason, result,
|
||||
)
|
||||
return result
|
||||
_handle_qg_failure_rollbacks(
|
||||
task_id, current_stage, repo, work_item_id, branch,
|
||||
agent, qg_name, reason, result,
|
||||
)
|
||||
return result
|
||||
|
||||
elif qg_name:
|
||||
# QG name set but not registered — do not advance (launcher behavior).
|
||||
@@ -227,6 +240,15 @@ def advance_stage(
|
||||
|
||||
# --- Advance ---------------------------------------------------------
|
||||
update_task_stage(task_id, next_stage)
|
||||
# Telegram live tracker: the analysis->architecture advance is the human
|
||||
# Approved gate clearing -> stamp the END of "Ревью БРД" (the only
|
||||
# human time). Idempotent: only the first stamp counts.
|
||||
if current_stage == "analysis" and next_stage == "architecture":
|
||||
try:
|
||||
from .db import mark_brd_review_ended
|
||||
mark_brd_review_ended(task_id)
|
||||
except Exception as e:
|
||||
logger.warning(f"Task {task_id}: brd review end stamp failed: {e}")
|
||||
notify_stage_change(task_id, current_stage, next_stage)
|
||||
plane_notify_stage(work_item_id, current_stage, next_stage)
|
||||
result.advanced = True
|
||||
@@ -235,6 +257,22 @@ def advance_stage(
|
||||
f"(auto-advance after {agent})"
|
||||
)
|
||||
|
||||
# --- Terminal sync: deploy -> done must reach Plane's Done -----------
|
||||
# When the deployer's check_deploy_status passes we advance to the
|
||||
# terminal 'done' stage. Previously a merged-PR webhook completed the
|
||||
# task out-of-band and Plane stuck on In Progress. Now done flows through
|
||||
# here, so explicitly drive the Plane issue into the terminal Done state
|
||||
# (PLANE_STATES['done'] — mapping unchanged) in addition to the
|
||||
# stage-change comment above.
|
||||
if next_stage == "done" and work_item_id:
|
||||
try:
|
||||
set_issue_done(work_item_id)
|
||||
logger.info(
|
||||
f"Task {task_id}: deploy->done, Plane state forced to Done"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Task {task_id}: failed to set Plane Done: {e}")
|
||||
|
||||
# --- Launch the next agent (ORCH-4 fix: current_stage, not next) -----
|
||||
next_agent = get_agent_for_stage(current_stage)
|
||||
if next_agent:
|
||||
@@ -257,6 +295,58 @@ def advance_stage(
|
||||
return result
|
||||
|
||||
|
||||
def _build_analyst_ready_comment(repo: str, work_item_id: str, branch: str) -> str:
|
||||
"""BUG C: HTML comment posted when analyst artifacts are ready.
|
||||
|
||||
Status-only model (PR #12): approval is the **Approved** status, NOT a
|
||||
``:approved:`` comment and NOT moving back to In Progress. The comment asks
|
||||
the stakeholder to flip the status and links the documents the analyst
|
||||
actually produced.
|
||||
|
||||
Links point at the Gitea web view:
|
||||
{gitea_url}/{owner}/{repo}/src/branch/{branch}/docs/work-items/{wid}/<file>
|
||||
Only files that REALLY exist in the worktree are listed (no invented docs).
|
||||
"""
|
||||
text = (
|
||||
"\u2705 BRD/\u0422\u0417/AC \u0433\u043e\u0442\u043e\u0432\u044b. "
|
||||
"\u0414\u043b\u044f \u043f\u0440\u043e\u0434\u0432\u0438\u0436\u0435\u043d\u0438\u044f "
|
||||
"\u043f\u0435\u0440\u0435\u0432\u0435\u0434\u0438\u0442\u0435 \u0437\u0430\u0434\u0430\u0447\u0443 "
|
||||
"\u0432 \u0441\u0442\u0430\u0442\u0443\u0441 Approved. "
|
||||
"\u0414\u043b\u044f \u043e\u0442\u043a\u043b\u043e\u043d\u0435\u043d\u0438\u044f \u2014 "
|
||||
"\u043d\u0430\u043f\u0438\u0448\u0438\u0442\u0435 \u043f\u0440\u0438\u0447\u0438\u043d\u0443 "
|
||||
"\u043a\u043e\u043c\u043c\u0435\u043d\u0442\u043e\u043c \u0438 \u043f\u0435\u0440\u0435\u0432\u0435\u0434\u0438\u0442\u0435 "
|
||||
"\u0432 Rejected."
|
||||
)
|
||||
|
||||
# Candidate analyst artifacts (label -> filename). Only existing ones linked.
|
||||
candidates = [
|
||||
("Business request", "00-business-request.md"),
|
||||
("BRD", "01-brd.md"),
|
||||
("\u0422\u0417 (TRZ)", "02-trz.md"),
|
||||
("Acceptance Criteria", "03-acceptance-criteria.md"),
|
||||
("Test Plan", "04-test-plan.yaml"),
|
||||
("UI Test Cases", "04b-ui-test-cases.md"),
|
||||
]
|
||||
rel_dir = f"docs/work-items/{work_item_id}"
|
||||
try:
|
||||
wt_dir = os.path.join(get_worktree_path(repo, branch), rel_dir)
|
||||
except Exception:
|
||||
wt_dir = None
|
||||
|
||||
owner = getattr(settings, "gitea_owner", "admin")
|
||||
base = (getattr(settings, "gitea_public_url", "") or settings.gitea_url).rstrip("/")
|
||||
links = []
|
||||
for label, fname in candidates:
|
||||
if wt_dir and not os.path.isfile(os.path.join(wt_dir, fname)):
|
||||
continue
|
||||
href = f"{base}/{owner}/{repo}/src/branch/{branch}/{rel_dir}/{fname}"
|
||||
links.append(f'<li><a href="{href}">{label}</a></li>')
|
||||
|
||||
if links:
|
||||
text += "<br><b>\u0414\u043e\u043a\u0443\u043c\u0435\u043d\u0442\u044b:</b><ul>" + "".join(links) + "</ul>"
|
||||
return text
|
||||
|
||||
|
||||
def _handle_analysis_approved_flow(
|
||||
task_id, current_stage, repo, work_item_id, branch, agent, result: AdvanceResult
|
||||
):
|
||||
@@ -279,18 +369,17 @@ def _handle_analysis_approved_flow(
|
||||
|
||||
files_ok, _ = files_check(repo, work_item_id, branch)
|
||||
if files_ok:
|
||||
# Full artifacts ready -> In Review, ask for :approved:.
|
||||
# Full artifacts ready -> In Review, ask for the Approved STATUS (BUG C).
|
||||
set_issue_in_review(work_item_id)
|
||||
plane_add_comment(
|
||||
work_item_id,
|
||||
"\U0001f4cb BRD/\u0422\u0417/AC/TestPlan \u0433\u043e\u0442\u043e\u0432\u044b. "
|
||||
"\u041f\u0440\u043e\u0448\u0443 review \u0438 \u0440\u0435\u0430\u043a\u0446\u0438\u044e :approved: "
|
||||
"\u0434\u043b\u044f \u043f\u0440\u043e\u0434\u0432\u0438\u0436\u0435\u043d\u0438\u044f \u0432 Architecture.",
|
||||
_build_analyst_ready_comment(repo, work_item_id, branch),
|
||||
author="analyst",
|
||||
)
|
||||
notify_approve_requested(task_id)
|
||||
result.note = "analysis-in-review"
|
||||
logger.info(
|
||||
f"Task {task_id}: analyst finished, requested :approved: in Plane"
|
||||
f"Task {task_id}: analyst finished, requested Approved status in Plane"
|
||||
)
|
||||
return
|
||||
|
||||
@@ -305,6 +394,7 @@ def _handle_analysis_approved_flow(
|
||||
plane_add_comment(
|
||||
work_item_id,
|
||||
f"\u2753 Analyst \u043d\u0443\u0436\u0434\u0430\u0435\u0442\u0441\u044f \u0432 \u0443\u0442\u043e\u0447\u043d\u0435\u043d\u0438\u0438:\n\n{questions_text}",
|
||||
author="analyst",
|
||||
)
|
||||
send_telegram(
|
||||
f"\u2753 {work_item_id}: Analyst \u0437\u0430\u0434\u0430\u0451\u0442 \u0432\u043e\u043f\u0440\u043e\u0441\u044b. \u041e\u0442\u0432\u0435\u0442\u044c \u0432 Plane."
|
||||
@@ -316,6 +406,7 @@ def _handle_analysis_approved_flow(
|
||||
plane_add_comment(
|
||||
work_item_id,
|
||||
"\u26a0\ufe0f Analyst \u0437\u0430\u0432\u0435\u0440\u0448\u0438\u043b\u0441\u044f \u0431\u0435\u0437 \u0430\u0440\u0442\u0435\u0444\u0430\u043a\u0442\u043e\u0432 \u0438 \u0431\u0435\u0437 \u0432\u043e\u043f\u0440\u043e\u0441\u043e\u0432. \u041f\u0440\u043e\u0432\u0435\u0440\u044c\u0442\u0435 \u043b\u043e\u0433.",
|
||||
author="analyst",
|
||||
)
|
||||
result.note = "analysis-empty"
|
||||
|
||||
@@ -370,6 +461,7 @@ def _handle_qg_failure_rollbacks(
|
||||
work_item_id,
|
||||
f"\u274c \u0422\u0435\u0441\u0442\u044b \u043d\u0435 \u043f\u0440\u043e\u0448\u043b\u0438: {reason}. "
|
||||
f"Developer \u043f\u0435\u0440\u0435\u0437\u0430\u043f\u0443\u0449\u0435\u043d \u0434\u043b\u044f \u0444\u0438\u043a\u0441\u0430.",
|
||||
author="tester",
|
||||
)
|
||||
retry_count = _developer_retry_count(task_id)
|
||||
if retry_count < MAX_DEVELOPER_RETRIES:
|
||||
@@ -410,6 +502,7 @@ def _handle_qg_failure_rollbacks(
|
||||
work_item_id,
|
||||
f"\u26a0\ufe0f Architect \u043d\u0430\u0448\u0451\u043b \u043a\u043e\u043d\u0444\u043b\u0438\u043a\u0442 \u0441 \u0422\u0417. "
|
||||
f"\u0412\u043e\u0437\u0432\u0440\u0430\u0442 \u0432 Analysis.\n\n{conflict_text}",
|
||||
author="architect",
|
||||
)
|
||||
task_desc = (
|
||||
f"Work item: {work_item_id}\nRepo: {repo}\nBranch: {branch}\n"
|
||||
@@ -423,3 +516,57 @@ def _handle_qg_failure_rollbacks(
|
||||
f"Task {task_id}: architect conflict, enqueued analyst "
|
||||
f"(job_id={new_job})"
|
||||
)
|
||||
|
||||
# ORCH-35: deployer staging verdict FAILED -> roll deploy-staging back to development.
|
||||
# Staging-провал = код плох; откат на development по образцу БАГ-8 (deploy->development).
|
||||
# НЕ трогает ветку check_deploy_status ниже.
|
||||
if agent == "deployer" and qg_name == "check_staging_status":
|
||||
update_task_stage(task_id, "development")
|
||||
notify_stage_change(task_id, current_stage, "development")
|
||||
plane_notify_stage(work_item_id, current_stage, "development")
|
||||
result.rolled_back_to = "development"
|
||||
set_issue_blocked(work_item_id)
|
||||
notify_qg_failure(task_id, "deploy-staging", "check_staging_status", reason)
|
||||
plane_add_comment(
|
||||
work_item_id,
|
||||
f"\u274c Staging gate FAILED ({reason}). Rolled back to development. "
|
||||
f"Developer \u043d\u0443\u0436\u0435\u043d \u0434\u043b\u044f \u0444\u0438\u043a\u0441\u0430.",
|
||||
author="deployer",
|
||||
)
|
||||
send_telegram(
|
||||
f"\U0001f6a8 {work_item_id}: Staging FAILED ({reason}). "
|
||||
f"Rolled back to development. Needs fix."
|
||||
)
|
||||
result.alerted = True
|
||||
logger.error(
|
||||
f"Task {task_id}: deployer staging verdict FAILED, rolled back deploy-staging -> "
|
||||
f"development ({reason})"
|
||||
)
|
||||
|
||||
# БАГ 8: deployer verdict FAILED -> roll deploy back to development.
|
||||
# The launcher's exit_code-based guard (launcher.py:475) never fires because
|
||||
# the LLM process exit code is always 0; this gate fires on the machine-readable
|
||||
# deploy_status verdict in 14-deploy-log.md instead. Mirrors the launcher block
|
||||
# (rollback + set_issue_blocked + notify) but is driven by the VERDICT.
|
||||
if agent == "deployer" and qg_name == "check_deploy_status":
|
||||
update_task_stage(task_id, "development")
|
||||
notify_stage_change(task_id, current_stage, "development")
|
||||
plane_notify_stage(work_item_id, current_stage, "development")
|
||||
result.rolled_back_to = "development"
|
||||
set_issue_blocked(work_item_id)
|
||||
notify_qg_failure(task_id, "deploy", "check_deploy_status", reason)
|
||||
plane_add_comment(
|
||||
work_item_id,
|
||||
f"\u274c Deploy FAILED ({reason}). Rolled back to development. "
|
||||
f"Developer \u043d\u0443\u0436\u0435\u043d \u0434\u043b\u044f \u0444\u0438\u043a\u0441\u0430.",
|
||||
author="deployer",
|
||||
)
|
||||
send_telegram(
|
||||
f"\U0001f6a8 {work_item_id}: Deploy FAILED ({reason}). "
|
||||
f"Rolled back to development. Needs fix."
|
||||
)
|
||||
result.alerted = True
|
||||
logger.error(
|
||||
f"Task {task_id}: deployer verdict FAILED, rolled back deploy -> "
|
||||
f"development ({reason})"
|
||||
)
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
"""Stage machine for orchestrator pipeline.
|
||||
|
||||
Stages:
|
||||
created → analysis → architecture → development → review → testing → deploy → done
|
||||
created → analysis → architecture → development → review → testing → deploy-staging → deploy → done
|
||||
|
||||
Each stage defines:
|
||||
- next: the stage to advance to
|
||||
- agent: the agent to launch when entering the NEXT stage
|
||||
- agent: the agent to launch when advancing FROM this stage (NOT the next stage's agent)
|
||||
- qg: the quality gate check required to leave this stage
|
||||
"""
|
||||
|
||||
@@ -13,10 +13,11 @@ STAGE_TRANSITIONS = {
|
||||
"created": {"next": "analysis", "agent": "analyst", "qg": None},
|
||||
"analysis": {"next": "architecture", "agent": "architect", "qg": "check_analysis_approved"},
|
||||
"architecture": {"next": "development", "agent": "developer", "qg": "check_architecture_done"},
|
||||
"development": {"next": "review", "agent": "reviewer", "qg": "check_tests_local"},
|
||||
"development": {"next": "review", "agent": "reviewer", "qg": "check_ci_green"},
|
||||
"review": {"next": "testing", "agent": "tester", "qg": "check_reviewer_verdict"},
|
||||
"testing": {"next": "deploy", "agent": "deployer", "qg": "check_tests_passed"},
|
||||
"deploy": {"next": "done", "agent": None, "qg": None},
|
||||
"testing": {"next": "deploy-staging", "agent": "deployer", "qg": "check_tests_passed"},
|
||||
"deploy-staging": {"next": "deploy", "agent": "deployer", "qg": "check_staging_status"},
|
||||
"deploy": {"next": "done", "agent": None, "qg": "check_deploy_status"},
|
||||
"done": {"next": None, "agent": None, "qg": None},
|
||||
}
|
||||
|
||||
|
||||
464
src/usage.py
Normal file
464
src/usage.py
Normal file
@@ -0,0 +1,464 @@
|
||||
"""Feature 4: token / cost accounting for agent runs.
|
||||
|
||||
claude --output-format json emits a single result JSON object at the end of the
|
||||
run log with fields:
|
||||
total_cost_usd
|
||||
usage.input_tokens / output_tokens / cache_read_input_tokens /
|
||||
cache_creation_input_tokens
|
||||
modelUsage, num_turns, duration_ms
|
||||
|
||||
This module parses that JSON out of a (text-or-json) run log, records the usage
|
||||
on the agent_runs row, formats a Plane comment for the finishing agent, and
|
||||
builds the per-task summary the Deployer posts on deploy/done.
|
||||
|
||||
Everything here is defensive: a missing/garbled JSON never raises \u2014 we record
|
||||
NULL/0 and log a warning so a broken agent run can't crash the monitor.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
|
||||
from .db import get_db
|
||||
|
||||
logger = logging.getLogger("orchestrator.usage")
|
||||
|
||||
|
||||
def parse_usage_from_text(text: str) -> dict | None:
|
||||
"""Extract the claude result-JSON usage from a run log's text.
|
||||
|
||||
The log may contain plain text before/after the JSON; with
|
||||
--output-format json the JSON is the final object. We scan for the LAST
|
||||
top-level '{' ... '}' that parses and carries usage/total_cost_usd.
|
||||
|
||||
Returns a normalised dict
|
||||
{input_tokens, output_tokens, cache_read_tokens, cache_creation_tokens,
|
||||
cost_usd}
|
||||
(ints / float, missing fields -> 0 / 0.0), or None if no usable JSON found.
|
||||
"""
|
||||
if not text:
|
||||
return None
|
||||
|
||||
candidate = _extract_last_json_object(text)
|
||||
if candidate is None:
|
||||
return None
|
||||
|
||||
usage = candidate.get("usage") or {}
|
||||
if not isinstance(usage, dict):
|
||||
usage = {}
|
||||
|
||||
cost = candidate.get("total_cost_usd")
|
||||
if cost is None:
|
||||
cost = candidate.get("cost_usd")
|
||||
|
||||
# If there is neither a usage block nor a cost, this isn't a result object.
|
||||
if not usage and cost is None:
|
||||
return None
|
||||
|
||||
def _int(v):
|
||||
try:
|
||||
return int(v)
|
||||
except (TypeError, ValueError):
|
||||
return 0
|
||||
|
||||
def _float(v):
|
||||
try:
|
||||
return float(v)
|
||||
except (TypeError, ValueError):
|
||||
return 0.0
|
||||
|
||||
return {
|
||||
"input_tokens": _int(usage.get("input_tokens")),
|
||||
"output_tokens": _int(usage.get("output_tokens")),
|
||||
"cache_read_tokens": _int(
|
||||
usage.get("cache_read_input_tokens", usage.get("cache_read_tokens"))
|
||||
),
|
||||
# The cache-CREATION slice (writing new cache entries) is part of the
|
||||
# REAL input and used to be dropped on the floor. Persist it so the
|
||||
# "X in" figure reflects the full prompt size, not just fresh tokens.
|
||||
"cache_creation_tokens": _int(
|
||||
usage.get("cache_creation_input_tokens", usage.get("cache_creation_tokens"))
|
||||
),
|
||||
"cost_usd": _float(cost),
|
||||
# Telegram live tracker: the model the run actually used. claude
|
||||
# --output-format json reports it under modelUsage (a dict keyed by the
|
||||
# full model id) and/or a top-level "model" field. We keep the FULL name
|
||||
# here; short_model_name() trims it for the tracker. None when unknown.
|
||||
"model": _extract_model(candidate),
|
||||
}
|
||||
|
||||
|
||||
def _extract_model(candidate: dict) -> str | None:
|
||||
"""Best-effort: pull the model id out of a claude result JSON object.
|
||||
|
||||
Prefers modelUsage (a dict keyed by full model ids, e.g.
|
||||
{"claude-opus-4-8": {...}}) and returns the key with the most output
|
||||
tokens; falls back to a top-level "model" string. Never raises -> None.
|
||||
"""
|
||||
try:
|
||||
mu = candidate.get("modelUsage")
|
||||
if isinstance(mu, dict) and mu:
|
||||
def _out(v):
|
||||
try:
|
||||
return int((v or {}).get("outputTokens", 0))
|
||||
except (TypeError, ValueError, AttributeError):
|
||||
return 0
|
||||
best = max(mu.items(), key=lambda kv: _out(kv[1]))
|
||||
if best and best[0]:
|
||||
return str(best[0])
|
||||
model = candidate.get("model")
|
||||
if isinstance(model, str) and model:
|
||||
return model
|
||||
except Exception:
|
||||
pass
|
||||
return None
|
||||
|
||||
|
||||
def short_model_name(full: str | None) -> str:
|
||||
"""Trim a full model id to a short tag for the tracker.
|
||||
|
||||
'tokenator/claude-opus-4-8' -> 'opus-4-8'
|
||||
'vibecode/claude-sonnet-4.6' -> 'sonnet-4.6'
|
||||
'claude-opus-4-8' -> 'opus-4-8'
|
||||
Returns '' when full is falsy so callers can omit the ' · <model>' suffix.
|
||||
"""
|
||||
if not full:
|
||||
return ""
|
||||
name = str(full).strip()
|
||||
# Drop any provider prefix up to and including the last '/'.
|
||||
if "/" in name:
|
||||
name = name.rsplit("/", 1)[-1]
|
||||
# Drop a leading 'claude-' marketing prefix.
|
||||
if name.startswith("claude-"):
|
||||
name = name[len("claude-"):]
|
||||
return name
|
||||
|
||||
|
||||
def _extract_last_json_object(text: str) -> dict | None:
|
||||
"""Return the last balanced top-level JSON object in `text` that parses.
|
||||
|
||||
Scans from the end for '}' and walks back to the matching '{' using a depth
|
||||
counter (string-aware), trying json.loads on each candidate. Robust to log
|
||||
lines or text emitted before the JSON.
|
||||
"""
|
||||
# Fast path: the whole stripped text is the JSON.
|
||||
stripped = text.strip()
|
||||
try:
|
||||
obj = json.loads(stripped)
|
||||
if isinstance(obj, dict):
|
||||
return obj
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
|
||||
# Otherwise find the last balanced { ... } block.
|
||||
end = len(text)
|
||||
while True:
|
||||
close = text.rfind("}", 0, end)
|
||||
if close == -1:
|
||||
return None
|
||||
depth = 0
|
||||
in_str = False
|
||||
esc = False
|
||||
start = None
|
||||
for i in range(close, -1, -1):
|
||||
ch = text[i]
|
||||
if in_str:
|
||||
if esc:
|
||||
esc = False
|
||||
elif ch == "\\":
|
||||
esc = True
|
||||
elif ch == '"':
|
||||
in_str = False
|
||||
continue
|
||||
if ch == '"':
|
||||
in_str = True
|
||||
elif ch == "}":
|
||||
depth += 1
|
||||
elif ch == "{":
|
||||
depth -= 1
|
||||
if depth == 0:
|
||||
start = i
|
||||
break
|
||||
if start is not None:
|
||||
blob = text[start:close + 1]
|
||||
try:
|
||||
obj = json.loads(blob)
|
||||
if isinstance(obj, dict):
|
||||
return obj
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
end = close # keep scanning earlier in the text
|
||||
|
||||
|
||||
def parse_usage_from_log(path: str) -> dict | None:
|
||||
"""Read a run log file and parse usage from it. Never raises."""
|
||||
try:
|
||||
with open(path, "r", encoding="utf-8", errors="replace") as f:
|
||||
return parse_usage_from_text(f.read())
|
||||
except OSError as e:
|
||||
logger.warning(f"parse_usage_from_log: cannot read {path}: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def record_usage(run_id: int, usage: dict | None):
|
||||
"""Write parsed usage onto the agent_runs row. NULLs if usage is None."""
|
||||
if usage is None:
|
||||
logger.warning(f"run_id={run_id}: no usage JSON parsed, recording NULLs")
|
||||
usage = {}
|
||||
conn = get_db()
|
||||
try:
|
||||
conn.execute(
|
||||
"UPDATE agent_runs SET input_tokens=?, output_tokens=?, "
|
||||
"cache_read_tokens=?, cache_creation_tokens=?, cost_usd=?, "
|
||||
"model=COALESCE(?, model) WHERE id=?",
|
||||
(
|
||||
usage.get("input_tokens"),
|
||||
usage.get("output_tokens"),
|
||||
usage.get("cache_read_tokens"),
|
||||
usage.get("cache_creation_tokens"),
|
||||
usage.get("cost_usd"),
|
||||
usage.get("model"),
|
||||
run_id,
|
||||
),
|
||||
)
|
||||
conn.commit()
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
|
||||
def fmt_tokens(n) -> str:
|
||||
"""Format a token count compactly: 1234 -> '1.2k', 2_500_000 -> '2.5M'."""
|
||||
try:
|
||||
n = int(n or 0)
|
||||
except (TypeError, ValueError):
|
||||
n = 0
|
||||
if n >= 1_000_000:
|
||||
return f"{n / 1_000_000:.1f}M"
|
||||
if n >= 1_000:
|
||||
return f"{n / 1_000:.1f}k"
|
||||
return str(n)
|
||||
|
||||
|
||||
def fmt_cost(c) -> str:
|
||||
"""Format USD cost with 2 decimals: '$0.21'."""
|
||||
try:
|
||||
c = float(c or 0.0)
|
||||
except (TypeError, ValueError):
|
||||
c = 0.0
|
||||
return f"${c:.2f}"
|
||||
|
||||
|
||||
# Pretty agent names for comments (mirrors STAGE_AUTHORS roles).
|
||||
AGENT_DISPLAY = {
|
||||
"analyst": "Analyst",
|
||||
"architect": "Architect",
|
||||
"developer": "Developer",
|
||||
"reviewer": "Reviewer",
|
||||
"tester": "Tester",
|
||||
"deployer": "Deployer",
|
||||
}
|
||||
|
||||
|
||||
def _input_total(usage: dict) -> int:
|
||||
"""FULL input = fresh input + cache-read + cache-creation tokens."""
|
||||
def _i(k):
|
||||
try:
|
||||
return int(usage.get(k) or 0)
|
||||
except (TypeError, ValueError):
|
||||
return 0
|
||||
return _i("input_tokens") + _i("cache_read_tokens") + _i("cache_creation_tokens")
|
||||
|
||||
|
||||
def _cached_total(usage: dict) -> int:
|
||||
"""Cached portion of the input = cache-read + cache-creation tokens."""
|
||||
def _i(k):
|
||||
try:
|
||||
return int(usage.get(k) or 0)
|
||||
except (TypeError, ValueError):
|
||||
return 0
|
||||
return _i("cache_read_tokens") + _i("cache_creation_tokens")
|
||||
|
||||
|
||||
def fmt_in(usage: dict) -> str:
|
||||
"""Render the input figure as full total with a cached breakdown.
|
||||
|
||||
'8.5M in (8.4M cached)' when there is a cache; '45.2k in' when cached==0.
|
||||
"""
|
||||
total = _input_total(usage)
|
||||
cached = _cached_total(usage)
|
||||
if cached > 0:
|
||||
return f"{fmt_tokens(total)} in ({fmt_tokens(cached)} cached)"
|
||||
return f"{fmt_tokens(total)} in"
|
||||
|
||||
|
||||
def usage_comment(
|
||||
agent: str,
|
||||
usage: dict | None,
|
||||
repo: str | None = None,
|
||||
branch: str | None = None,
|
||||
work_item_id: str | None = None,
|
||||
pr_number=None,
|
||||
) -> str:
|
||||
"""Build the per-agent finish comment, e.g.
|
||||
'\U0001f4bb Developer \u0433\u043e\u0442\u043e\u0432 \u00b7 8.5M in (8.4M cached) / 45.8k out \u00b7 $7.29'.
|
||||
|
||||
When repo/branch/work_item_id are supplied, the agent's artifact link(s) are
|
||||
appended (BUG: only analyst used to link its docs). Missing artifacts are
|
||||
silently skipped — link building never raises.
|
||||
"""
|
||||
usage = usage or {}
|
||||
name = AGENT_DISPLAY.get(agent, agent.capitalize())
|
||||
icon = AGENT_ICON.get(agent, "\u2705")
|
||||
line = (
|
||||
f"{icon} {name} \u0433\u043e\u0442\u043e\u0432 \u00b7 "
|
||||
f"{fmt_in(usage)} / "
|
||||
f"{fmt_tokens(usage.get('output_tokens'))} out \u00b7 "
|
||||
f"{fmt_cost(usage.get('cost_usd'))}"
|
||||
)
|
||||
links = artifact_links(agent, repo, branch, work_item_id, pr_number)
|
||||
if links:
|
||||
line += "\n" + "\n".join(links)
|
||||
return line
|
||||
|
||||
|
||||
# Per-agent artifact file under docs/work-items/{wid}/ (architect/developer use
|
||||
# special handling for ADR dirs / PR links, see artifact_links()).
|
||||
AGENT_ARTIFACT = {
|
||||
"reviewer": ("Review", "12-review.md"),
|
||||
"tester": ("Test report", "13-test-report.md"),
|
||||
"deployer": ("Deploy log", "14-deploy-log.md"),
|
||||
}
|
||||
|
||||
|
||||
def artifact_links(
|
||||
agent: str,
|
||||
repo: str | None,
|
||||
branch: str | None,
|
||||
work_item_id: str | None,
|
||||
pr_number=None,
|
||||
) -> list[str]:
|
||||
"""Markdown link(s) to the finishing agent's artifact(s) in Gitea.
|
||||
|
||||
Uses gitea_public_url (falls back to gitea_url) for clickable links, mirroring
|
||||
the analyst doc links. Returns [] (never raises) when there is nothing to
|
||||
link or the required context is missing. analyst is intentionally NOT handled
|
||||
here — its richer doc list lives in stage_engine._build_analyst_ready_comment.
|
||||
"""
|
||||
try:
|
||||
from .config import settings
|
||||
owner = getattr(settings, "gitea_owner", "admin")
|
||||
base = (
|
||||
getattr(settings, "gitea_public_url", "") or getattr(settings, "gitea_url", "")
|
||||
).rstrip("/")
|
||||
if not base or not repo:
|
||||
return []
|
||||
links: list[str] = []
|
||||
|
||||
if agent == "developer":
|
||||
if branch:
|
||||
links.append(
|
||||
f"\U0001f4c2 [Branch {branch}]({base}/{owner}/{repo}/src/branch/{branch})"
|
||||
)
|
||||
if pr_number:
|
||||
links.append(
|
||||
f"\U0001f517 [PR #{pr_number}]({base}/{owner}/{repo}/pulls/{pr_number})"
|
||||
)
|
||||
return links
|
||||
|
||||
if agent == "architect":
|
||||
if branch and work_item_id:
|
||||
adr_dir = (
|
||||
f"{base}/{owner}/{repo}/src/branch/{branch}/"
|
||||
f"docs/work-items/{work_item_id}/06-adr"
|
||||
)
|
||||
links.append(f"\U0001f4d0 [ADR]({adr_dir})")
|
||||
return links
|
||||
|
||||
spec = AGENT_ARTIFACT.get(agent)
|
||||
if spec and branch and work_item_id:
|
||||
label, fname = spec
|
||||
href = (
|
||||
f"{base}/{owner}/{repo}/src/branch/{branch}/"
|
||||
f"docs/work-items/{work_item_id}/{fname}"
|
||||
)
|
||||
links.append(f"\U0001f4c4 [{label}]({href})")
|
||||
return links
|
||||
except Exception:
|
||||
return []
|
||||
|
||||
|
||||
AGENT_ICON = {
|
||||
"analyst": "\U0001f50d",
|
||||
"architect": "\U0001f4d0",
|
||||
"developer": "\U0001f4bb",
|
||||
"reviewer": "\U0001f50e",
|
||||
"tester": "\U0001f9ea",
|
||||
"deployer": "\U0001f680",
|
||||
}
|
||||
|
||||
|
||||
def task_usage_summary(task_id: int) -> dict:
|
||||
"""Aggregate agent_runs usage for a task.
|
||||
|
||||
total_in counts the FULL input (input + cache_read + cache_creation), and
|
||||
total_cached counts the cached portion (cache_read + cache_creation).
|
||||
COALESCE(...,0) keeps pre-existing rows (NULL cache_creation) from breaking.
|
||||
|
||||
Returns {total_in, total_cached, total_out, total_cost,
|
||||
per_agent: [(agent, in, cached, out, cost), ...]}.
|
||||
"""
|
||||
conn = get_db()
|
||||
try:
|
||||
rows = conn.execute(
|
||||
"SELECT agent, "
|
||||
"COALESCE(SUM(input_tokens),0) "
|
||||
" + COALESCE(SUM(cache_read_tokens),0) "
|
||||
" + COALESCE(SUM(cache_creation_tokens),0), "
|
||||
"COALESCE(SUM(cache_read_tokens),0) "
|
||||
" + COALESCE(SUM(cache_creation_tokens),0), "
|
||||
"COALESCE(SUM(output_tokens),0), "
|
||||
"COALESCE(SUM(cost_usd),0.0) "
|
||||
"FROM agent_runs WHERE task_id=? GROUP BY agent ORDER BY agent",
|
||||
(task_id,),
|
||||
).fetchall()
|
||||
finally:
|
||||
conn.close()
|
||||
per_agent = [(r[0], int(r[1]), int(r[2]), int(r[3]), float(r[4])) for r in rows]
|
||||
total_in = sum(r[1] for r in per_agent)
|
||||
total_cached = sum(r[2] for r in per_agent)
|
||||
total_out = sum(r[3] for r in per_agent)
|
||||
total_cost = sum(r[4] for r in per_agent)
|
||||
return {
|
||||
"total_in": total_in,
|
||||
"total_cached": total_cached,
|
||||
"total_out": total_out,
|
||||
"total_cost": total_cost,
|
||||
"per_agent": per_agent,
|
||||
}
|
||||
|
||||
|
||||
def task_summary_comment(task_id: int) -> str:
|
||||
"""Build the Deployer end-of-task summary comment (Feature 4, variant B)."""
|
||||
s = task_usage_summary(task_id)
|
||||
cached = s.get("total_cached", 0)
|
||||
head_in = (
|
||||
f"{fmt_tokens(s['total_in'])} \u0432\u0445\u043e\u0434 ({fmt_tokens(cached)} cached)"
|
||||
if cached > 0
|
||||
else f"{fmt_tokens(s['total_in'])} \u0432\u0445\u043e\u0434"
|
||||
)
|
||||
lines = [
|
||||
f"\U0001f4ca \u0418\u0442\u043e\u0433\u043e \u043f\u043e \u0437\u0430\u0434\u0430\u0447\u0435: "
|
||||
f"{head_in} / "
|
||||
f"{fmt_tokens(s['total_out'])} \u0432\u044b\u0445\u043e\u0434 \u00b7 "
|
||||
f"{fmt_cost(s['total_cost'])}"
|
||||
]
|
||||
for agent, ti, tc, to, cost in s["per_agent"]:
|
||||
name = AGENT_DISPLAY.get(agent, agent.capitalize())
|
||||
in_str = (
|
||||
f"{fmt_tokens(ti)} in ({fmt_tokens(tc)} cached)"
|
||||
if tc > 0
|
||||
else f"{fmt_tokens(ti)} in"
|
||||
)
|
||||
lines.append(
|
||||
f"\u2022 {name}: {in_str} / {fmt_tokens(to)} out \u00b7 {fmt_cost(cost)}"
|
||||
)
|
||||
return "\n".join(lines)
|
||||
@@ -216,12 +216,31 @@ async def handle_ci_status(payload: dict):
|
||||
else:
|
||||
notify_qg_failure(task_id, current_stage, "check_ci_green", reason)
|
||||
|
||||
elif state == "failure":
|
||||
# S-1: Gitea CI is NOT the authoritative gate anymore (the orchestrator runs
|
||||
# tests locally via check_tests_local). Gitea CI is often unconfigured, so a
|
||||
# "failure"/empty status here is not actionable. Log only, do not alert.
|
||||
logger.debug(f"Task {task_id}: Gitea CI state='failure' on branch '{branch}' "
|
||||
f"(non-authoritative, suppressed — local tests are the gate)")
|
||||
elif state == "failure" and current_stage == "development":
|
||||
# CI is the authoritative gate for development -> review.
|
||||
# On red CI: notify, then bounce the task back to the developer (capped retries),
|
||||
# symmetric to the review REQUEST_CHANGES path.
|
||||
notify_qg_failure(task_id, current_stage, "check_ci_green", f"Gitea CI failed on branch '{branch}'")
|
||||
conn = get_db()
|
||||
retry_count = conn.execute(
|
||||
"SELECT COUNT(*) as cnt FROM agent_runs WHERE task_id = ? AND agent = 'developer'",
|
||||
(task_id,),
|
||||
).fetchone()["cnt"]
|
||||
conn.close()
|
||||
if retry_count < MAX_DEV_RETRIES:
|
||||
# task already on 'development' — no stage change needed, just relaunch developer
|
||||
try:
|
||||
task_desc = (
|
||||
f"Work item: {work_item_id}\nRepo: {repo_name}\nBranch: {branch}\n"
|
||||
f"Stage: development\nNote: CI failed, fix and re-push (attempt {retry_count + 1}/{MAX_DEV_RETRIES})"
|
||||
)
|
||||
job_id = enqueue_job("developer", repo_name, task_desc, task_id=task_id)
|
||||
logger.info(f"Task {task_id}: CI failed, enqueued developer (attempt {retry_count + 1}, job_id={job_id})")
|
||||
except Exception as e:
|
||||
notify_error(task_id, f"Failed to relaunch developer after CI failure: {e}")
|
||||
else:
|
||||
notify_error(task_id, f"Max developer retries ({MAX_DEV_RETRIES}) reached after CI failure, escalating")
|
||||
logger.error(f"Task {task_id}: max retries reached after CI failure, needs manual intervention")
|
||||
|
||||
|
||||
async def handle_pr(payload: dict):
|
||||
@@ -315,6 +334,20 @@ async def handle_pr(payload: dict):
|
||||
logger.error(f"Task {task_id}: max retries reached, needs manual intervention")
|
||||
|
||||
elif action == "closed" and pr.get("merged", False):
|
||||
# BUG 8 (second door): at the deploy stage `done` is gated by the
|
||||
# deployer's verdict (check_deploy_status via advance_stage), NOT by the
|
||||
# fact that the PR was merged. The deployer merges the PR at the START of
|
||||
# its run, so a merged webhook arrives ~30s later while the deployer is
|
||||
# still working — blindly setting done here would fake-complete the task
|
||||
# and discard a later deploy_status: FAILED verdict. advance_stage will
|
||||
# drive deploy→done (and Plane→Done) when the deployer job finishes.
|
||||
# For every OTHER stage the merge-driven done behaviour is preserved.
|
||||
if current_stage == "deploy":
|
||||
logger.info(
|
||||
f"Task {task_id}: PR merged at deploy stage — done gated by "
|
||||
f"deployer verdict (check_deploy_status), ignoring merge-driven done."
|
||||
)
|
||||
return
|
||||
update_task_stage(task_id, "done")
|
||||
notify_stage_change(task_id, current_stage, "done")
|
||||
logger.info(f"Task {task_id}: PR merged, stage → done")
|
||||
|
||||
@@ -13,6 +13,7 @@ from ..db import (
|
||||
get_db,
|
||||
get_task_by_plane_id,
|
||||
get_next_work_item_id,
|
||||
ensure_unique_work_item_id,
|
||||
update_task_stage,
|
||||
enqueue_job,
|
||||
insert_event_dedup,
|
||||
@@ -92,38 +93,268 @@ async def plane_webhook(request: Request):
|
||||
return {"status": "ignored", "reason": "unknown project"}
|
||||
|
||||
if (event == "work_item.created") or (event == "issue" and action == "created"):
|
||||
# Feature 1: creation NO LONGER starts the pipeline. Slava keeps the
|
||||
# backlog until he moves an issue to In Progress. We only run a soft
|
||||
# QG-0 sanity log here (no branch, no analyst, no task row).
|
||||
await handle_work_item_created(data, project_id)
|
||||
elif (event == "work_item.updated") or (event == "issue" and action == "updated"):
|
||||
# Status-only verdict model: status changes drive the pipeline.
|
||||
# Backlog/Todo/Triage -> In Progress : START pipeline, or relaunch the
|
||||
# stage agent if returned from
|
||||
# Needs Input.
|
||||
# -> Approved : advance to the next stage.
|
||||
# -> Rejected : rollback (reason from latest comment).
|
||||
await handle_issue_updated(data, project_id)
|
||||
elif (event == "comment.created") or (event == "issue_comment" and action == "created"):
|
||||
await handle_comment(data, project_id)
|
||||
|
||||
return {"status": "accepted"}
|
||||
|
||||
|
||||
async def handle_work_item_created(data: dict, project_id: str = ""):
|
||||
def _state_id(data: dict) -> str:
|
||||
"""Extract the new Plane state UUID from an 'issue updated' payload.
|
||||
|
||||
Real payload (verified from prod events): data.state is
|
||||
{id, name, color, group}. Some payloads carry state as a bare UUID string.
|
||||
"""
|
||||
New work item created in Plane.
|
||||
QG-0: validate title, description, priority.
|
||||
If valid: create branch, init docs, launch analyst.
|
||||
If invalid: comment with what's missing, set Blocked.
|
||||
state = data.get("state")
|
||||
if isinstance(state, dict):
|
||||
return state.get("id", "") or ""
|
||||
if isinstance(state, str):
|
||||
return state
|
||||
return ""
|
||||
|
||||
|
||||
async def handle_issue_updated(data: dict, project_id: str = ""):
|
||||
"""Feature 1 & 2: react to a Plane issue status change.
|
||||
|
||||
Routes the NEW state UUID (data.state.id) to:
|
||||
- in_progress : start the pipeline if this issue has no task yet; if a
|
||||
task already exists and the stage agent is idle (returned from Needs
|
||||
Input), relaunch the stage agent so it reads Slava's fresh comments.
|
||||
- approved : advance to the next stage.
|
||||
- rejected : rollback to the previous stage (reason from latest comment).
|
||||
Any other status (Needs Input, In Review, Blocked, Done, board stages, etc.)
|
||||
is ignored here — those are statuses the orchestrator itself sets.
|
||||
"""
|
||||
from ..plane_sync import get_project_states
|
||||
|
||||
plane_id = str(data.get("id") or "")
|
||||
new_state = _state_id(data)
|
||||
if not plane_id or not new_state:
|
||||
logger.info("issue updated without id/state, ignoring")
|
||||
return
|
||||
|
||||
# ORCH-10: resolve expected state UUIDs per the incoming issue's project so
|
||||
# both enduro (b873d9eb) and orchestrator (e331bfb3) In Progress trigger the
|
||||
# pipeline. Using PLANE_STATES["in_progress"] here was the root-cause blocker.
|
||||
proj_states = get_project_states(project_id)
|
||||
if new_state == proj_states["in_progress"]:
|
||||
await handle_status_start(data, project_id)
|
||||
elif new_state == proj_states["approved"]:
|
||||
await handle_verdict(data, project_id, approved=True)
|
||||
elif new_state == proj_states["rejected"]:
|
||||
await handle_verdict(data, project_id, approved=False)
|
||||
else:
|
||||
logger.info(f"issue {plane_id} updated to state {new_state[:8]}..., no pipeline action")
|
||||
|
||||
|
||||
async def handle_status_start(data: dict, project_id: str = ""):
|
||||
"""An issue moved into In Progress.
|
||||
|
||||
Two cases under the status-only verdict model:
|
||||
|
||||
1. No task yet for this plane_id -> START the pipeline (start_pipeline).
|
||||
|
||||
2. A task already exists -> this is Slava returning the issue from
|
||||
Needs Input to In Progress after answering the analyst's questions. We
|
||||
must RELAUNCH the current stage's agent so it reads the fresh comments
|
||||
from Plane (the answer-to-questions flow used to live in handle_comment;
|
||||
it is now status-driven).
|
||||
|
||||
KEY FORK — telling "answer to questions" apart from a plain duplicate In
|
||||
Progress webhook (the dedup-protection case):
|
||||
|
||||
The tasks table stores no Plane status, and the issue.updated payload only
|
||||
carries the NEW state (In Progress), so we cannot read the previous status
|
||||
from here. Instead we use the only reliable local signal: whether the
|
||||
stage's agent is currently in flight.
|
||||
|
||||
- The orchestrator sets In Progress itself while an agent runs. When the
|
||||
agent FINISHES it leaves the issue in Needs Input or In Review and has
|
||||
NO queued/running job. So: an existing task with NO active job means the
|
||||
agent is idle / waiting -> a return to In Progress is a genuine relaunch
|
||||
request -> enqueue the stage agent.
|
||||
- If a queued/running job already exists for the task, the agent is busy
|
||||
(or a duplicate webhook arrived) -> SKIP (no double launch). The events
|
||||
de-dup at the top of plane_webhook already absorbs identical webhook
|
||||
bodies; this job guard additionally covers distinct webhooks fired while
|
||||
a job is still pending/running.
|
||||
"""
|
||||
from ..db import has_active_job_for_task
|
||||
|
||||
plane_id = str(data.get("id") or "")
|
||||
existing = get_task_by_plane_id(plane_id)
|
||||
|
||||
if not existing:
|
||||
logger.info(f"Status->In Progress for {plane_id}: starting pipeline")
|
||||
await start_pipeline(data, project_id)
|
||||
return
|
||||
|
||||
task_id = existing["id"]
|
||||
current_stage = existing["stage"]
|
||||
repo = existing["repo"]
|
||||
work_item_id = existing.get("work_item_id", "")
|
||||
branch = existing.get("branch", "")
|
||||
|
||||
# Duplicate / busy guard: a job is already pending or running for this task.
|
||||
if has_active_job_for_task(task_id):
|
||||
logger.info(
|
||||
f"Status->In Progress for {plane_id}: task {task_id} already has an "
|
||||
f"active job (stage={current_stage}), not relaunching"
|
||||
)
|
||||
return
|
||||
|
||||
# Agent is idle -> Slava answered questions and returned the issue to In
|
||||
# Progress. Relaunch the current stage's agent to read the fresh comments.
|
||||
from ..plane_sync import STAGE_AUTHORS, add_comment as _add_comment
|
||||
stage_agent = STAGE_AUTHORS.get(current_stage)
|
||||
if not stage_agent:
|
||||
logger.info(
|
||||
f"Status->In Progress for {plane_id}: no agent for stage "
|
||||
f"'{current_stage}', not relaunching"
|
||||
)
|
||||
return
|
||||
|
||||
task_desc = (
|
||||
f"Work item: {work_item_id}\nRepo: {repo}\nBranch: {branch}\n"
|
||||
f"Stage: {current_stage}\nNote: Stakeholder returned the issue to In "
|
||||
f"Progress (answered your questions). Read the latest comments in Plane "
|
||||
f"and revise your artifacts."
|
||||
)
|
||||
job_id = enqueue_job(stage_agent, repo, task_desc, task_id=task_id)
|
||||
logger.info(
|
||||
f"Task {task_id}: returned to In Progress (Needs Input answered), "
|
||||
f"relaunched {stage_agent} for stage {current_stage} (job_id={job_id})"
|
||||
)
|
||||
try:
|
||||
_add_comment(
|
||||
work_item_id,
|
||||
"\U0001f504 \u0410\u0433\u0435\u043d\u0442 \u043f\u0435\u0440\u0435\u0437\u0430\u043f\u0443\u0449\u0435\u043d \u0441 \u043e\u0442\u0432\u0435\u0442\u0430\u043c\u0438 \u0441\u0442\u0435\u0439\u043a\u0445\u043e\u043b\u0434\u0435\u0440\u0430.",
|
||||
author=stage_agent,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to post relaunch comment for {work_item_id}: {e}")
|
||||
|
||||
|
||||
async def handle_verdict(data: dict, project_id: str, approved: bool):
|
||||
"""Status-only verdict: a Plane status change drives advance / rollback.
|
||||
|
||||
Approved status -> _try_advance_stage. We do NOT touch the issue status here:
|
||||
_try_advance_stage -> advance_stage -> plane_notify_stage already PATCHes the
|
||||
issue to the NEXT stage's status. The old set_issue_in_progress call reset
|
||||
the status to In Progress first, which made the board flicker In Progress
|
||||
before the next stage (part of bug 3); it is removed.
|
||||
|
||||
Rejected status -> rollback to the previous stage. The reason is pulled from
|
||||
the issue's latest comment (Slava writes the reason in a comment before/with
|
||||
flipping the status to Rejected).
|
||||
"""
|
||||
plane_id = str(data.get("id") or "")
|
||||
task = get_task_by_plane_id(plane_id)
|
||||
if not task:
|
||||
logger.warning(f"Verdict status for {plane_id} but no task found, ignoring")
|
||||
return
|
||||
|
||||
task_id = task["id"]
|
||||
current_stage = task["stage"]
|
||||
repo = task["repo"]
|
||||
work_item_id = task.get("work_item_id", "")
|
||||
branch = task.get("branch", "")
|
||||
|
||||
if approved:
|
||||
# NOTE: no set_issue_in_progress here — _try_advance_stage sets the next
|
||||
# stage's status itself (advance_stage -> plane_notify_stage).
|
||||
logger.info(f"Task {task_id}: Approved status -> advance from {current_stage}")
|
||||
await _try_advance_stage(task_id, current_stage, repo, work_item_id, branch)
|
||||
return
|
||||
|
||||
# Rejected: pull the rejection reason from the issue's latest comment.
|
||||
issue_id = task.get("plane_issue_id") or task.get("plane_id") or plane_id
|
||||
reason = _latest_comment_reason(issue_id, repo, project_id)
|
||||
await _rollback_stage(
|
||||
task_id, current_stage, repo, work_item_id, branch, reason
|
||||
)
|
||||
|
||||
|
||||
def _latest_comment_reason(issue_id: str, repo: str, project_id: str = "") -> str:
|
||||
"""Fetch the issue's most recent comment text (HTML stripped) as the reject
|
||||
reason. Slava writes the reason in a comment before/with flipping the status
|
||||
to Rejected.
|
||||
|
||||
Returns a fixed fallback when there is no comment / the API call fails.
|
||||
"""
|
||||
from ..plane_sync import (
|
||||
PLANE_BASE,
|
||||
PLANE_HEADERS,
|
||||
WORKSPACE,
|
||||
PROJECT_ID as _DEFAULT_PROJECT_ID,
|
||||
)
|
||||
fallback = "Rejected via status, no reason comment"
|
||||
if not issue_id:
|
||||
return fallback
|
||||
_proj = get_project_by_repo(repo)
|
||||
pid = _proj.plane_project_id if _proj else (project_id or _DEFAULT_PROJECT_ID)
|
||||
url = (
|
||||
f"{PLANE_BASE}/workspaces/{WORKSPACE}/projects/{pid}/issues/"
|
||||
f"{issue_id}/comments/"
|
||||
)
|
||||
try:
|
||||
resp = httpx.get(url, headers=PLANE_HEADERS, timeout=10)
|
||||
if resp.status_code != 200:
|
||||
logger.warning(
|
||||
f"reject-reason: GET comments for {issue_id} returned "
|
||||
f"{resp.status_code}"
|
||||
)
|
||||
return fallback
|
||||
payload = resp.json()
|
||||
comments = payload.get("results", payload) if isinstance(payload, dict) else payload
|
||||
if not comments:
|
||||
return fallback
|
||||
latest = max(comments, key=lambda c: c.get("created_at", "") or "")
|
||||
raw = (
|
||||
latest.get("comment_stripped")
|
||||
or latest.get("comment_html")
|
||||
or latest.get("comment")
|
||||
or ""
|
||||
)
|
||||
text = re.sub(r"<[^>]+>", "", raw).strip()
|
||||
return text[:300] if text else fallback
|
||||
except Exception as e:
|
||||
logger.error(f"reject-reason: failed to fetch comments for {issue_id}: {e}")
|
||||
return fallback
|
||||
|
||||
|
||||
async def handle_work_item_created(data: dict, project_id: str = ""):
|
||||
"""Feature 1: creation does NOT start the pipeline anymore.
|
||||
|
||||
The pipeline is started when Slava moves the issue into In Progress
|
||||
(handle_status_start -> start_pipeline). On creation we only run a SOFT QG-0
|
||||
sanity check and log the result — NO branch, NO docs, NO analyst, NO task row
|
||||
— so the issue can sit in the backlog until Slava is ready.
|
||||
"""
|
||||
plane_id = data.get("id", "")
|
||||
name = data.get("name", "untitled")
|
||||
description = data.get("description_stripped", data.get("description", ""))
|
||||
priority = data.get("priority", {})
|
||||
priority_name = priority if isinstance(priority, str) else priority.get("name", "")
|
||||
errors = _qg0_errors(name, description)
|
||||
if errors:
|
||||
logger.info(f"work_item.created {plane_id}: soft QG-0 warnings: {errors}")
|
||||
else:
|
||||
logger.info(f"work_item.created {plane_id} ('{name}'): in backlog, awaiting In Progress")
|
||||
|
||||
# ORCH-6: resolve repo / prefix / Plane project from the registry instead of
|
||||
# the single hardcoded default_repo.
|
||||
if not project_id:
|
||||
project_id = data.get("project") or data.get("project_id") or ""
|
||||
proj = get_project_by_plane_id(project_id)
|
||||
if not proj:
|
||||
logger.warning(f"handle_work_item_created: unknown project '{project_id}', ignoring {plane_id}")
|
||||
return
|
||||
repo = proj.repo
|
||||
plane_project_id = proj.plane_project_id
|
||||
|
||||
# QG-0 validation
|
||||
def _qg0_errors(name: str, description: str) -> list:
|
||||
"""QG-0 validation: returns a list of human-readable problems (empty = OK)."""
|
||||
errors = []
|
||||
if not name or len(name) < 5:
|
||||
errors.append("Title \u0441\u043b\u0438\u0448\u043a\u043e\u043c \u043a\u043e\u0440\u043e\u0442\u043a\u0438\u0439 (\u043d\u0443\u0436\u043d\u043e >= 5 \u0441\u0438\u043c\u0432\u043e\u043b\u043e\u0432)")
|
||||
@@ -132,10 +363,70 @@ async def handle_work_item_created(data: dict, project_id: str = ""):
|
||||
if not description or len(description.strip()) < 20:
|
||||
errors.append("Description \u0441\u043b\u0438\u0448\u043a\u043e\u043c \u043a\u043e\u0440\u043e\u0442\u043a\u0438\u0439 (\u043d\u0443\u0436\u043d\u043e >= 20 \u0441\u0438\u043c\u0432\u043e\u043b\u043e\u0432)")
|
||||
|
||||
return errors
|
||||
|
||||
|
||||
async def start_pipeline(data: dict, project_id: str = ""):
|
||||
"""Feature 1: start the pipeline for an issue (moved to In Progress).
|
||||
|
||||
This is the body extracted from the old handle_work_item_created: resolve the
|
||||
project, run QG-0 (hard — blocks on failure), create the work item id +
|
||||
branch + initial docs, insert the task row, and enqueue the analyst.
|
||||
|
||||
Callers (handle_status_start) already guarantee no existing task for this
|
||||
plane_id, so this never duplicates.
|
||||
"""
|
||||
plane_id = data.get("id", "")
|
||||
name = data.get("name", "untitled")
|
||||
description = data.get("description_stripped", data.get("description", ""))
|
||||
|
||||
# ORCH-6: resolve repo / prefix / Plane project from the registry instead of
|
||||
# the single hardcoded default_repo.
|
||||
if not project_id:
|
||||
project_id = data.get("project") or data.get("project_id") or ""
|
||||
proj = get_project_by_plane_id(project_id)
|
||||
if not proj:
|
||||
logger.warning(f"start_pipeline: unknown project '{project_id}', ignoring {plane_id}")
|
||||
return
|
||||
repo = proj.repo
|
||||
plane_project_id = proj.plane_project_id
|
||||
|
||||
# BUG 1 + BUG B: Plane's issue.updated webhook (status change -> In Progress)
|
||||
# sends only the CHANGED fields, so BOTH description / description_stripped
|
||||
# AND name are usually empty here even though the issue HAS them. Pull the
|
||||
# full title + description from the Plane issue detail API in a SINGLE GET
|
||||
# (fetch_issue_fields: same endpoint + shared token already used by
|
||||
# fetch_issue_sequence_id) before QG-0 and before the branch slug is built.
|
||||
# If the API is also empty, QG-0 legitimately fails (truly empty ticket) and
|
||||
# name falls back to "untitled".
|
||||
name_missing = (not name) or name.strip().lower() == "untitled" or len(name.strip()) < 3
|
||||
desc_missing = (not description) or len(description.strip()) < 20
|
||||
if name_missing or desc_missing:
|
||||
from ..plane_sync import fetch_issue_fields
|
||||
fetched_name, fetched_desc = fetch_issue_fields(plane_id, plane_project_id)
|
||||
if desc_missing and fetched_desc and len(fetched_desc.strip()) >= len(description.strip()):
|
||||
description = fetched_desc
|
||||
logger.info(
|
||||
f"start_pipeline: pulled description from Plane API for {plane_id} "
|
||||
f"({len(description.strip())} chars)"
|
||||
)
|
||||
if name_missing and fetched_name and len(fetched_name.strip()) >= 3:
|
||||
name = fetched_name
|
||||
logger.info(
|
||||
f"start_pipeline: pulled name from Plane API for {plane_id} "
|
||||
f"('{name}')"
|
||||
)
|
||||
# BUG B fallback: if name is still empty/blank after the API pull, keep the
|
||||
# legacy "untitled" so the slug/branch build never crashes on an empty name.
|
||||
if not name or not name.strip():
|
||||
name = "untitled"
|
||||
|
||||
# QG-0 validation (hard gate on pipeline start)
|
||||
errors = _qg0_errors(name, description)
|
||||
if errors:
|
||||
# QG-0 failed
|
||||
error_text = "\u26a0\ufe0f QG-0 failed:\n" + "\n".join(f"\u2022 {e}" for e in errors)
|
||||
from ..plane_sync import PLANE_BASE, PLANE_HEADERS, WORKSPACE, PLANE_STATES
|
||||
from ..plane_sync import PLANE_BASE, PLANE_HEADERS, WORKSPACE, get_project_states
|
||||
import httpx as _httpx
|
||||
# Post comment (ORCH-6: route to the issue's own project)
|
||||
url = f"{PLANE_BASE}/workspaces/{WORKSPACE}/projects/{plane_project_id}/issues/{plane_id}/comments/"
|
||||
@@ -144,28 +435,73 @@ async def handle_work_item_created(data: dict, project_id: str = ""):
|
||||
json={"comment_html": f"<p>{error_text}</p>"}, timeout=10)
|
||||
except Exception:
|
||||
pass
|
||||
# Set blocked
|
||||
# Set blocked — ORCH-10: resolve per-project UUID.
|
||||
url2 = f"{PLANE_BASE}/workspaces/{WORKSPACE}/projects/{plane_project_id}/issues/{plane_id}/"
|
||||
try:
|
||||
_blocked = get_project_states(plane_project_id)["blocked"]
|
||||
_httpx.patch(url2, headers=PLANE_HEADERS,
|
||||
json={"state": PLANE_STATES["blocked"]}, timeout=10)
|
||||
json={"state": _blocked}, timeout=10)
|
||||
except Exception:
|
||||
pass
|
||||
logger.info(f"QG-0 failed for {plane_id}: {errors}")
|
||||
return
|
||||
|
||||
# Generate work item ID
|
||||
work_item_id = get_next_work_item_id(repo, proj.work_item_prefix)
|
||||
# Generate work item ID.
|
||||
# M-6: source of truth for the number is the Plane sequence_id. Fetch it by
|
||||
# issue UUID; if Plane is unavailable, fall back to the DB increment so a
|
||||
# Plane outage never blocks task creation (autonomy > exact numbering).
|
||||
from ..plane_sync import fetch_issue_sequence_id
|
||||
seq = fetch_issue_sequence_id(plane_id, plane_project_id)
|
||||
if seq is not None:
|
||||
work_item_id = f"{proj.work_item_prefix}-{seq:03d}"
|
||||
else:
|
||||
work_item_id = get_next_work_item_id(repo, proj.work_item_prefix)
|
||||
logger.warning(
|
||||
f"Plane sequence_id unavailable for {plane_id}, "
|
||||
f"fell back to DB increment: {work_item_id}"
|
||||
)
|
||||
|
||||
# BUG 2a: uniqueness-guard LAYERED ON TOP of the M-6 derive above (the derive
|
||||
# itself is untouched). If the derived ET-NNN is already taken by another
|
||||
# task in this repo (collision -> two tasks would share branch/worktree, see
|
||||
# ET-006), bump to the next free number.
|
||||
_derived = work_item_id
|
||||
work_item_id = ensure_unique_work_item_id(work_item_id, repo)
|
||||
if work_item_id != _derived:
|
||||
logger.warning(
|
||||
f"work_item_id collision: derived {_derived} already in use for "
|
||||
f"{repo}; reassigned {plane_id} -> {work_item_id}"
|
||||
)
|
||||
|
||||
# Create slug from name
|
||||
slug = re.sub(r"[^a-z0-9]+", "-", name.lower()).strip("-")[:30]
|
||||
branch = f"feature/{work_item_id}-{slug}"
|
||||
|
||||
# BUG 2b (defense-in-depth): the worktree/path is keyed by BRANCH
|
||||
# (git_worktree.get_worktree_path) and tasks are reverse-resolved by
|
||||
# (repo, branch). With 2a the work_item_id is unique, so the branch prefix is
|
||||
# too; but the slug could still collide (e.g. two issues with the same title
|
||||
# under different ids -> fine) or, worse, an identical branch already exist.
|
||||
# Guard physically: if this exact branch is already owned by another task in
|
||||
# this repo, disambiguate with the (now unique) work_item_id so two tasks can
|
||||
# never share a worktree.
|
||||
_conn_b = get_db()
|
||||
_branch_taken = _conn_b.execute(
|
||||
"SELECT 1 FROM tasks WHERE repo = ? AND branch = ? LIMIT 1", (repo, branch)
|
||||
).fetchone()
|
||||
_conn_b.close()
|
||||
if _branch_taken is not None:
|
||||
branch = f"feature/{work_item_id}-{plane_id[:8]}"
|
||||
logger.warning(
|
||||
f"branch collision for {repo}; disambiguated to unique branch {branch}"
|
||||
)
|
||||
|
||||
# Insert task into DB
|
||||
conn = get_db()
|
||||
conn.execute(
|
||||
"INSERT INTO tasks (plane_id, work_item_id, repo, branch, stage, plane_issue_id) VALUES (?, ?, ?, ?, ?, ?)",
|
||||
(plane_id, work_item_id, repo, branch, "analysis", plane_id),
|
||||
"INSERT INTO tasks (plane_id, work_item_id, repo, branch, stage, plane_issue_id, title) "
|
||||
"VALUES (?, ?, ?, ?, ?, ?, ?)",
|
||||
(plane_id, work_item_id, repo, branch, "analysis", plane_id, name),
|
||||
)
|
||||
conn.commit()
|
||||
conn.close()
|
||||
@@ -192,133 +528,104 @@ async def handle_work_item_created(data: dict, project_id: str = ""):
|
||||
task_row = get_db().execute("SELECT id FROM tasks WHERE work_item_id=?", (work_item_id,)).fetchone()
|
||||
if task_row:
|
||||
task_id = task_row[0]
|
||||
task_desc = f"Work item: {work_item_id}\nRepo: {repo}\nBranch: {branch}\nStage: analysis\nTitle: {name}"
|
||||
task_desc = (
|
||||
f"Work item: {work_item_id}\nRepo: {repo}\nBranch: {branch}\n"
|
||||
f"Stage: analysis\nTitle: {name}\n\nDescription:\n{description}"
|
||||
)
|
||||
job_id = enqueue_job("analyst", repo, task_desc, task_id=task_id)
|
||||
logger.info(f"Task {task_id}: enqueued analyst (job_id={job_id})")
|
||||
# Post start comment to Plane
|
||||
from ..plane_sync import add_comment as _add_comment
|
||||
_add_comment(work_item_id, "\U0001f50d Analyst \u0437\u0430\u043f\u0443\u0449\u0435\u043d. BRD/\u0422\u0417/AC/TestPlan \u0432 \u0440\u0430\u0431\u043e\u0442\u0435 (\u043e\u0436\u0438\u0434\u0430\u0439\u0442\u0435 8-15 \u043c\u0438\u043d).")
|
||||
_add_comment(work_item_id, "\U0001f50d Analyst \u0437\u0430\u043f\u0443\u0449\u0435\u043d. BRD/\u0422\u0417/AC/TestPlan \u0432 \u0440\u0430\u0431\u043e\u0442\u0435 (\u043e\u0436\u0438\u0434\u0430\u0439\u0442\u0435 8-15 \u043c\u0438\u043d).", author="analyst")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to launch analyst for {work_item_id}: {e}")
|
||||
|
||||
|
||||
async def handle_comment(data: dict, project_id: str = ""):
|
||||
"""Status-only verdict model: comments NEVER drive the pipeline.
|
||||
|
||||
The whole comment-based control mechanism (``:approved:`` / ``:rejected:``
|
||||
and the analysis answer-to-questions flow) was removed. It caused bug 3
|
||||
(echo self-hit): the analyst posts its own "waiting for approval" comment,
|
||||
handle_comment catches its own comment and reverts In Review -> In Progress.
|
||||
|
||||
Comments are now logged only — no status change, no enqueue, no side effect.
|
||||
The pipeline is driven solely by status changes (handle_issue_updated):
|
||||
- Approved -> advance
|
||||
- Rejected -> rollback (reason pulled from the latest comment)
|
||||
- In Progress (returned from Needs Input) -> relaunch the stage agent
|
||||
"""
|
||||
Handle comment event — check for :approved: or :rejected:.
|
||||
Advance or rollback stage accordingly.
|
||||
plane_id = str(
|
||||
data.get("work_item_id") or data.get("issue_id") or data.get("issue") or ""
|
||||
)
|
||||
logger.info(
|
||||
f"comment.created for {plane_id}: logged only, no pipeline action "
|
||||
f"(status-only verdict model)"
|
||||
)
|
||||
|
||||
|
||||
async def _rollback_stage(
|
||||
task_id: int, current_stage: str, repo: str, work_item_id: str, branch: str,
|
||||
reason: str,
|
||||
):
|
||||
"""Rollback triggered by a status change to Rejected.
|
||||
|
||||
- at analysis: relaunch the analyst with the rejection reason;
|
||||
- otherwise: roll back to the previous stage and relaunch its agent
|
||||
(via the existing rollback notify + an enqueue of the prev-stage agent).
|
||||
"""
|
||||
comment_body = data.get("comment_stripped", data.get("comment", data.get("body", data.get("comment_html", ""))))
|
||||
plane_id = str(data.get("work_item_id") or data.get("issue_id") or data.get("issue") or "")
|
||||
|
||||
if not plane_id:
|
||||
logger.warning("Comment event without work_item_id, skipping")
|
||||
return
|
||||
|
||||
task = get_task_by_plane_id(plane_id)
|
||||
if not task:
|
||||
logger.warning(f"No task found for plane_id={plane_id}")
|
||||
return
|
||||
|
||||
task_id = task["id"]
|
||||
current_stage = task["stage"]
|
||||
repo = task["repo"]
|
||||
work_item_id = task.get("work_item_id", "")
|
||||
branch = task.get("branch", "")
|
||||
|
||||
if ":rejected:" in comment_body:
|
||||
# Extract reason (text after :rejected:)
|
||||
reason = comment_body.split(":rejected:", 1)[-1].strip()[:300]
|
||||
|
||||
if current_stage == "analysis":
|
||||
# Already in analysis — just relaunch analyst with rejection reason
|
||||
from ..plane_sync import set_issue_in_progress
|
||||
set_issue_in_progress(work_item_id)
|
||||
task_desc = (
|
||||
f"Work item: {work_item_id}\nRepo: {repo}\nBranch: {branch}\n"
|
||||
f"Stage: analysis\nNote: Stakeholder REJECTED your artifacts. "
|
||||
f"Reason: {reason}\nRevise and improve."
|
||||
)
|
||||
new_job = enqueue_job("analyst", repo, task_desc, task_id=task_id)
|
||||
from ..plane_sync import add_comment as _plane_comment
|
||||
_plane_comment(work_item_id, f"\U0001f504 Analyst \u043f\u0435\u0440\u0435\u0437\u0430\u043f\u0443\u0449\u0435\u043d. \u041f\u0440\u0438\u0447\u0438\u043d\u0430 \u043e\u0442\u043a\u043b\u043e\u043d\u0435\u043d\u0438\u044f: {reason}")
|
||||
logger.info(f"Task {task_id}: rejected at analysis, enqueued analyst (job_id={new_job})")
|
||||
else:
|
||||
# Rollback to previous stage
|
||||
prev_stage = get_previous_stage(current_stage)
|
||||
if prev_stage:
|
||||
update_task_stage(task_id, prev_stage)
|
||||
from ..plane_sync import set_issue_in_progress
|
||||
set_issue_in_progress(work_item_id)
|
||||
notify_stage_change(task_id, current_stage, prev_stage)
|
||||
plane_notify_stage(work_item_id, current_stage, prev_stage)
|
||||
from ..plane_sync import add_comment as _plane_comment
|
||||
_plane_comment(work_item_id, f"\U0001f504 \u041e\u0442\u043a\u0430\u0442: {current_stage} \u2192 {prev_stage}. \u041f\u0440\u0438\u0447\u0438\u043d\u0430: {reason}")
|
||||
logger.info(f"Task {task_id}: rejected, rolled back {current_stage} \u2192 {prev_stage}")
|
||||
return
|
||||
|
||||
if ":approved:" in comment_body:
|
||||
if current_stage == "analysis":
|
||||
# Already in analysis — just relaunch analyst with rejection reason
|
||||
from ..plane_sync import set_issue_in_progress
|
||||
set_issue_in_progress(work_item_id)
|
||||
# Try to advance stage
|
||||
await _try_advance_stage(task_id, current_stage, repo, work_item_id, branch)
|
||||
task_desc = (
|
||||
f"Work item: {work_item_id}\nRepo: {repo}\nBranch: {branch}\n"
|
||||
f"Stage: analysis\nNote: Stakeholder REJECTED your artifacts. "
|
||||
f"Reason: {reason}\nRevise and improve."
|
||||
)
|
||||
new_job = enqueue_job("analyst", repo, task_desc, task_id=task_id)
|
||||
from ..plane_sync import add_comment as _plane_comment
|
||||
_plane_comment(work_item_id, f"\U0001f504 Analyst \u043f\u0435\u0440\u0435\u0437\u0430\u043f\u0443\u0449\u0435\u043d. \u041f\u0440\u0438\u0447\u0438\u043d\u0430 \u043e\u0442\u043a\u043b\u043e\u043d\u0435\u043d\u0438\u044f: {reason}", author="analyst")
|
||||
logger.info(f"Task {task_id}: rejected at analysis, enqueued analyst (job_id={new_job})")
|
||||
return
|
||||
|
||||
# Task 3: If neither :approved: nor :rejected: — check if this is an answer to questions
|
||||
if current_stage == "analysis":
|
||||
from ..plane_sync import PLANE_STATES, set_issue_in_progress
|
||||
issue_id = task.get("plane_issue_id") or task.get("plane_id")
|
||||
if not issue_id:
|
||||
issue_id = plane_id
|
||||
if issue_id:
|
||||
from ..plane_sync import PLANE_BASE, PLANE_HEADERS, WORKSPACE
|
||||
from ..plane_sync import PROJECT_ID as _DEFAULT_PROJECT_ID
|
||||
# ORCH-6: route to this task's own Plane project (resolved from repo).
|
||||
_proj = get_project_by_repo(repo)
|
||||
_pid = _proj.plane_project_id if _proj else (project_id or _DEFAULT_PROJECT_ID)
|
||||
import httpx as _httpx
|
||||
try:
|
||||
_resp = _httpx.get(
|
||||
f"{PLANE_BASE}/workspaces/{WORKSPACE}/projects/{_pid}/issues/{issue_id}/",
|
||||
headers=PLANE_HEADERS, timeout=10
|
||||
)
|
||||
if _resp.status_code == 200:
|
||||
issue_data = _resp.json()
|
||||
if issue_data.get("state") == PLANE_STATES["needs_input"]:
|
||||
# Task 11: Check analyst retry count (max 3 question rounds)
|
||||
conn3 = get_db()
|
||||
analyst_runs = conn3.execute(
|
||||
"SELECT COUNT(*) FROM agent_runs WHERE task_id=? AND agent='analyst'",
|
||||
(task_id,)
|
||||
).fetchone()[0]
|
||||
conn3.close()
|
||||
|
||||
if analyst_runs >= 4: # initial + 3 retries
|
||||
from ..plane_sync import set_issue_blocked, add_comment as _pc
|
||||
set_issue_blocked(work_item_id)
|
||||
_pc(
|
||||
work_item_id,
|
||||
"\U0001f6a8 3 \u0440\u0430\u0443\u043d\u0434\u0430 \u0443\u0442\u043e\u0447\u043d\u0435\u043d\u0438\u0439 \u0438\u0441\u0447\u0435\u0440\u043f\u0430\u043d\u044b. Analyst \u043d\u0435 \u043c\u043e\u0436\u0435\u0442 \u0441\u0444\u043e\u0440\u043c\u0438\u0440\u043e\u0432\u0430\u0442\u044c \u0422\u0417. "
|
||||
"\u0422\u0440\u0435\u0431\u0443\u0435\u0442\u0441\u044f \u0431\u043e\u043b\u0435\u0435 \u0434\u0435\u0442\u0430\u043b\u044c\u043d\u043e\u0435 \u043e\u043f\u0438\u0441\u0430\u043d\u0438\u0435 \u0438\u043b\u0438 \u0432\u0441\u0442\u0440\u0435\u0447\u0430."
|
||||
)
|
||||
from ..notifications import send_telegram
|
||||
send_telegram(f"\U0001f6a8 {work_item_id}: 3 \u0440\u0430\u0443\u043d\u0434\u0430 \u0432\u043e\u043f\u0440\u043e\u0441\u043e\u0432 analyst'\u0430 \u0438\u0441\u0447\u0435\u0440\u043f\u0430\u043d\u044b. \u041d\u0443\u0436\u043d\u0430 \u043f\u043e\u043c\u043e\u0449\u044c.")
|
||||
return
|
||||
|
||||
# This is an answer to analyst's questions — relaunch
|
||||
set_issue_in_progress(work_item_id)
|
||||
task_desc = (
|
||||
f"Work item: {work_item_id}\nRepo: {repo}\nBranch: {branch}\n"
|
||||
f"Stage: analysis\nNote: Stakeholder answered your questions. "
|
||||
f"Read the latest comment in Plane and revise your artifacts.\n"
|
||||
f"Answer: {comment_body[:500]}"
|
||||
)
|
||||
new_job = enqueue_job("analyst", repo, task_desc, task_id=task_id)
|
||||
from ..plane_sync import add_comment as _pc2
|
||||
_pc2(work_item_id, "\U0001f504 Analyst \u043f\u0435\u0440\u0435\u0437\u0430\u043f\u0443\u0449\u0435\u043d \u0441 \u043e\u0442\u0432\u0435\u0442\u0430\u043c\u0438 \u0441\u0442\u0435\u0439\u043a\u0445\u043e\u043b\u0434\u0435\u0440\u0430.")
|
||||
logger.info(f"Task {task_id}: stakeholder answered questions, enqueued analyst (job_id={new_job})")
|
||||
return
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to check issue state: {e}")
|
||||
# Rollback to previous stage
|
||||
prev_stage = get_previous_stage(current_stage)
|
||||
if not prev_stage:
|
||||
logger.info(f"Task {task_id}: rejected at {current_stage} but no previous stage")
|
||||
return
|
||||
update_task_stage(task_id, prev_stage)
|
||||
notify_stage_change(task_id, current_stage, prev_stage)
|
||||
# Feature 3: plane_notify_stage moves the board to the prev stage's status.
|
||||
plane_notify_stage(work_item_id, current_stage, prev_stage)
|
||||
# Then put it back to In Progress so the relaunched agent is clearly working.
|
||||
from ..plane_sync import set_issue_in_progress
|
||||
set_issue_in_progress(work_item_id)
|
||||
from ..plane_sync import add_comment as _plane_comment, STAGE_AUTHORS
|
||||
_plane_comment(
|
||||
work_item_id,
|
||||
f"\U0001f504 \u041e\u0442\u043a\u0430\u0442: {current_stage} \u2192 {prev_stage}. \u041f\u0440\u0438\u0447\u0438\u043d\u0430: {reason}",
|
||||
author=STAGE_AUTHORS.get(prev_stage, "stream"),
|
||||
)
|
||||
# Relaunch the previous stage's agent so the rollback actually re-runs work.
|
||||
# STAGE_AUTHORS maps a stage directly to the role that OWNS work in it
|
||||
# (analysis->analyst, architecture->architect, ...), which is exactly the
|
||||
# agent we must re-run on a rollback into prev_stage.
|
||||
from ..plane_sync import STAGE_AUTHORS as _STAGE_AUTHORS
|
||||
prev_agent = _STAGE_AUTHORS.get(prev_stage)
|
||||
if prev_agent:
|
||||
task_desc = (
|
||||
f"Work item: {work_item_id}\nRepo: {repo}\nBranch: {branch}\n"
|
||||
f"Stage: {prev_stage}\nNote: Stakeholder REJECTED. Reason: {reason}\n"
|
||||
f"Revise and improve."
|
||||
)
|
||||
new_job = enqueue_job(prev_agent, repo, task_desc, task_id=task_id)
|
||||
logger.info(
|
||||
f"Task {task_id}: rejected, rolled back {current_stage} \u2192 {prev_stage}, "
|
||||
f"enqueued {prev_agent} (job_id={new_job})"
|
||||
)
|
||||
else:
|
||||
logger.info(f"Task {task_id}: rejected, rolled back {current_stage} \u2192 {prev_stage}")
|
||||
|
||||
|
||||
async def _try_advance_stage(
|
||||
@@ -331,10 +638,10 @@ async def _try_advance_stage(
|
||||
is synchronous. We run it off the event loop via asyncio.to_thread so there
|
||||
is exactly one implementation shared with the launcher.
|
||||
|
||||
finished_agent is None on this webhook path (a human :approved: comment, not
|
||||
a finished agent), so the agent-specific rollback branches inside the engine
|
||||
intentionally do not trigger — identical to the old plane behavior, which
|
||||
only ran the QG and either advanced or reported the failure.
|
||||
finished_agent is None on this webhook path (a human Approved status change,
|
||||
not a finished agent), so the agent-specific rollback branches inside the
|
||||
engine intentionally do not trigger — the webhook path only runs the QG and
|
||||
either advances or reports the failure.
|
||||
"""
|
||||
import asyncio
|
||||
from ..stage_engine import advance_stage
|
||||
|
||||
73
tests/conftest.py
Normal file
73
tests/conftest.py
Normal file
@@ -0,0 +1,73 @@
|
||||
"""Global pytest fixtures.
|
||||
|
||||
test(conftest): mute Telegram in ALL tests to stop prod leakage.
|
||||
|
||||
Background: a pytest run on prod was sending REAL Telegram messages to Slava,
|
||||
because some tests (e.g. test_webhook_dedup advancing a stage) reach
|
||||
notify_stage_change -> send_telegram, which reads the live .env
|
||||
telegram_bot_token/chat_id and actually POSTs to Telegram.
|
||||
|
||||
This autouse fixture stubs send_telegram to a no-op for every test:
|
||||
|
||||
- "src.notifications.send_telegram" is the SOURCE. All the notify_* helpers in
|
||||
notifications.py call the module-global send_telegram, and every other module
|
||||
that does a *local* `from .notifications import send_telegram` inside a
|
||||
function resolves it live at call time -> covered by patching the source.
|
||||
|
||||
- "src.stage_engine.send_telegram" is patched too, because stage_engine binds
|
||||
send_telegram as a MODULE-LEVEL name (from .notifications import send_telegram
|
||||
at import), so a patch of the source alone would not intercept its 3 direct
|
||||
calls. webhooks/plane and launcher import it locally inside functions, so the
|
||||
source patch already covers them; they are patched defensively with
|
||||
raising=False anyway in case that ever changes.
|
||||
|
||||
raising=False so a module that doesn't (yet) expose the name never breaks setup.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _no_telegram(monkeypatch):
|
||||
_noop = lambda *a, **k: None # noqa: E731
|
||||
# Source of truth (covers notifications.notify_* and all local re-imports).
|
||||
monkeypatch.setattr("src.notifications.send_telegram", _noop, raising=False)
|
||||
# Module-level binding in stage_engine (and defensive coverage elsewhere).
|
||||
monkeypatch.setattr("src.stage_engine.send_telegram", _noop, raising=False)
|
||||
monkeypatch.setattr("src.webhooks.plane.send_telegram", _noop, raising=False)
|
||||
monkeypatch.setattr("src.agents.launcher.send_telegram", _noop, raising=False)
|
||||
monkeypatch.setattr("src.queue_worker.send_telegram", _noop, raising=False)
|
||||
yield
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _reset_webhook_secrets(monkeypatch):
|
||||
"""Isolate settings singleton between test files (CI cross-file isolation).
|
||||
|
||||
settings is a process-wide Pydantic singleton read once at import. Different
|
||||
test modules set env variables differently at import-time, so those values leak
|
||||
across files when pytest collects them together (as CI does).
|
||||
|
||||
1. webhook secrets: reset to "" so HMAC is disabled by default. Tests that
|
||||
intentionally test the 401 path (test_webhook_dedup.py:268,278) re-apply
|
||||
their own monkeypatch AFTER this autouse fixture runs, which overrides the
|
||||
reset for the duration of that one test only.
|
||||
|
||||
2. db_path: reset to the value from ORCH_DB_PATH env var (last written by the
|
||||
last imported test module). Without this, test_webhook_dedup.py (imported
|
||||
first, alphabetically) seeds settings.db_path = dedup.db, while
|
||||
test_webhooks.py's setup_db fixture tries to remove test_orchestrator.db,
|
||||
leaving the DB dirty across tests that share a branch name and causing
|
||||
get_task_by_repo_branch() to return a stale row with the wrong stage.
|
||||
Per-test monkeypatches in test_webhook_dedup.setup_db override this reset.
|
||||
"""
|
||||
import os
|
||||
from src.webhooks import gitea as gitea_mod
|
||||
from src.webhooks import plane as plane_mod
|
||||
from src import db as db_mod
|
||||
monkeypatch.setattr(gitea_mod.settings, "gitea_webhook_secret", "", raising=False)
|
||||
monkeypatch.setattr(plane_mod.settings, "plane_webhook_secret", "", raising=False)
|
||||
db_path_env = os.environ.get("ORCH_DB_PATH", "")
|
||||
if db_path_env:
|
||||
monkeypatch.setattr(db_mod.settings, "db_path", db_path_env, raising=False)
|
||||
yield
|
||||
74
tests/test_analyst_comment.py
Normal file
74
tests/test_analyst_comment.py
Normal file
@@ -0,0 +1,74 @@
|
||||
"""BUG C: analyst "artifacts ready" comment under the status-only model.
|
||||
|
||||
The comment must ask for the **Approved** status (not the obsolete
|
||||
":approved:" reaction, not moving back to "In Progress") and link only the
|
||||
docs that actually exist in the worktree.
|
||||
"""
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")
|
||||
os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")
|
||||
|
||||
|
||||
def test_analyst_comment_asks_approved_with_links(monkeypatch, tmp_path):
|
||||
from src import stage_engine as SE
|
||||
|
||||
# Worktree with only SOME of the candidate docs present.
|
||||
wt = tmp_path / "wt"
|
||||
docs = wt / "docs" / "work-items" / "ET-011"
|
||||
docs.mkdir(parents=True)
|
||||
for fname in ("00-business-request.md", "01-brd.md", "02-trz.md",
|
||||
"03-acceptance-criteria.md", "04-test-plan.yaml"):
|
||||
(docs / fname).write_text("x")
|
||||
# 04b-ui-test-cases.md intentionally absent -> must NOT be linked
|
||||
|
||||
monkeypatch.setattr(SE, "get_worktree_path", lambda repo, branch: str(wt))
|
||||
# public URL set -> links must be built from it (not gitea_url)
|
||||
monkeypatch.setattr(SE.settings, "gitea_url", "http://localhost:3000")
|
||||
monkeypatch.setattr(SE.settings, "gitea_public_url", "https://git.mva154.duckdns.org")
|
||||
monkeypatch.setattr(SE.settings, "gitea_owner", "admin")
|
||||
|
||||
html = SE._build_analyst_ready_comment(
|
||||
"enduro-trails", "ET-011", "feature/ET-011-gpx-upload-feature"
|
||||
)
|
||||
|
||||
# text asks for the Approved STATUS, not the obsolete mechanisms
|
||||
assert "Approved" in html
|
||||
assert ":approved:" not in html
|
||||
assert "In Progress" not in html
|
||||
assert "Rejected" in html
|
||||
# clickable links to docs that ACTUALLY exist
|
||||
assert "<a href=" in html
|
||||
base = ("https://git.mva154.duckdns.org/admin/enduro-trails/src/branch/"
|
||||
"feature/ET-011-gpx-upload-feature/docs/work-items/ET-011/")
|
||||
assert base + "01-brd.md" in html
|
||||
assert base + "04-test-plan.yaml" in html
|
||||
# the missing file is NOT invented
|
||||
assert "04b-ui-test-cases.md" not in html
|
||||
# internal git url must NOT appear in clickable links
|
||||
assert "localhost:3000" not in html
|
||||
|
||||
|
||||
def test_analyst_comment_falls_back_to_gitea_url(monkeypatch, tmp_path):
|
||||
"""When gitea_public_url is empty, links fall back to gitea_url."""
|
||||
from src import stage_engine as SE
|
||||
|
||||
wt = tmp_path / "wt"
|
||||
docs = wt / "docs" / "work-items" / "ET-011"
|
||||
docs.mkdir(parents=True)
|
||||
(docs / "01-brd.md").write_text("x")
|
||||
|
||||
monkeypatch.setattr(SE, "get_worktree_path", lambda repo, branch: str(wt))
|
||||
monkeypatch.setattr(SE.settings, "gitea_url", "http://localhost:3000")
|
||||
monkeypatch.setattr(SE.settings, "gitea_public_url", "")
|
||||
monkeypatch.setattr(SE.settings, "gitea_owner", "admin")
|
||||
|
||||
html = SE._build_analyst_ready_comment(
|
||||
"enduro-trails", "ET-011", "feature/ET-011-gpx-upload-feature"
|
||||
)
|
||||
|
||||
base = ("http://localhost:3000/admin/enduro-trails/src/branch/"
|
||||
"feature/ET-011-gpx-upload-feature/docs/work-items/ET-011/")
|
||||
assert base + "01-brd.md" in html
|
||||
92
tests/test_log_rotation.py
Normal file
92
tests/test_log_rotation.py
Normal file
@@ -0,0 +1,92 @@
|
||||
"""L-2: tests for prune_run_logs (run-log rotation).
|
||||
|
||||
Verifies that old / surplus *.log files are removed while fresh logs, non-.log
|
||||
files, the active log, and subdirectories are left intact. Function is
|
||||
best-effort and must never raise.
|
||||
"""
|
||||
import os
|
||||
import time
|
||||
|
||||
from src.agents.launcher import prune_run_logs
|
||||
|
||||
|
||||
def _touch(path, age_days=0):
|
||||
with open(path, "w") as f:
|
||||
f.write("x")
|
||||
mtime = time.time() - age_days * 86400
|
||||
os.utime(path, (mtime, mtime))
|
||||
return path
|
||||
|
||||
|
||||
def test_old_logs_removed_fresh_kept(tmp_path):
|
||||
runs = tmp_path
|
||||
fresh = _touch(str(runs / "1.log"), age_days=1)
|
||||
old = _touch(str(runs / "2.log"), age_days=40)
|
||||
|
||||
removed = prune_run_logs(str(runs), keep_days=30, keep_max=500)
|
||||
|
||||
assert removed == 1
|
||||
assert os.path.exists(fresh)
|
||||
assert not os.path.exists(old)
|
||||
|
||||
|
||||
def test_non_log_files_untouched(tmp_path):
|
||||
runs = tmp_path
|
||||
old_log = _touch(str(runs / "stale.log"), age_days=99)
|
||||
keep_txt = _touch(str(runs / "notes.txt"), age_days=99)
|
||||
keep_db = _touch(str(runs / "orchestrator.db"), age_days=99)
|
||||
|
||||
prune_run_logs(str(runs), keep_days=30, keep_max=500)
|
||||
|
||||
assert not os.path.exists(old_log)
|
||||
assert os.path.exists(keep_txt)
|
||||
assert os.path.exists(keep_db)
|
||||
|
||||
|
||||
def test_keep_max_retains_newest(tmp_path):
|
||||
runs = tmp_path
|
||||
# 5 logs, all recent (within keep_days), increasing age 0..4 days.
|
||||
paths = []
|
||||
for i in range(5):
|
||||
paths.append(_touch(str(runs / f"{i}.log"), age_days=i))
|
||||
|
||||
removed = prune_run_logs(str(runs), keep_days=365, keep_max=2)
|
||||
|
||||
# Only the 2 newest (age 0, 1) survive.
|
||||
assert removed == 3
|
||||
assert os.path.exists(paths[0])
|
||||
assert os.path.exists(paths[1])
|
||||
for p in paths[2:]:
|
||||
assert not os.path.exists(p)
|
||||
|
||||
|
||||
def test_active_log_never_removed(tmp_path):
|
||||
runs = tmp_path
|
||||
active = _touch(str(runs / "active.log"), age_days=99)
|
||||
other = _touch(str(runs / "other.log"), age_days=99)
|
||||
|
||||
removed = prune_run_logs(
|
||||
str(runs), keep_days=30, keep_max=500, active_paths=[active]
|
||||
)
|
||||
|
||||
assert removed == 1
|
||||
assert os.path.exists(active)
|
||||
assert not os.path.exists(other)
|
||||
|
||||
|
||||
def test_subdirs_untouched(tmp_path):
|
||||
runs = tmp_path
|
||||
sub = runs / "sub.log"
|
||||
sub.mkdir() # a directory that happens to end in .log
|
||||
old_log = _touch(str(runs / "old.log"), age_days=99)
|
||||
|
||||
prune_run_logs(str(runs), keep_days=30, keep_max=500)
|
||||
|
||||
assert sub.is_dir()
|
||||
assert not os.path.exists(old_log)
|
||||
|
||||
|
||||
def test_missing_dir_is_noop(tmp_path):
|
||||
missing = tmp_path / "does-not-exist"
|
||||
# Must not raise.
|
||||
assert prune_run_logs(str(missing)) == 0
|
||||
187
tests/test_m6_sequence.py
Normal file
187
tests/test_m6_sequence.py
Normal file
@@ -0,0 +1,187 @@
|
||||
"""M-6: work_item_id derived from Plane sequence_id (source of truth = Plane).
|
||||
|
||||
Covers:
|
||||
* fetch_issue_sequence_id returns int on a valid Plane response (mocked httpx);
|
||||
* returns None on network error / missing field WITHOUT raising;
|
||||
* handle_work_item_created uses prefix-NNN when seq is available, and falls
|
||||
back to get_next_work_item_id when seq is None (Plane down => autonomy);
|
||||
* find_issue_id no longer hardcodes 'ET-' and matches an arbitrary prefix
|
||||
(e.g. ORCH-005) by sequence_id.
|
||||
"""
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
import pytest
|
||||
|
||||
_test_db = os.path.join(tempfile.gettempdir(), "test_orchestrator_m6.db")
|
||||
os.environ["ORCH_DB_PATH"] = _test_db
|
||||
os.environ.setdefault("ORCH_PLANE_WEBHOOK_SECRET", "")
|
||||
os.environ.setdefault("ORCH_GITEA_WEBHOOK_SECRET", "")
|
||||
os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")
|
||||
os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")
|
||||
|
||||
from unittest.mock import patch, AsyncMock, MagicMock # noqa: E402
|
||||
|
||||
from fastapi.testclient import TestClient # noqa: E402
|
||||
|
||||
from src.main import app # noqa: E402
|
||||
from src.db import init_db, get_db # noqa: E402
|
||||
from src import projects as P # noqa: E402
|
||||
from src.projects import reload_projects # noqa: E402
|
||||
import src.plane_sync as plane_sync # noqa: E402
|
||||
|
||||
ORCH_PLANE_ID = "8da6aa25-a60e-44d6-a1e2-d8ae59aa7d6a"
|
||||
ENDURO_PLANE_ID = "7a79f0a9-5278-49cd-9007-9a338f238f9c"
|
||||
|
||||
client = TestClient(app)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def setup(monkeypatch):
|
||||
monkeypatch.setattr(P.settings, "db_path", _test_db)
|
||||
import src.db as _db
|
||||
monkeypatch.setattr(_db.settings, "db_path", _test_db)
|
||||
if os.path.exists(_test_db):
|
||||
os.unlink(_test_db)
|
||||
init_db()
|
||||
|
||||
monkeypatch.setattr("src.webhooks.plane.verify_plane_signature", lambda body, sig: True)
|
||||
|
||||
registry_json = (
|
||||
f'[{{"plane_project_id": "{ENDURO_PLANE_ID}", "repo": "enduro-trails",'
|
||||
f' "work_item_prefix": "ET", "name": "enduro-trails"}},'
|
||||
f' {{"plane_project_id": "{ORCH_PLANE_ID}", "repo": "orchestrator",'
|
||||
f' "work_item_prefix": "ORCH", "name": "orchestrator"}}]'
|
||||
)
|
||||
monkeypatch.setattr(P.settings, "projects_json", registry_json)
|
||||
reload_projects()
|
||||
|
||||
yield
|
||||
|
||||
reload_projects()
|
||||
if os.path.exists(_test_db):
|
||||
os.unlink(_test_db)
|
||||
|
||||
|
||||
def _mock_resp(json_body, status=200):
|
||||
m = MagicMock()
|
||||
m.json.return_value = json_body
|
||||
m.raise_for_status.return_value = None
|
||||
if status >= 400:
|
||||
def _raise():
|
||||
raise RuntimeError(f"HTTP {status}")
|
||||
m.raise_for_status.side_effect = _raise
|
||||
return m
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# fetch_issue_sequence_id
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_fetch_sequence_id_returns_int():
|
||||
with patch.object(plane_sync.httpx, "get", return_value=_mock_resp({"sequence_id": 42})):
|
||||
seq = plane_sync.fetch_issue_sequence_id("issue-uuid", "proj-uuid")
|
||||
assert seq == 42
|
||||
assert isinstance(seq, int)
|
||||
|
||||
|
||||
def test_fetch_sequence_id_network_error_returns_none():
|
||||
with patch.object(plane_sync.httpx, "get", side_effect=RuntimeError("connection refused")):
|
||||
seq = plane_sync.fetch_issue_sequence_id("issue-uuid", "proj-uuid")
|
||||
assert seq is None # must not raise
|
||||
|
||||
|
||||
def test_fetch_sequence_id_missing_field_returns_none():
|
||||
with patch.object(plane_sync.httpx, "get", return_value=_mock_resp({"error": "not found"})):
|
||||
seq = plane_sync.fetch_issue_sequence_id("missing-uuid", "proj-uuid")
|
||||
assert seq is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# handle_work_item_created: seq available -> prefix-NNN
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Feature 1: pipeline starts on a status change to In Progress, not on creation.
|
||||
_IN_PROGRESS = "b873d9eb-993c-48cd-97ac-99a9b1623967"
|
||||
|
||||
|
||||
def _post(plane_id, plane_project_id=ORCH_PLANE_ID, name="A valid work item title"):
|
||||
return client.post(
|
||||
"/webhook/plane",
|
||||
json={
|
||||
"event": "issue",
|
||||
"action": "updated",
|
||||
"data": {
|
||||
"id": plane_id,
|
||||
"name": name,
|
||||
"description_stripped": "This is a sufficiently long description.",
|
||||
"project": plane_project_id,
|
||||
"state": {"id": _IN_PROGRESS, "name": "In Progress", "group": "started"},
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@patch("src.webhooks.plane.launcher")
|
||||
@patch("src.webhooks.plane._create_initial_docs", new_callable=AsyncMock)
|
||||
@patch("src.webhooks.plane._create_gitea_branch", new_callable=AsyncMock)
|
||||
@patch("src.plane_sync.fetch_issue_sequence_id", return_value=7)
|
||||
def test_created_uses_plane_sequence_id(mock_fetch, mock_branch, mock_docs, mock_launcher):
|
||||
mock_launcher.launch.return_value = 1
|
||||
resp = _post("seq-issue")
|
||||
assert resp.status_code == 200
|
||||
conn = get_db()
|
||||
task = conn.execute("SELECT work_item_id FROM tasks WHERE plane_id='seq-issue'").fetchone()
|
||||
conn.close()
|
||||
assert task is not None
|
||||
assert task["work_item_id"] == "ORCH-007"
|
||||
mock_fetch.assert_called_once()
|
||||
|
||||
|
||||
@patch("src.webhooks.plane.launcher")
|
||||
@patch("src.webhooks.plane._create_initial_docs", new_callable=AsyncMock)
|
||||
@patch("src.webhooks.plane._create_gitea_branch", new_callable=AsyncMock)
|
||||
@patch("src.plane_sync.fetch_issue_sequence_id", return_value=None)
|
||||
@patch("src.webhooks.plane.get_next_work_item_id", return_value="ORCH-099")
|
||||
def test_created_falls_back_to_db_when_plane_down(
|
||||
mock_next, mock_fetch, mock_branch, mock_docs, mock_launcher
|
||||
):
|
||||
"""Plane unavailable (seq=None) => fall back to DB increment; task still created."""
|
||||
mock_launcher.launch.return_value = 1
|
||||
resp = _post("fallback-issue")
|
||||
assert resp.status_code == 200
|
||||
conn = get_db()
|
||||
task = conn.execute("SELECT work_item_id FROM tasks WHERE plane_id='fallback-issue'").fetchone()
|
||||
conn.close()
|
||||
assert task is not None # autonomy: Plane down does not block creation
|
||||
assert task["work_item_id"] == "ORCH-099"
|
||||
mock_next.assert_called_once()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# find_issue_id: no hardcoded ET- prefix, matches arbitrary prefix by seq
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_find_issue_id_matches_arbitrary_prefix_by_sequence():
|
||||
"""ORCH-005 must resolve via the issue whose sequence_id == 5 (no ET- assumption)."""
|
||||
issues = {"results": [
|
||||
{"id": "uuid-a", "sequence_id": 3, "name": "something"},
|
||||
{"id": "uuid-b", "sequence_id": 5, "name": "ORCH-005: target"},
|
||||
{"id": "uuid-c", "sequence_id": 9, "name": "other"},
|
||||
]}
|
||||
# No DB row for this work_item_id => goes to the Plane API search branch.
|
||||
with patch.object(plane_sync.httpx, "get", return_value=_mock_resp(issues)):
|
||||
found = plane_sync.find_issue_id("ORCH-005", project_id="proj-uuid")
|
||||
assert found == "uuid-b"
|
||||
|
||||
|
||||
def test_find_issue_id_matches_et_prefix_too():
|
||||
"""Backward compat: ET-002 still resolves by sequence_id == 2."""
|
||||
issues = {"results": [
|
||||
{"id": "uuid-x", "sequence_id": 2, "name": "ET item"},
|
||||
{"id": "uuid-y", "sequence_id": 7, "name": "other"},
|
||||
]}
|
||||
with patch.object(plane_sync.httpx, "get", return_value=_mock_resp(issues)):
|
||||
found = plane_sync.find_issue_id("ET-002", project_id="proj-uuid")
|
||||
assert found == "uuid-x"
|
||||
462
tests/test_orch10_states.py
Normal file
462
tests/test_orch10_states.py
Normal file
@@ -0,0 +1,462 @@
|
||||
"""ORCH-10: per-project Plane state resolution tests.
|
||||
|
||||
Verifies:
|
||||
1. get_project_states(ET_PROJECT_ID) -> enduro-trails UUIDs (backward compat).
|
||||
2. get_project_states(ORCH_PROJECT_ID) -> orchestrator UUIDs.
|
||||
3. get_project_states falls back to _DEFAULT_STATES when the Plane API fails.
|
||||
4. _STATES_CACHE is populated after a successful call and reload_project_states
|
||||
evicts it (per-project and full flush).
|
||||
5. stage_to_state() resolves per-project UUIDs for both projects.
|
||||
6. Webhook handle_issue_updated recognises In Progress for BOTH projects
|
||||
(ORCH-10 critical path: e331bfb3 for ORCH, b873d9eb for ET -> pipeline start).
|
||||
7. Webhook handle_issue_updated recognises Approved/Rejected per project.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
from unittest.mock import patch, MagicMock, AsyncMock
|
||||
|
||||
import pytest
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Minimal env so src/config.py can import without a real .env file.
|
||||
# ---------------------------------------------------------------------------
|
||||
os.environ.setdefault("ORCH_PLANE_API_URL", "http://plane.local")
|
||||
os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")
|
||||
os.environ.setdefault("ORCH_PLANE_WORKSPACE_SLUG", "test-ws")
|
||||
os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")
|
||||
os.environ.setdefault("ORCH_PLANE_WEBHOOK_SECRET", "")
|
||||
os.environ.setdefault("ORCH_GITEA_WEBHOOK_SECRET", "")
|
||||
|
||||
_test_db = os.path.join(tempfile.gettempdir(), "test_orch10_states.db")
|
||||
os.environ["ORCH_DB_PATH"] = _test_db
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Known UUIDs from the ТЗ (source of truth).
|
||||
# ---------------------------------------------------------------------------
|
||||
ET_PROJECT_ID = "7a79f0a9-5278-49cd-9007-9a338f238f9c"
|
||||
ORCH_PROJECT_ID = "8da6aa25-a60e-44d6-a1e2-d8ae59aa7d6a"
|
||||
|
||||
ET_STATES = {
|
||||
"backlog": "113b24f6-cce8-4be9-9a22-a359b9cf0122",
|
||||
"todo": "2c7d3df3-9eb9-419b-92b7-d7d560bcdd10",
|
||||
"in_progress": "b873d9eb-993c-48cd-97ac-99a9b1623967",
|
||||
"architecture": "3020bbb7-6122-4663-930c-0315ba8dfa3d",
|
||||
"development": "9920609b-f140-4e46-ab95-89acda8412c8",
|
||||
"review": "ba0d802c-5218-41d4-ab43-978b0ea123ed",
|
||||
"testing": "7855d807-b1bf-42ef-8dae-6cde0df92d02",
|
||||
"approved": "a519a341-dada-4a91-8910-7604f82b79c5",
|
||||
"rejected": "ba958f3c-5db5-461d-8f82-89425e413b97",
|
||||
"done": "381a2833-3c4e-4be5-bd0f-be84cb946ad8",
|
||||
"cancelled": "b1cae7f9-961d-4889-a179-f3acea697d17",
|
||||
"needs_input": "babf08a3-ff4d-41f3-a821-5491aa29a8ac",
|
||||
"in_review": "38fb1f64-aa1e-48a3-92e0-0b109679046b",
|
||||
"blocked": "6c4543f9-ac47-4ef7-ae0f-070020dc9920",
|
||||
}
|
||||
|
||||
ORCH_STATES = {
|
||||
"backlog": "2d5d42ff-e94d-4209-a664-8020c28c2a95",
|
||||
"todo": "b5d3f512-4870-460f-bf6b-4ea560f00a6f",
|
||||
"in_progress": "e331bfb3-e17e-4699-ba48-4abb89c21b7b",
|
||||
"architecture": "795cc32f-5f5a-4244-be7b-9acffc92c7c0",
|
||||
"development": "f5ed4705-5029-470d-89a9-54c3f0d211ee",
|
||||
"review": "2026f3d9-0f43-4054-ab5f-3f9bae3308b8",
|
||||
"testing": "81c5cd78-2993-4f2c-9e8c-2f52db3e5623",
|
||||
"approved": "63f2c8fe-dcda-4ace-952f-dd88bd0118ff",
|
||||
"rejected": "4c769e90-bf80-4a52-b97a-e1c84904bfc3",
|
||||
"done": "3738cd3c-7610-4907-ba5e-26b9a248d9c0",
|
||||
"cancelled": "59d1d210-8e3a-4a83-930a-cbc5dbf6ad85",
|
||||
"needs_input": "99978b3f-72fe-46e3-8b9b-25ba02899fa0",
|
||||
"in_review": "c52e99b9-31ae-4b31-be3f-9773eea7a747",
|
||||
"blocked": "505f01a6-a12f-4121-aaa7-9c5dd009acc4",
|
||||
}
|
||||
|
||||
|
||||
def _make_states_response(states_dict: dict) -> dict:
|
||||
"""Build a fake Plane GET /states/ response."""
|
||||
name_map = {v: k for k, v in {
|
||||
"backlog": "Backlog",
|
||||
"todo": "Todo",
|
||||
"in_progress": "In Progress",
|
||||
"architecture": "Architecture",
|
||||
"development": "Development",
|
||||
"review": "Review",
|
||||
"testing": "Testing",
|
||||
"approved": "Approved",
|
||||
"rejected": "Rejected",
|
||||
"done": "Done",
|
||||
"cancelled": "Cancelled",
|
||||
"needs_input": "Needs Input",
|
||||
"in_review": "In Review",
|
||||
"blocked": "Blocked",
|
||||
}.items()}
|
||||
logical_to_plane = {
|
||||
"backlog": "Backlog",
|
||||
"todo": "Todo",
|
||||
"in_progress": "In Progress",
|
||||
"architecture": "Architecture",
|
||||
"development": "Development",
|
||||
"review": "Review",
|
||||
"testing": "Testing",
|
||||
"approved": "Approved",
|
||||
"rejected": "Rejected",
|
||||
"done": "Done",
|
||||
"cancelled": "Cancelled",
|
||||
"needs_input": "Needs Input",
|
||||
"in_review": "In Review",
|
||||
"blocked": "Blocked",
|
||||
}
|
||||
results = [
|
||||
{"id": uid, "name": logical_to_plane[key]}
|
||||
for key, uid in states_dict.items()
|
||||
if key in logical_to_plane
|
||||
]
|
||||
return {"results": results}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers to build fake httpx responses.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _fake_response(data: dict, status: int = 200):
|
||||
m = MagicMock()
|
||||
m.status_code = status
|
||||
m.json.return_value = data
|
||||
if status >= 400:
|
||||
from httpx import HTTPStatusError, Request, Response
|
||||
m.raise_for_status.side_effect = HTTPStatusError(
|
||||
"error", request=MagicMock(), response=MagicMock()
|
||||
)
|
||||
else:
|
||||
m.raise_for_status.return_value = None
|
||||
return m
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixtures
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def reset_states_cache():
|
||||
"""Ensure the states cache is empty before each test."""
|
||||
import src.plane_sync as ps
|
||||
ps.reload_project_states()
|
||||
yield
|
||||
ps.reload_project_states()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 1 & 2. get_project_states returns correct UUIDs per project
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_get_project_states_enduro():
|
||||
"""ET project -> enduro-trails UUIDs."""
|
||||
import src.plane_sync as ps
|
||||
with patch("src.plane_sync.httpx.get") as mock_get:
|
||||
mock_get.return_value = _fake_response(_make_states_response(ET_STATES))
|
||||
states = ps.get_project_states(ET_PROJECT_ID)
|
||||
|
||||
for key, expected_uuid in ET_STATES.items():
|
||||
assert states[key] == expected_uuid, (
|
||||
f"ET state '{key}': expected {expected_uuid}, got {states.get(key)}"
|
||||
)
|
||||
|
||||
|
||||
def test_get_project_states_orchestrator():
|
||||
"""ORCH project -> orchestrator UUIDs."""
|
||||
import src.plane_sync as ps
|
||||
with patch("src.plane_sync.httpx.get") as mock_get:
|
||||
mock_get.return_value = _fake_response(_make_states_response(ORCH_STATES))
|
||||
states = ps.get_project_states(ORCH_PROJECT_ID)
|
||||
|
||||
for key, expected_uuid in ORCH_STATES.items():
|
||||
assert states[key] == expected_uuid, (
|
||||
f"ORCH state '{key}': expected {expected_uuid}, got {states.get(key)}"
|
||||
)
|
||||
|
||||
|
||||
def test_get_project_states_et_in_progress_uuid():
|
||||
"""ET in_progress == b873d9eb (exact UUID from ТЗ)."""
|
||||
import src.plane_sync as ps
|
||||
with patch("src.plane_sync.httpx.get") as mock_get:
|
||||
mock_get.return_value = _fake_response(_make_states_response(ET_STATES))
|
||||
states = ps.get_project_states(ET_PROJECT_ID)
|
||||
assert states["in_progress"] == "b873d9eb-993c-48cd-97ac-99a9b1623967"
|
||||
|
||||
|
||||
def test_get_project_states_orch_in_progress_uuid():
|
||||
"""ORCH in_progress == e331bfb3 (exact UUID from ТЗ) — the ORCH-10 blocker."""
|
||||
import src.plane_sync as ps
|
||||
with patch("src.plane_sync.httpx.get") as mock_get:
|
||||
mock_get.return_value = _fake_response(_make_states_response(ORCH_STATES))
|
||||
states = ps.get_project_states(ORCH_PROJECT_ID)
|
||||
assert states["in_progress"] == "e331bfb3-e17e-4699-ba48-4abb89c21b7b"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 3. Fallback to _DEFAULT_STATES when API fails
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_get_project_states_api_error_fallback():
|
||||
"""Network failure -> returns _DEFAULT_STATES (ET values)."""
|
||||
import src.plane_sync as ps
|
||||
with patch("src.plane_sync.httpx.get", side_effect=Exception("network error")):
|
||||
states = ps.get_project_states(ORCH_PROJECT_ID)
|
||||
# Should return _DEFAULT_STATES (ET values) as fallback.
|
||||
assert states is ps._DEFAULT_STATES
|
||||
|
||||
|
||||
def test_get_project_states_non_200_fallback():
|
||||
"""Non-2xx response -> returns _DEFAULT_STATES."""
|
||||
import src.plane_sync as ps
|
||||
with patch("src.plane_sync.httpx.get") as mock_get:
|
||||
mock_get.return_value = _fake_response({}, status=500)
|
||||
states = ps.get_project_states(ORCH_PROJECT_ID)
|
||||
assert states is ps._DEFAULT_STATES
|
||||
|
||||
|
||||
def test_get_project_states_empty_response_fallback():
|
||||
"""Empty results list -> returns _DEFAULT_STATES."""
|
||||
import src.plane_sync as ps
|
||||
with patch("src.plane_sync.httpx.get") as mock_get:
|
||||
mock_get.return_value = _fake_response({"results": []})
|
||||
states = ps.get_project_states(ORCH_PROJECT_ID)
|
||||
assert states is ps._DEFAULT_STATES
|
||||
|
||||
|
||||
def test_get_project_states_none_project_id_fallback():
|
||||
"""None project_id -> _DEFAULT_STATES immediately (no API call)."""
|
||||
import src.plane_sync as ps
|
||||
with patch("src.plane_sync.httpx.get") as mock_get:
|
||||
states = ps.get_project_states(None)
|
||||
mock_get.assert_not_called()
|
||||
assert states is ps._DEFAULT_STATES
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 4. Caching & reload_project_states
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_get_project_states_caches_result():
|
||||
"""Second call returns cached result without hitting the API again."""
|
||||
import src.plane_sync as ps
|
||||
with patch("src.plane_sync.httpx.get") as mock_get:
|
||||
mock_get.return_value = _fake_response(_make_states_response(ET_STATES))
|
||||
_ = ps.get_project_states(ET_PROJECT_ID)
|
||||
_ = ps.get_project_states(ET_PROJECT_ID)
|
||||
assert mock_get.call_count == 1
|
||||
|
||||
|
||||
def test_reload_project_states_per_project():
|
||||
"""reload_project_states(project_id) evicts only that project."""
|
||||
import src.plane_sync as ps
|
||||
with patch("src.plane_sync.httpx.get") as mock_get:
|
||||
mock_get.return_value = _fake_response(_make_states_response(ET_STATES))
|
||||
ps.get_project_states(ET_PROJECT_ID)
|
||||
assert ET_PROJECT_ID in ps._STATES_CACHE
|
||||
|
||||
ps.reload_project_states(ET_PROJECT_ID)
|
||||
assert ET_PROJECT_ID not in ps._STATES_CACHE
|
||||
|
||||
|
||||
def test_reload_project_states_full_flush():
|
||||
"""reload_project_states() with no args clears entire cache."""
|
||||
import src.plane_sync as ps
|
||||
with patch("src.plane_sync.httpx.get") as mock_get:
|
||||
mock_get.return_value = _fake_response(_make_states_response(ET_STATES))
|
||||
ps.get_project_states(ET_PROJECT_ID)
|
||||
ps.reload_project_states()
|
||||
assert ps._STATES_CACHE == {}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 5. stage_to_state() resolves per-project
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_stage_to_state_et_analysis():
|
||||
"""ET analysis -> in_progress UUID b873d9eb."""
|
||||
import src.plane_sync as ps
|
||||
with patch("src.plane_sync.httpx.get") as mock_get:
|
||||
mock_get.return_value = _fake_response(_make_states_response(ET_STATES))
|
||||
uid = ps.stage_to_state("analysis", ET_PROJECT_ID)
|
||||
assert uid == "b873d9eb-993c-48cd-97ac-99a9b1623967"
|
||||
|
||||
|
||||
def test_stage_to_state_orch_analysis():
|
||||
"""ORCH analysis -> in_progress UUID e331bfb3."""
|
||||
import src.plane_sync as ps
|
||||
with patch("src.plane_sync.httpx.get") as mock_get:
|
||||
mock_get.return_value = _fake_response(_make_states_response(ORCH_STATES))
|
||||
uid = ps.stage_to_state("analysis", ORCH_PROJECT_ID)
|
||||
assert uid == "e331bfb3-e17e-4699-ba48-4abb89c21b7b"
|
||||
|
||||
|
||||
def test_stage_to_state_unknown_stage():
|
||||
"""Unknown stage -> None."""
|
||||
import src.plane_sync as ps
|
||||
with patch("src.plane_sync.httpx.get") as mock_get:
|
||||
mock_get.return_value = _fake_response(_make_states_response(ET_STATES))
|
||||
uid = ps.stage_to_state("nonexistent_stage", ET_PROJECT_ID)
|
||||
assert uid is None
|
||||
|
||||
|
||||
def test_stage_to_state_orch_done():
|
||||
"""ORCH done -> 3738cd3c."""
|
||||
import src.plane_sync as ps
|
||||
with patch("src.plane_sync.httpx.get") as mock_get:
|
||||
mock_get.return_value = _fake_response(_make_states_response(ORCH_STATES))
|
||||
uid = ps.stage_to_state("done", ORCH_PROJECT_ID)
|
||||
assert uid == "3738cd3c-7610-4907-ba5e-26b9a248d9c0"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 6 & 7. Webhook handle_issue_updated — ORCH-10 critical path
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_webhook_in_progress_et_starts_pipeline():
|
||||
"""ET In Progress (b873d9eb) -> handle_status_start called."""
|
||||
from src.webhooks.plane import handle_issue_updated
|
||||
import src.plane_sync as ps
|
||||
|
||||
et_states_resp = _make_states_response(ET_STATES)
|
||||
with patch("src.plane_sync.httpx.get") as mock_httpx, \
|
||||
patch("src.webhooks.plane.handle_status_start", new_callable=AsyncMock) as mock_start, \
|
||||
patch("src.webhooks.plane.handle_verdict", new_callable=AsyncMock) as mock_verdict:
|
||||
mock_httpx.return_value = _fake_response(et_states_resp)
|
||||
data = {
|
||||
"id": "et-issue-uuid",
|
||||
"state": {"id": "b873d9eb-993c-48cd-97ac-99a9b1623967", "name": "In Progress"},
|
||||
}
|
||||
await handle_issue_updated(data, ET_PROJECT_ID)
|
||||
|
||||
mock_start.assert_called_once()
|
||||
mock_verdict.assert_not_called()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_webhook_in_progress_orch_starts_pipeline():
|
||||
"""ORCH In Progress (e331bfb3) -> handle_status_start called.
|
||||
|
||||
This is the ORCH-10 blocker: previously the webhook compared against the
|
||||
hardcoded ET UUID (b873d9eb) and the ORCH UUID (e331bfb3) was silently
|
||||
ignored — the pipeline never started for ORCH tasks.
|
||||
"""
|
||||
from src.webhooks.plane import handle_issue_updated
|
||||
import src.plane_sync as ps
|
||||
|
||||
orch_states_resp = _make_states_response(ORCH_STATES)
|
||||
with patch("src.plane_sync.httpx.get") as mock_httpx, \
|
||||
patch("src.webhooks.plane.handle_status_start", new_callable=AsyncMock) as mock_start, \
|
||||
patch("src.webhooks.plane.handle_verdict", new_callable=AsyncMock) as mock_verdict:
|
||||
mock_httpx.return_value = _fake_response(orch_states_resp)
|
||||
data = {
|
||||
"id": "orch-issue-uuid",
|
||||
"state": {"id": "e331bfb3-e17e-4699-ba48-4abb89c21b7b", "name": "In Progress"},
|
||||
}
|
||||
await handle_issue_updated(data, ORCH_PROJECT_ID)
|
||||
|
||||
mock_start.assert_called_once()
|
||||
mock_verdict.assert_not_called()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_webhook_approved_orch():
|
||||
"""ORCH Approved (63f2c8fe) -> handle_verdict(approved=True)."""
|
||||
from src.webhooks.plane import handle_issue_updated
|
||||
orch_states_resp = _make_states_response(ORCH_STATES)
|
||||
with patch("src.plane_sync.httpx.get") as mock_httpx, \
|
||||
patch("src.webhooks.plane.handle_status_start", new_callable=AsyncMock) as mock_start, \
|
||||
patch("src.webhooks.plane.handle_verdict", new_callable=AsyncMock) as mock_verdict:
|
||||
mock_httpx.return_value = _fake_response(orch_states_resp)
|
||||
data = {
|
||||
"id": "orch-issue-uuid",
|
||||
"state": {"id": "63f2c8fe-dcda-4ace-952f-dd88bd0118ff", "name": "Approved"},
|
||||
}
|
||||
await handle_issue_updated(data, ORCH_PROJECT_ID)
|
||||
|
||||
mock_verdict.assert_called_once_with(data, ORCH_PROJECT_ID, approved=True)
|
||||
mock_start.assert_not_called()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_webhook_rejected_orch():
|
||||
"""ORCH Rejected (4c769e90) -> handle_verdict(approved=False)."""
|
||||
from src.webhooks.plane import handle_issue_updated
|
||||
orch_states_resp = _make_states_response(ORCH_STATES)
|
||||
with patch("src.plane_sync.httpx.get") as mock_httpx, \
|
||||
patch("src.webhooks.plane.handle_status_start", new_callable=AsyncMock) as mock_start, \
|
||||
patch("src.webhooks.plane.handle_verdict", new_callable=AsyncMock) as mock_verdict:
|
||||
mock_httpx.return_value = _fake_response(orch_states_resp)
|
||||
data = {
|
||||
"id": "orch-issue-uuid",
|
||||
"state": {"id": "4c769e90-bf80-4a52-b97a-e1c84904bfc3", "name": "Rejected"},
|
||||
}
|
||||
await handle_issue_updated(data, ORCH_PROJECT_ID)
|
||||
|
||||
mock_verdict.assert_called_once_with(data, ORCH_PROJECT_ID, approved=False)
|
||||
mock_start.assert_not_called()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_webhook_other_state_no_action():
|
||||
"""A non-trigger state (e.g. 'Needs Input') -> no pipeline action."""
|
||||
from src.webhooks.plane import handle_issue_updated
|
||||
orch_states_resp = _make_states_response(ORCH_STATES)
|
||||
with patch("src.plane_sync.httpx.get") as mock_httpx, \
|
||||
patch("src.webhooks.plane.handle_status_start", new_callable=AsyncMock) as mock_start, \
|
||||
patch("src.webhooks.plane.handle_verdict", new_callable=AsyncMock) as mock_verdict:
|
||||
mock_httpx.return_value = _fake_response(orch_states_resp)
|
||||
data = {
|
||||
"id": "orch-issue-uuid",
|
||||
"state": {"id": "99978b3f-72fe-46e3-8b9b-25ba02899fa0", "name": "Needs Input"},
|
||||
}
|
||||
await handle_issue_updated(data, ORCH_PROJECT_ID)
|
||||
|
||||
mock_start.assert_not_called()
|
||||
mock_verdict.assert_not_called()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_webhook_et_in_progress_not_confused_with_orch():
|
||||
"""ET In Progress UUID does NOT trigger pipeline for ORCH project.
|
||||
|
||||
This guards against the reverse confusion: if somehow an ET UUID was sent
|
||||
for an ORCH project event, it should NOT start the pipeline (wrong UUID).
|
||||
"""
|
||||
from src.webhooks.plane import handle_issue_updated
|
||||
orch_states_resp = _make_states_response(ORCH_STATES)
|
||||
with patch("src.plane_sync.httpx.get") as mock_httpx, \
|
||||
patch("src.webhooks.plane.handle_status_start", new_callable=AsyncMock) as mock_start, \
|
||||
patch("src.webhooks.plane.handle_verdict", new_callable=AsyncMock) as mock_verdict:
|
||||
mock_httpx.return_value = _fake_response(orch_states_resp)
|
||||
# Send ET's in_progress UUID for an ORCH project event.
|
||||
data = {
|
||||
"id": "orch-issue-uuid",
|
||||
"state": {"id": "b873d9eb-993c-48cd-97ac-99a9b1623967", "name": "In Progress"},
|
||||
}
|
||||
await handle_issue_updated(data, ORCH_PROJECT_ID)
|
||||
|
||||
# Since ORCH in_progress is e331bfb3, ET's b873d9eb should NOT trigger start.
|
||||
mock_start.assert_not_called()
|
||||
mock_verdict.assert_not_called()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 8. _DEFAULT_STATES / PLANE_STATES alias preserved
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_plane_states_alias_is_default_states():
|
||||
"""PLANE_STATES is still exported and equals _DEFAULT_STATES (backward compat)."""
|
||||
import src.plane_sync as ps
|
||||
assert ps.PLANE_STATES is ps._DEFAULT_STATES
|
||||
|
||||
|
||||
def test_default_states_et_values():
|
||||
"""_DEFAULT_STATES contains the original enduro-trails UUIDs."""
|
||||
import src.plane_sync as ps
|
||||
for key, expected in ET_STATES.items():
|
||||
assert ps._DEFAULT_STATES[key] == expected, (
|
||||
f"_DEFAULT_STATES['{key}']: expected {expected}, got {ps._DEFAULT_STATES.get(key)}"
|
||||
)
|
||||
213
tests/test_pipeline_start_bugs.py
Normal file
213
tests/test_pipeline_start_bugs.py
Normal file
@@ -0,0 +1,213 @@
|
||||
"""Tests for the two pipeline-start bugs surfaced by the ET-006 live run.
|
||||
|
||||
BUG 1: issue.updated (status -> In Progress) ships a payload WITHOUT the
|
||||
description, so start_pipeline must pull it from the Plane issue API
|
||||
before QG-0 runs (otherwise QG-0 wrongly blocks the issue).
|
||||
|
||||
BUG 2a: M-6 derives work_item_id from the Plane sequence_id, which can collide.
|
||||
ensure_unique_work_item_id() must hand out the next FREE id instead of
|
||||
reusing one that is already in the tasks table.
|
||||
|
||||
BUG 2b: two tasks with an (artificially) identical work_item_id must not share a
|
||||
branch/worktree.
|
||||
|
||||
launcher / Gitea / Plane network are mocked. Real FastAPI endpoint via
|
||||
TestClient for the BUG 1 end-to-end path.
|
||||
"""
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
_test_db = os.path.join(tempfile.gettempdir(), "test_orchestrator_pipeline_bugs.db")
|
||||
os.environ["ORCH_DB_PATH"] = _test_db
|
||||
os.environ.setdefault("ORCH_PLANE_WEBHOOK_SECRET", "")
|
||||
os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")
|
||||
os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")
|
||||
|
||||
import pytest # noqa: E402
|
||||
from unittest.mock import patch, AsyncMock # noqa: E402
|
||||
from fastapi.testclient import TestClient # noqa: E402
|
||||
|
||||
from src.main import app # noqa: E402
|
||||
from src.db import init_db, get_db, ensure_unique_work_item_id # noqa: E402
|
||||
from src import projects as P # noqa: E402
|
||||
from src.projects import reload_projects # noqa: E402
|
||||
from src.git_worktree import get_worktree_path # noqa: E402
|
||||
|
||||
ENDURO_PLANE_ID = "7a79f0a9-5278-49cd-9007-9a338f238f9c"
|
||||
IN_PROGRESS = "b873d9eb-993c-48cd-97ac-99a9b1623967"
|
||||
BACKLOG = "113b24f6-cce8-4be9-9a22-a359b9cf0122"
|
||||
|
||||
client = TestClient(app)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def setup(monkeypatch):
|
||||
monkeypatch.setattr(P.settings, "db_path", _test_db)
|
||||
import src.db as _db
|
||||
monkeypatch.setattr(_db.settings, "db_path", _test_db)
|
||||
if os.path.exists(_test_db):
|
||||
os.unlink(_test_db)
|
||||
init_db()
|
||||
monkeypatch.setattr("src.webhooks.plane.verify_plane_signature", lambda body, sig: True)
|
||||
registry_json = (
|
||||
f'[{{"plane_project_id": "{ENDURO_PLANE_ID}", "repo": "enduro-trails",'
|
||||
f' "work_item_prefix": "ET", "name": "enduro-trails"}}]'
|
||||
)
|
||||
monkeypatch.setattr(P.settings, "projects_json", registry_json)
|
||||
reload_projects()
|
||||
yield
|
||||
reload_projects()
|
||||
if os.path.exists(_test_db):
|
||||
os.unlink(_test_db)
|
||||
|
||||
|
||||
def _insert_task(work_item_id, branch, plane_id="x"):
|
||||
conn = get_db()
|
||||
conn.execute(
|
||||
"INSERT INTO tasks (plane_id, work_item_id, repo, branch, stage, plane_issue_id) "
|
||||
"VALUES (?, ?, ?, ?, ?, ?)",
|
||||
(plane_id, work_item_id, "enduro-trails", branch, "analysis", plane_id),
|
||||
)
|
||||
conn.commit()
|
||||
conn.close()
|
||||
|
||||
|
||||
def _count(plane_id):
|
||||
conn = get_db()
|
||||
n = conn.execute("SELECT COUNT(*) FROM tasks WHERE plane_id=?", (plane_id,)).fetchone()[0]
|
||||
conn.close()
|
||||
return n
|
||||
|
||||
|
||||
def _task(plane_id):
|
||||
conn = get_db()
|
||||
row = conn.execute("SELECT * FROM tasks WHERE plane_id=?", (plane_id,)).fetchone()
|
||||
conn.close()
|
||||
return row
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# BUG 1
|
||||
# --------------------------------------------------------------------------- #
|
||||
def _to_in_progress_no_desc(plane_id="bug1"):
|
||||
"""issue.updated payload WITHOUT description (only changed fields)."""
|
||||
return client.post("/webhook/plane", json={
|
||||
"event": "issue", "action": "updated",
|
||||
"data": {
|
||||
"id": plane_id, "name": "A valid backlog item title",
|
||||
# NO description / description_stripped here, exactly like Plane sends
|
||||
# on a status change.
|
||||
"project": ENDURO_PLANE_ID,
|
||||
"state": {"id": IN_PROGRESS, "name": "In Progress", "group": "started"},
|
||||
},
|
||||
"activity": {"field": "state", "new_value": IN_PROGRESS, "old_value": BACKLOG},
|
||||
})
|
||||
|
||||
|
||||
@patch("src.webhooks.plane.enqueue_job", return_value=1)
|
||||
@patch("src.webhooks.plane._create_initial_docs", new_callable=AsyncMock)
|
||||
@patch("src.webhooks.plane._create_gitea_branch", new_callable=AsyncMock)
|
||||
@patch("src.plane_sync.fetch_issue_sequence_id", return_value=42)
|
||||
@patch("src.plane_sync.fetch_issue_fields",
|
||||
return_value=("A valid backlog item title",
|
||||
"This is a sufficiently long description fetched from Plane API."))
|
||||
def test_status_start_fetches_description(
|
||||
mock_fields, mock_seq, mock_branch, mock_docs, mock_enqueue
|
||||
):
|
||||
"""BUG 1: empty description in payload -> start_pipeline pulls it from the
|
||||
Plane API (single fetch_issue_fields GET) -> QG-0 passes -> task created +
|
||||
analyst enqueued (NOT blocked)."""
|
||||
resp = _to_in_progress_no_desc("bug1")
|
||||
assert resp.status_code == 200
|
||||
# name + description were pulled from the API in one call
|
||||
mock_fields.assert_called_once()
|
||||
# QG-0 passed -> task created and analyst launched (NOT set_issue_blocked)
|
||||
assert _count("bug1") == 1
|
||||
assert _task("bug1")["stage"] == "analysis"
|
||||
mock_enqueue.assert_called_once()
|
||||
assert mock_enqueue.call_args.args[0] == "analyst"
|
||||
|
||||
|
||||
@patch("src.webhooks.plane.enqueue_job", return_value=1)
|
||||
@patch("src.webhooks.plane._create_initial_docs", new_callable=AsyncMock)
|
||||
@patch("src.webhooks.plane._create_gitea_branch", new_callable=AsyncMock)
|
||||
@patch("src.plane_sync.fetch_issue_sequence_id", return_value=42)
|
||||
@patch("src.plane_sync.fetch_issue_fields", return_value=("", ""))
|
||||
def test_status_start_empty_api_still_blocks(
|
||||
mock_fields, mock_seq, mock_branch, mock_docs, mock_enqueue
|
||||
):
|
||||
"""BUG 1 negative path: if the API also returns empty, QG-0 legitimately
|
||||
fails -> NO task is created (truly empty ticket)."""
|
||||
resp = _to_in_progress_no_desc("bug1-empty")
|
||||
assert resp.status_code == 200
|
||||
mock_fields.assert_called_once()
|
||||
assert _count("bug1-empty") == 0
|
||||
mock_enqueue.assert_not_called()
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# BUG 2a
|
||||
# --------------------------------------------------------------------------- #
|
||||
def test_work_item_id_uniqueness():
|
||||
"""BUG 2a: if ET-006 is already in tasks, the guard returns the next free
|
||||
id (ET-007), not ET-006 again."""
|
||||
_insert_task("ET-006", "feature/ET-006-gpx-upload", plane_id="old")
|
||||
assert ensure_unique_work_item_id("ET-006", "enduro-trails") == "ET-007"
|
||||
|
||||
# ET-006 AND ET-007 taken -> next free is ET-008.
|
||||
_insert_task("ET-007", "feature/ET-007-something", plane_id="old2")
|
||||
assert ensure_unique_work_item_id("ET-006", "enduro-trails") == "ET-008"
|
||||
|
||||
# A free id is returned unchanged.
|
||||
assert ensure_unique_work_item_id("ET-099", "enduro-trails") == "ET-099"
|
||||
|
||||
# Per-repo isolation: a different repo with the same id is not a collision.
|
||||
assert ensure_unique_work_item_id("ET-006", "other-repo") == "ET-006"
|
||||
|
||||
|
||||
@patch("src.webhooks.plane.enqueue_job", return_value=1)
|
||||
@patch("src.webhooks.plane._create_initial_docs", new_callable=AsyncMock)
|
||||
@patch("src.webhooks.plane._create_gitea_branch", new_callable=AsyncMock)
|
||||
@patch("src.plane_sync.fetch_issue_sequence_id", return_value=6)
|
||||
@patch("src.plane_sync.fetch_issue_fields",
|
||||
return_value=("Popup enduro trails feature",
|
||||
"A sufficiently long description for QG-0 to pass cleanly."))
|
||||
def test_collision_reassigns_in_start_pipeline(
|
||||
mock_fields, mock_seq, mock_branch, mock_docs, mock_enqueue
|
||||
):
|
||||
"""BUG 2a end-to-end: ET-006 already exists -> a new In Progress issue whose
|
||||
Plane sequence_id is also 6 must NOT reuse ET-006."""
|
||||
_insert_task("ET-006", "feature/ET-006-gpx-upload", plane_id="task8")
|
||||
resp = client.post("/webhook/plane", json={
|
||||
"event": "issue", "action": "updated",
|
||||
"data": {
|
||||
"id": "task25", "name": "Popup enduro trails feature",
|
||||
"description_stripped": "A sufficiently long description for QG-0.",
|
||||
"project": ENDURO_PLANE_ID,
|
||||
"state": {"id": IN_PROGRESS, "name": "In Progress", "group": "started"},
|
||||
},
|
||||
"activity": {"field": "state", "new_value": IN_PROGRESS, "old_value": BACKLOG},
|
||||
})
|
||||
assert resp.status_code == 200
|
||||
new_id = _task("task25")["work_item_id"]
|
||||
assert new_id != "ET-006"
|
||||
assert new_id == "ET-007"
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# BUG 2b
|
||||
# --------------------------------------------------------------------------- #
|
||||
def test_worktree_per_task():
|
||||
"""BUG 2b: two tasks must not resolve to the same worktree path. With the
|
||||
uniqueness guard the branches differ, so the worktree paths differ too."""
|
||||
_insert_task("ET-006", "feature/ET-006-gpx-upload", plane_id="task8")
|
||||
# The second task gets a unique id via the guard...
|
||||
new_id = ensure_unique_work_item_id("ET-006", "enduro-trails")
|
||||
assert new_id == "ET-007"
|
||||
branch_a = "feature/ET-006-gpx-upload"
|
||||
branch_b = f"feature/{new_id}-popup-enduro-trails"
|
||||
|
||||
wt_a = get_worktree_path("enduro-trails", branch_a)
|
||||
wt_b = get_worktree_path("enduro-trails", branch_b)
|
||||
assert wt_a != wt_b, "two tasks must not share a worktree path"
|
||||
99
tests/test_plane_author.py
Normal file
99
tests/test_plane_author.py
Normal file
@@ -0,0 +1,99 @@
|
||||
"""Tests for per-agent Plane comment authorship (feat: per-agent bot author).
|
||||
|
||||
Covers:
|
||||
* _headers_for: role -> bot token; None/unknown/empty token -> shared fallback.
|
||||
* add_comment: author is propagated into the POST headers; no author keeps
|
||||
backward-compatible behaviour (shared orchestrator token).
|
||||
|
||||
GET/PATCH calls are intentionally NOT covered here: they stay on the shared
|
||||
token by design and are unchanged by this feature.
|
||||
"""
|
||||
|
||||
import os
|
||||
|
||||
# Set env defaults before importing app modules (same convention as the other
|
||||
# suites) so config/settings load cleanly without a real .env.
|
||||
os.environ.setdefault("ORCH_PLANE_API_TOKEN", "shared-token")
|
||||
os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")
|
||||
|
||||
from unittest.mock import patch, MagicMock # noqa: E402
|
||||
|
||||
from src import plane_sync # noqa: E402
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# _headers_for
|
||||
# --------------------------------------------------------------------------- #
|
||||
def test_headers_for_known_role_uses_bot_token():
|
||||
"""A known role with a configured token -> that bot's X-API-Key."""
|
||||
with patch.dict(plane_sync.PLANE_BOT_TOKENS, {"analyst": "analyst-tok"}, clear=False):
|
||||
assert plane_sync._headers_for("analyst") == {"X-API-Key": "analyst-tok"}
|
||||
|
||||
|
||||
def test_headers_for_none_falls_back_to_shared():
|
||||
"""author=None -> shared orchestrator headers."""
|
||||
assert plane_sync._headers_for(None) is plane_sync.PLANE_HEADERS
|
||||
|
||||
|
||||
def test_headers_for_unknown_role_falls_back_to_shared():
|
||||
"""Unknown role -> shared orchestrator headers."""
|
||||
assert plane_sync._headers_for("nope") is plane_sync.PLANE_HEADERS
|
||||
|
||||
|
||||
def test_headers_for_empty_token_falls_back_to_shared():
|
||||
"""Known role but empty/unconfigured token -> shared orchestrator headers."""
|
||||
with patch.dict(plane_sync.PLANE_BOT_TOKENS, {"tester": ""}, clear=False):
|
||||
assert plane_sync._headers_for("tester") is plane_sync.PLANE_HEADERS
|
||||
|
||||
|
||||
def test_headers_for_empty_string_author_falls_back_to_shared():
|
||||
"""author='' -> shared orchestrator headers."""
|
||||
assert plane_sync._headers_for("") is plane_sync.PLANE_HEADERS
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# add_comment
|
||||
# --------------------------------------------------------------------------- #
|
||||
def _mock_post_ok():
|
||||
resp = MagicMock()
|
||||
resp.raise_for_status.return_value = None
|
||||
return resp
|
||||
|
||||
|
||||
def test_add_comment_with_author_posts_with_bot_headers():
|
||||
"""add_comment(author='developer') -> httpx.post called with the developer
|
||||
bot's X-API-Key header."""
|
||||
with patch.object(plane_sync, "find_issue_id", return_value="issue-uuid"), \
|
||||
patch.object(plane_sync, "_resolve_project_id", return_value="proj-uuid"), \
|
||||
patch.dict(plane_sync.PLANE_BOT_TOKENS, {"developer": "dev-tok"}, clear=False), \
|
||||
patch.object(plane_sync.httpx, "post", return_value=_mock_post_ok()) as mock_post:
|
||||
plane_sync.add_comment("ET-001", "hello", author="developer")
|
||||
|
||||
assert mock_post.called
|
||||
_, kwargs = mock_post.call_args
|
||||
assert kwargs["headers"] == {"X-API-Key": "dev-tok"}
|
||||
|
||||
|
||||
def test_add_comment_without_author_uses_shared_token():
|
||||
"""add_comment without author -> shared orchestrator headers (backward
|
||||
compatible)."""
|
||||
with patch.object(plane_sync, "find_issue_id", return_value="issue-uuid"), \
|
||||
patch.object(plane_sync, "_resolve_project_id", return_value="proj-uuid"), \
|
||||
patch.object(plane_sync.httpx, "post", return_value=_mock_post_ok()) as mock_post:
|
||||
plane_sync.add_comment("ET-001", "hello")
|
||||
|
||||
assert mock_post.called
|
||||
_, kwargs = mock_post.call_args
|
||||
assert kwargs["headers"] is plane_sync.PLANE_HEADERS
|
||||
|
||||
|
||||
def test_add_comment_unknown_author_uses_shared_token():
|
||||
"""add_comment with an unknown role -> shared orchestrator headers."""
|
||||
with patch.object(plane_sync, "find_issue_id", return_value="issue-uuid"), \
|
||||
patch.object(plane_sync, "_resolve_project_id", return_value="proj-uuid"), \
|
||||
patch.object(plane_sync.httpx, "post", return_value=_mock_post_ok()) as mock_post:
|
||||
plane_sync.add_comment("ET-001", "hello", author="ghost")
|
||||
|
||||
assert mock_post.called
|
||||
_, kwargs = mock_post.call_args
|
||||
assert kwargs["headers"] is plane_sync.PLANE_HEADERS
|
||||
@@ -73,16 +73,24 @@ def setup(monkeypatch):
|
||||
os.unlink(_test_db)
|
||||
|
||||
|
||||
# Feature 1: the pipeline now starts on a status change to In Progress (not on
|
||||
# creation). _post_created drives that status-change event so these ORCH-6
|
||||
# routing tests still exercise task creation through the new trigger.
|
||||
_IN_PROGRESS = "b873d9eb-993c-48cd-97ac-99a9b1623967"
|
||||
|
||||
|
||||
def _post_created(plane_project_id, plane_id="wi-1", name="A valid work item title"):
|
||||
return client.post(
|
||||
"/webhook/plane",
|
||||
json={
|
||||
"event": "work_item.created",
|
||||
"event": "issue",
|
||||
"action": "updated",
|
||||
"data": {
|
||||
"id": plane_id,
|
||||
"name": name,
|
||||
"description_stripped": "This is a sufficiently long description.",
|
||||
"project": plane_project_id,
|
||||
"state": {"id": _IN_PROGRESS, "name": "In Progress", "group": "started"},
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
465
tests/test_qg.py
465
tests/test_qg.py
@@ -17,7 +17,11 @@ from src.qg.checks import (
|
||||
check_ci_green,
|
||||
check_review_approved,
|
||||
check_tests_passed,
|
||||
check_tests_local,
|
||||
check_deploy_status,
|
||||
check_staging_status,
|
||||
)
|
||||
from src.stages import get_qg_for_stage
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
@@ -164,25 +168,466 @@ class TestCheckReviewApproved:
|
||||
|
||||
|
||||
class TestCheckTestsPassed:
|
||||
def test_report_with_pass(self, setup_work_item_dir):
|
||||
repo_dir = setup_work_item_dir
|
||||
wi_dir = repo_dir / "docs" / "work-items" / "ET-001"
|
||||
wi_dir.mkdir(parents=True)
|
||||
(wi_dir / "13-test-report.md").write_text("# Test Report\n\nResult: PASS\n")
|
||||
"""ET-013 fix: testing -> deploy gate reads the tester's MACHINE-READABLE verdict
|
||||
in 13-test-report.md frontmatter (verdict:/status:), NOT a substring of the body.
|
||||
Mirrors check_reviewer_verdict / check_deploy_status. The old `if "PASS" in content`
|
||||
let a `verdict: BLOCKED` report whose prose said "23 passed"/"✅ PASS" pass the gate,
|
||||
shipping an unfinished feature to Done."""
|
||||
|
||||
def _write(self, repo_dir, content, wi="ET-001"):
|
||||
wi_dir = repo_dir / "docs" / "work-items" / wi
|
||||
wi_dir.mkdir(parents=True)
|
||||
(wi_dir / "13-test-report.md").write_text(content)
|
||||
|
||||
def test_verdict_pass_passes(self, setup_work_item_dir):
|
||||
# Most common real form (ET-001/002/005/009/011/012/014).
|
||||
self._write(
|
||||
setup_work_item_dir,
|
||||
"---\ntype: test-report\nverdict: PASS\nstatus: pass\n---\n\n# Test Report\n",
|
||||
)
|
||||
passed, reason = check_tests_passed("enduro-trails", "ET-001")
|
||||
assert passed is True
|
||||
assert "PASS" in reason
|
||||
|
||||
def test_verdict_pass_ready_to_deploy_passes(self, setup_work_item_dir):
|
||||
# ET-007 real form: "PASS — ready-to-deploy".
|
||||
self._write(
|
||||
setup_work_item_dir,
|
||||
"---\nverdict: PASS — ready-to-deploy\nstatus: PASS\n---\n\nbody\n",
|
||||
)
|
||||
passed, reason = check_tests_passed("enduro-trails", "ET-001")
|
||||
assert passed is True
|
||||
|
||||
def test_report_without_pass(self, setup_work_item_dir):
|
||||
repo_dir = setup_work_item_dir
|
||||
wi_dir = repo_dir / "docs" / "work-items" / "ET-001"
|
||||
wi_dir.mkdir(parents=True)
|
||||
(wi_dir / "13-test-report.md").write_text("# Test Report\n\nResult: FAIL\n")
|
||||
def test_verdict_ready_to_deploy_with_status_passed_passes(self, setup_work_item_dir):
|
||||
# ET-006 real form: verdict has no PASS word, but status: PASSED.
|
||||
self._write(
|
||||
setup_work_item_dir,
|
||||
"---\nverdict: ready-to-deploy\nstatus: PASSED\n---\n\nbody\n",
|
||||
)
|
||||
passed, reason = check_tests_passed("enduro-trails", "ET-001")
|
||||
assert passed is True
|
||||
|
||||
def test_verdict_stage_ready_to_deploy_with_status_pass_passes(self, setup_work_item_dir):
|
||||
# ET-008 real form: verdict: stage:ready-to-deploy, status: pass.
|
||||
self._write(
|
||||
setup_work_item_dir,
|
||||
"---\nverdict: stage:ready-to-deploy\nstatus: pass\n---\n\nbody\n",
|
||||
)
|
||||
passed, reason = check_tests_passed("enduro-trails", "ET-001")
|
||||
assert passed is True
|
||||
|
||||
def test_blocked_verdict_with_pass_in_body_fails(self, setup_work_item_dir):
|
||||
# THE ET-013 BUG: verdict BLOCKED but body is full of "PASS"/"passed".
|
||||
self._write(
|
||||
setup_work_item_dir,
|
||||
"---\ntype: test-report\nstatus: blocked\nverdict: BLOCKED\n---\n\n"
|
||||
"23 passed\n✅ PASS (часть AC-18)\nAll checks passed\n",
|
||||
)
|
||||
passed, reason = check_tests_passed("enduro-trails", "ET-001")
|
||||
assert passed is False
|
||||
assert "BLOCKED" in reason
|
||||
|
||||
def test_failed_verdict_fails(self, setup_work_item_dir):
|
||||
self._write(
|
||||
setup_work_item_dir,
|
||||
"---\nverdict: FAILED\nstatus: failed\n---\n\nbody\n",
|
||||
)
|
||||
passed, reason = check_tests_passed("enduro-trails", "ET-001")
|
||||
assert passed is False
|
||||
assert "FAILED" in reason
|
||||
|
||||
def test_passed_count_in_body_but_blocked_verdict_fails(self, setup_work_item_dir):
|
||||
# Body says "23 passed" but frontmatter verdict BLOCKED -> substring no longer fools.
|
||||
self._write(
|
||||
setup_work_item_dir,
|
||||
"---\nverdict: BLOCKED\n---\n\nTests: 23 passed, 0 failed.\n",
|
||||
)
|
||||
passed, reason = check_tests_passed("enduro-trails", "ET-001")
|
||||
assert passed is False
|
||||
|
||||
def test_no_frontmatter_fails(self, setup_work_item_dir):
|
||||
# Old format / prose only -> no machine verdict -> fail.
|
||||
self._write(
|
||||
setup_work_item_dir,
|
||||
"# Test Report\n\nResult: PASS\nAll tests passed.\n",
|
||||
)
|
||||
passed, reason = check_tests_passed("enduro-trails", "ET-001")
|
||||
assert passed is False
|
||||
|
||||
def test_no_verdict_field_fails(self, setup_work_item_dir):
|
||||
# Frontmatter present but neither verdict nor status -> fail.
|
||||
self._write(
|
||||
setup_work_item_dir,
|
||||
"---\ntype: test-report\nversion: 1\n---\n\nResult: PASS\n",
|
||||
)
|
||||
passed, reason = check_tests_passed("enduro-trails", "ET-001")
|
||||
assert passed is False
|
||||
|
||||
def test_invalid_yaml_fails_no_exception(self, setup_work_item_dir):
|
||||
# Broken YAML frontmatter -> False with reason, never raises.
|
||||
self._write(
|
||||
setup_work_item_dir,
|
||||
"---\nverdict: [unclosed\n : : :\n---\n\nbody PASS\n",
|
||||
)
|
||||
passed, reason = check_tests_passed("enduro-trails", "ET-001")
|
||||
assert passed is False
|
||||
assert "YAML" in reason or "frontmatter" in reason.lower()
|
||||
|
||||
def test_no_report(self, setup_work_item_dir):
|
||||
passed, reason = check_tests_passed("enduro-trails", "ET-001")
|
||||
assert passed is False
|
||||
assert "not found" in reason.lower()
|
||||
|
||||
|
||||
class TestCheckDeployStatus:
|
||||
"""BUG 8: deploy -> done must be gated on the deployer's machine-readable
|
||||
deploy_status verdict in 14-deploy-log.md frontmatter, NOT the LLM exit code
|
||||
(always 0). Mirrors check_reviewer_verdict (reads ONLY the frontmatter field)."""
|
||||
|
||||
def _write_log(self, repo_dir, content):
|
||||
wi_dir = repo_dir / "docs" / "work-items" / "ET-011"
|
||||
wi_dir.mkdir(parents=True)
|
||||
(wi_dir / "14-deploy-log.md").write_text(content)
|
||||
|
||||
def test_success_verdict_passes(self, setup_work_item_dir):
|
||||
self._write_log(
|
||||
setup_work_item_dir,
|
||||
"---\ndeploy_status: SUCCESS\nversion: v0.0.3\n---\n\nDeployed OK.\n",
|
||||
)
|
||||
passed, reason = check_deploy_status("enduro-trails", "ET-011")
|
||||
assert passed is True
|
||||
assert "SUCCESS" in reason
|
||||
|
||||
def test_failed_verdict_fails(self, setup_work_item_dir):
|
||||
self._write_log(
|
||||
setup_work_item_dir,
|
||||
"---\ndeploy_status: FAILED\nversion: v0.0.3\n---\n\npermission denied.\n",
|
||||
)
|
||||
passed, reason = check_deploy_status("enduro-trails", "ET-011")
|
||||
assert passed is False
|
||||
assert "FAILED" in reason
|
||||
|
||||
def test_no_file_fails(self, setup_work_item_dir):
|
||||
passed, reason = check_deploy_status("enduro-trails", "ET-011")
|
||||
assert passed is False
|
||||
assert "not found" in reason.lower()
|
||||
|
||||
def test_no_field_fails(self, setup_work_item_dir):
|
||||
# Frontmatter present but no deploy_status field -> must NOT pass.
|
||||
self._write_log(
|
||||
setup_work_item_dir,
|
||||
"---\nversion: v0.0.3\n---\n\nStatus: FAILED (prose only).\n",
|
||||
)
|
||||
passed, reason = check_deploy_status("enduro-trails", "ET-011")
|
||||
assert passed is False
|
||||
|
||||
def test_prose_only_no_frontmatter_fails(self, setup_work_item_dir):
|
||||
# Prose mentioning SUCCESS but no machine-readable frontmatter -> fail.
|
||||
self._write_log(
|
||||
setup_work_item_dir,
|
||||
"# Deploy log\n\nStatus: SUCCESS (prose, not frontmatter).\n",
|
||||
)
|
||||
passed, reason = check_deploy_status("enduro-trails", "ET-011")
|
||||
assert passed is False
|
||||
|
||||
# --- ET-013 path-sync fix: log written to origin/main via separate PR ---
|
||||
|
||||
def test_origin_main_success_passes_when_absent_in_worktree(self, monkeypatch):
|
||||
# Deployer merged 14-deploy-log.md into main via a separate PR; it is NOT
|
||||
# in the feature worktree. Gate must recover it from origin/main -> PASS.
|
||||
# (This is the exact ET-013 regression.)
|
||||
monkeypatch.setattr(
|
||||
"src.qg.checks._deploy_log_from_main",
|
||||
lambda repo, wi: "---\ndeploy_status: SUCCESS\nversion: v0.0.5\n---\n\nLive.\n",
|
||||
)
|
||||
passed, reason = check_deploy_status("enduro-trails", "ET-013")
|
||||
assert passed is True
|
||||
assert "SUCCESS" in reason
|
||||
|
||||
def test_origin_main_failed_fails(self, monkeypatch):
|
||||
# A genuine FAILED log in main must still fail.
|
||||
monkeypatch.setattr(
|
||||
"src.qg.checks._deploy_log_from_main",
|
||||
lambda repo, wi: "---\ndeploy_status: FAILED\nversion: v0.0.5\n---\n\nboom.\n",
|
||||
)
|
||||
passed, reason = check_deploy_status("enduro-trails", "ET-013")
|
||||
assert passed is False
|
||||
assert "FAILED" in reason
|
||||
|
||||
def test_absent_everywhere_fails(self, monkeypatch):
|
||||
# Not in worktree and origin/main lookup yields nothing -> not found.
|
||||
monkeypatch.setattr(
|
||||
"src.qg.checks._deploy_log_from_main", lambda repo, wi: None
|
||||
)
|
||||
passed, reason = check_deploy_status("enduro-trails", "ET-013")
|
||||
assert passed is False
|
||||
assert "not found" in reason.lower()
|
||||
|
||||
@patch("src.qg.checks.subprocess.run")
|
||||
@patch("src.qg.checks.os.path.isdir", return_value=True)
|
||||
def test_fetch_failure_degrades_no_exception(self, mock_isdir, mock_run):
|
||||
# git fetch/show raising (e.g. network) must degrade to "not found",
|
||||
# never propagate an exception out of the gate.
|
||||
import subprocess as _sp
|
||||
mock_run.side_effect = _sp.TimeoutExpired(cmd="git", timeout=30)
|
||||
passed, reason = check_deploy_status("enduro-trails", "ET-013")
|
||||
assert passed is False
|
||||
assert "not found" in reason.lower()
|
||||
|
||||
def test_worktree_log_short_circuits_main_lookup(self, setup_work_item_dir, monkeypatch):
|
||||
# If the log IS present in the worktree, origin/main must NOT be consulted.
|
||||
self._write_log(
|
||||
setup_work_item_dir,
|
||||
"---\ndeploy_status: SUCCESS\nversion: v0.0.3\n---\n\nDeployed OK.\n",
|
||||
)
|
||||
called = {"n": 0}
|
||||
def _boom(repo, wi):
|
||||
called["n"] += 1
|
||||
return None
|
||||
monkeypatch.setattr("src.qg.checks._deploy_log_from_main", _boom)
|
||||
passed, reason = check_deploy_status("enduro-trails", "ET-011")
|
||||
assert passed is True
|
||||
assert called["n"] == 0
|
||||
|
||||
def test_deploy_stage_qg_is_check_deploy_status(self):
|
||||
assert get_qg_for_stage("deploy") == "check_deploy_status"
|
||||
|
||||
def test_registered_in_qg_checks(self):
|
||||
from src.qg.checks import QG_CHECKS
|
||||
assert QG_CHECKS.get("check_deploy_status") is check_deploy_status
|
||||
|
||||
|
||||
class TestDevelopmentStageQG:
|
||||
"""BUG 6: development stage QG is now check_ci_green (CI is the authoritative
|
||||
gate), not the deprecated check_tests_local."""
|
||||
|
||||
def test_development_qg_is_check_ci_green(self):
|
||||
assert get_qg_for_stage("development") == "check_ci_green"
|
||||
|
||||
def test_check_tests_local_is_deprecated_and_unwired(self):
|
||||
# Kept in the registry for backward-compat, but not wired to any stage.
|
||||
from src.qg.checks import QG_CHECKS
|
||||
from src.stages import STAGE_TRANSITIONS
|
||||
assert "check_tests_local" in QG_CHECKS
|
||||
wired = {t.get("qg") for t in STAGE_TRANSITIONS.values()}
|
||||
assert "check_tests_local" not in wired
|
||||
|
||||
|
||||
class TestCheckTestsLocal:
|
||||
"""BUG 5: check_tests_local must run pytest directly (not make, which is
|
||||
not installed in the orchestrator container)."""
|
||||
|
||||
@patch("src.qg.checks.ensure_worktree")
|
||||
@patch("subprocess.run")
|
||||
def test_passes_on_returncode_zero(self, mock_run, mock_wt, tmp_path):
|
||||
mock_wt.return_value = str(tmp_path)
|
||||
mock_run.return_value = MagicMock(returncode=0, stdout="ok", stderr="")
|
||||
passed, reason = check_tests_local("enduro-trails", "feature/ET-001-x")
|
||||
assert passed is True
|
||||
assert reason == "Local tests passed"
|
||||
|
||||
@patch("src.qg.checks.ensure_worktree")
|
||||
@patch("subprocess.run")
|
||||
def test_fails_on_nonzero_returncode(self, mock_run, mock_wt, tmp_path):
|
||||
mock_wt.return_value = str(tmp_path)
|
||||
mock_run.return_value = MagicMock(returncode=1, stdout="boom", stderr="trace")
|
||||
passed, reason = check_tests_local("enduro-trails", "feature/ET-001-x")
|
||||
assert passed is False
|
||||
assert "Local tests failed" in reason
|
||||
|
||||
@patch("src.qg.checks.ensure_worktree")
|
||||
@patch("subprocess.run")
|
||||
def test_invokes_pytest_not_make(self, mock_run, mock_wt, tmp_path):
|
||||
"""The subprocess call must be pytest, from src/api, against ../../tests/."""
|
||||
mock_wt.return_value = str(tmp_path)
|
||||
mock_run.return_value = MagicMock(returncode=0, stdout="", stderr="")
|
||||
check_tests_local("enduro-trails", "feature/ET-001-x")
|
||||
args, kwargs = mock_run.call_args
|
||||
cmd = args[0]
|
||||
assert "make" not in cmd
|
||||
assert cmd[:3] == ["python", "-m", "pytest"]
|
||||
assert "../../tests/" in cmd
|
||||
assert kwargs["cwd"] == os.path.join(str(tmp_path), "src", "api")
|
||||
|
||||
|
||||
|
||||
class TestCheckStagingStatus:
|
||||
"""ORCH-35 conditional gate (Variant A): deploy-staging gate is active ONLY for
|
||||
the self-hosting orchestrator repo (has staging infra on localhost:8501). All
|
||||
other repos pass immediately with "Staging gate N/A for <repo>".
|
||||
|
||||
Self-hosting path: reads machine-readable staging_status: from 15-staging-log.md
|
||||
frontmatter. Mirrors check_deploy_status pattern.
|
||||
"""
|
||||
|
||||
@pytest.fixture()
|
||||
def orch_dir(self, tmp_path, monkeypatch):
|
||||
"""Temp orchestrator repo dir (self-hosting)."""
|
||||
monkeypatch.setattr("src.qg.checks.settings.repos_dir", str(tmp_path))
|
||||
d = tmp_path / "orchestrator"
|
||||
d.mkdir(exist_ok=True)
|
||||
return d
|
||||
|
||||
def _write_log(self, repo_dir, content, wi="ORCH-035"):
|
||||
wi_dir = repo_dir / "docs" / "work-items" / wi
|
||||
wi_dir.mkdir(parents=True, exist_ok=True)
|
||||
(wi_dir / "15-staging-log.md").write_text(content)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Self-hosting (orchestrator) path -- real file check
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def test_success_verdict_passes(self, orch_dir):
|
||||
self._write_log(
|
||||
orch_dir,
|
||||
"---\nstaging_status: SUCCESS\ntimestamp: 2026-06-05T00:00:00Z\n---\n\nAll staging tests passed.\n",
|
||||
)
|
||||
from src.qg.checks import check_staging_status
|
||||
passed, reason = check_staging_status("orchestrator", "ORCH-035")
|
||||
assert passed is True
|
||||
assert "SUCCESS" in reason
|
||||
|
||||
def test_failed_verdict_fails(self, orch_dir):
|
||||
self._write_log(
|
||||
orch_dir,
|
||||
"---\nstaging_status: FAILED\ntimestamp: 2026-06-05T00:00:00Z\n---\n\n2 tests failed.\n",
|
||||
)
|
||||
from src.qg.checks import check_staging_status
|
||||
passed, reason = check_staging_status("orchestrator", "ORCH-035")
|
||||
assert passed is False
|
||||
assert "FAILED" in reason
|
||||
|
||||
def test_no_file_fails_for_self_hosting(self, orch_dir):
|
||||
from src.qg.checks import check_staging_status
|
||||
passed, reason = check_staging_status("orchestrator", "ORCH-035")
|
||||
assert passed is False
|
||||
assert "not found" in reason.lower()
|
||||
|
||||
def test_no_field_fails(self, orch_dir):
|
||||
# Frontmatter present but no staging_status field -> must NOT pass.
|
||||
self._write_log(
|
||||
orch_dir,
|
||||
"---\nversion: v0.0.3\n---\n\nStatus: all good (prose only).\n",
|
||||
)
|
||||
from src.qg.checks import check_staging_status
|
||||
passed, reason = check_staging_status("orchestrator", "ORCH-035")
|
||||
assert passed is False
|
||||
|
||||
def test_prose_only_no_frontmatter_fails(self, orch_dir):
|
||||
# Prose mentioning SUCCESS but no machine-readable frontmatter -> fail.
|
||||
self._write_log(
|
||||
orch_dir,
|
||||
"# Staging Log\n\nStatus: SUCCESS (prose, not frontmatter).\n",
|
||||
)
|
||||
from src.qg.checks import check_staging_status
|
||||
passed, reason = check_staging_status("orchestrator", "ORCH-035")
|
||||
assert passed is False
|
||||
|
||||
def test_origin_main_success_passes_when_absent_in_worktree(self, monkeypatch):
|
||||
# Deployer merged 15-staging-log.md into main; not in worktree -> recover from main.
|
||||
monkeypatch.setattr(
|
||||
"src.qg.checks._staging_log_from_main",
|
||||
lambda repo, wi: "---\nstaging_status: SUCCESS\n---\n\nAll good.\n",
|
||||
)
|
||||
from src.qg.checks import check_staging_status
|
||||
passed, reason = check_staging_status("orchestrator", "ORCH-035-main")
|
||||
assert passed is True
|
||||
assert "SUCCESS" in reason
|
||||
|
||||
def test_origin_main_failed_fails(self, monkeypatch):
|
||||
monkeypatch.setattr(
|
||||
"src.qg.checks._staging_log_from_main",
|
||||
lambda repo, wi: "---\nstaging_status: FAILED\n---\n\nboom.\n",
|
||||
)
|
||||
from src.qg.checks import check_staging_status
|
||||
passed, reason = check_staging_status("orchestrator", "ORCH-035-main")
|
||||
assert passed is False
|
||||
assert "FAILED" in reason
|
||||
|
||||
def test_absent_everywhere_fails(self, monkeypatch):
|
||||
monkeypatch.setattr(
|
||||
"src.qg.checks._staging_log_from_main", lambda repo, wi: None
|
||||
)
|
||||
from src.qg.checks import check_staging_status
|
||||
passed, reason = check_staging_status("orchestrator", "ORCH-035-absent")
|
||||
assert passed is False
|
||||
assert "not found" in reason.lower()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Non-self-hosting path -- instant pass, no file dependency
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def test_non_self_hosting_passes_immediately_no_file(self, tmp_path, monkeypatch):
|
||||
"""Non-self-hosting repo: gate is N/A even without a staging log file."""
|
||||
monkeypatch.setattr("src.qg.checks.settings.repos_dir", str(tmp_path))
|
||||
from src.qg.checks import check_staging_status
|
||||
passed, reason = check_staging_status("enduro-trails", "ET-035")
|
||||
assert passed is True
|
||||
assert "N/A" in reason
|
||||
assert "enduro-trails" in reason
|
||||
|
||||
def test_non_self_hosting_passes_regardless_of_file_content(self, tmp_path, monkeypatch):
|
||||
"""Even a FAILED staging log must not block a non-self-hosting repo."""
|
||||
monkeypatch.setattr("src.qg.checks.settings.repos_dir", str(tmp_path))
|
||||
et_dir = tmp_path / "enduro-trails" / "docs" / "work-items" / "ET-035"
|
||||
et_dir.mkdir(parents=True)
|
||||
(et_dir / "15-staging-log.md").write_text(
|
||||
"---\nstaging_status: FAILED\n---\nShould be ignored.\n"
|
||||
)
|
||||
from src.qg.checks import check_staging_status
|
||||
passed, reason = check_staging_status("enduro-trails", "ET-035")
|
||||
assert passed is True
|
||||
assert "N/A" in reason
|
||||
|
||||
def test_unknown_repo_also_passes_immediately(self, tmp_path, monkeypatch):
|
||||
"""Any repo that is not orchestrator gets N/A gate."""
|
||||
monkeypatch.setattr("src.qg.checks.settings.repos_dir", str(tmp_path))
|
||||
from src.qg.checks import check_staging_status
|
||||
passed, reason = check_staging_status("some-other-project", "XY-001")
|
||||
assert passed is True
|
||||
assert "N/A" in reason
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# is_self_hosting_repo helper
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def test_is_self_hosting_true_for_orchestrator(self):
|
||||
from src.qg.checks import is_self_hosting_repo
|
||||
assert is_self_hosting_repo("orchestrator") is True
|
||||
|
||||
def test_is_self_hosting_case_insensitive(self):
|
||||
from src.qg.checks import is_self_hosting_repo
|
||||
assert is_self_hosting_repo("Orchestrator") is True
|
||||
assert is_self_hosting_repo("ORCHESTRATOR") is True
|
||||
|
||||
def test_is_self_hosting_false_for_enduro_trails(self):
|
||||
from src.qg.checks import is_self_hosting_repo
|
||||
assert is_self_hosting_repo("enduro-trails") is False
|
||||
|
||||
def test_is_self_hosting_false_for_empty(self):
|
||||
from src.qg.checks import is_self_hosting_repo
|
||||
assert is_self_hosting_repo("") is False
|
||||
assert is_self_hosting_repo(None) is False
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Stage machinery (regression: must not be broken)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def test_deploy_staging_qg_is_check_staging_status(self):
|
||||
assert get_qg_for_stage("deploy-staging") == "check_staging_status"
|
||||
|
||||
def test_registered_in_qg_checks(self):
|
||||
from src.qg.checks import QG_CHECKS, check_staging_status
|
||||
assert QG_CHECKS.get("check_staging_status") is check_staging_status
|
||||
|
||||
def test_deploy_stage_qg_still_check_deploy_status(self):
|
||||
"""Regression: existing deploy QG must not be broken."""
|
||||
assert get_qg_for_stage("deploy") == "check_deploy_status"
|
||||
|
||||
def test_stage_chain(self):
|
||||
"""Full chain: testing->deploy-staging->deploy->done."""
|
||||
from src.stages import get_next_stage
|
||||
assert get_next_stage("testing") == "deploy-staging"
|
||||
assert get_next_stage("deploy-staging") == "deploy"
|
||||
assert get_next_stage("deploy") == "done"
|
||||
|
||||
|
||||
@@ -69,6 +69,7 @@ def silence_side_effects(monkeypatch):
|
||||
"set_issue_needs_input",
|
||||
"set_issue_in_progress",
|
||||
"set_issue_blocked",
|
||||
"set_issue_done",
|
||||
):
|
||||
monkeypatch.setattr(stage_engine, name, MagicMock())
|
||||
|
||||
@@ -135,7 +136,7 @@ class TestHappyPathAgentSelection:
|
||||
("architecture", "development", "developer"),
|
||||
("development", "review", "reviewer"),
|
||||
("review", "testing", "tester"),
|
||||
("testing", "deploy", "deployer"),
|
||||
("testing", "deploy-staging", "deployer"),
|
||||
],
|
||||
)
|
||||
def test_advance_launches_current_stage_agent(
|
||||
@@ -177,6 +178,40 @@ class TestHappyPathAgentSelection:
|
||||
assert res.enqueued_agent is None
|
||||
assert _jobs() == []
|
||||
|
||||
def test_deploy_success_syncs_plane_to_terminal_done(self, monkeypatch):
|
||||
"""FIX 3: a successful deploy->done forces the Plane issue to terminal Done.
|
||||
|
||||
Previously the task could stick on In Progress because the merge webhook
|
||||
completed it out-of-band. Now the engine drives set_issue_done() on the
|
||||
deploy->done success transition.
|
||||
"""
|
||||
monkeypatch.setattr(
|
||||
stage_engine, "QG_CHECKS",
|
||||
{k: _pass for k in stage_engine.QG_CHECKS},
|
||||
)
|
||||
task_id = _make_task("deploy", wi="ET-012")
|
||||
res = advance_stage(
|
||||
task_id, "deploy", "enduro-trails", "ET-012",
|
||||
"feature/ET-012-x", finished_agent="deployer",
|
||||
)
|
||||
assert res.advanced is True
|
||||
assert _stage(task_id) == "done"
|
||||
# The terminal Plane sync was invoked with the work item id.
|
||||
stage_engine.set_issue_done.assert_called_once_with("ET-012")
|
||||
|
||||
def test_non_terminal_advance_does_not_force_plane_done(self, monkeypatch):
|
||||
"""set_issue_done must only fire on the terminal deploy->done transition."""
|
||||
monkeypatch.setattr(
|
||||
stage_engine, "QG_CHECKS",
|
||||
{k: _pass for k in stage_engine.QG_CHECKS},
|
||||
)
|
||||
task_id = _make_task("review")
|
||||
advance_stage(
|
||||
task_id, "review", "enduro-trails", "ET-001",
|
||||
"feature/ET-001-x", finished_agent=None,
|
||||
)
|
||||
stage_engine.set_issue_done.assert_not_called()
|
||||
|
||||
def test_done_is_terminal(self):
|
||||
task_id = _make_task("done")
|
||||
res = advance_stage(task_id, "done", "enduro-trails", "ET-001",
|
||||
@@ -203,10 +238,13 @@ class TestQgFailureDoesNotAdvance:
|
||||
assert _jobs() == []
|
||||
|
||||
def test_webhook_path_emits_qg_failure_notification(self, monkeypatch):
|
||||
"""finished_agent=None -> generic QG-failure notification fires (plane parity)."""
|
||||
"""finished_agent=None -> generic QG-failure notification fires (plane parity).
|
||||
|
||||
development stage QG is now check_ci_green (was check_tests_local).
|
||||
"""
|
||||
monkeypatch.setattr(
|
||||
stage_engine, "QG_CHECKS",
|
||||
{**stage_engine.QG_CHECKS, "check_tests_local": _fail("ci red")},
|
||||
{**stage_engine.QG_CHECKS, "check_ci_green": _fail("ci red")},
|
||||
)
|
||||
task_id = _make_task("development")
|
||||
advance_stage(task_id, "development", "enduro-trails", "ET-001",
|
||||
@@ -297,6 +335,59 @@ class TestTesterFail:
|
||||
assert _jobs() == []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# BUG 8: deploy verdict gates deploy -> done (not the LLM exit code)
|
||||
# ---------------------------------------------------------------------------
|
||||
class TestDeployVerdict:
|
||||
"""deploy -> done must be gated on check_deploy_status (the deployer's
|
||||
machine-readable verdict), NOT on the LLM exit code (always 0)."""
|
||||
|
||||
def test_failed_verdict_rolls_back_to_development(self, monkeypatch):
|
||||
# deployer finished (exit_code 0 from launcher), but verdict is FAILED.
|
||||
monkeypatch.setattr(
|
||||
stage_engine, "QG_CHECKS",
|
||||
{**stage_engine.QG_CHECKS,
|
||||
"check_deploy_status": _fail("Deploy status: FAILED")},
|
||||
)
|
||||
task_id = _make_task("deploy")
|
||||
res = advance_stage(task_id, "deploy", "enduro-trails", "ET-011",
|
||||
"feature/ET-011-x", finished_agent="deployer")
|
||||
assert res.advanced is False
|
||||
assert res.rolled_back_to == "development"
|
||||
assert _stage(task_id) == "development" # NOT done
|
||||
assert res.alerted is True
|
||||
assert stage_engine.set_issue_blocked.called
|
||||
assert stage_engine.send_telegram.called
|
||||
|
||||
def test_no_deploy_log_rolls_back(self, monkeypatch):
|
||||
# No frontmatter field / no file -> check returns False -> rollback.
|
||||
monkeypatch.setattr(
|
||||
stage_engine, "QG_CHECKS",
|
||||
{**stage_engine.QG_CHECKS,
|
||||
"check_deploy_status": _fail("Deploy log not found (14-deploy-log.md)")},
|
||||
)
|
||||
task_id = _make_task("deploy")
|
||||
res = advance_stage(task_id, "deploy", "enduro-trails", "ET-011",
|
||||
"feature/ET-011-x", finished_agent="deployer")
|
||||
assert res.advanced is False
|
||||
assert _stage(task_id) == "development"
|
||||
|
||||
def test_success_verdict_advances_to_done(self, monkeypatch):
|
||||
monkeypatch.setattr(
|
||||
stage_engine, "QG_CHECKS",
|
||||
{**stage_engine.QG_CHECKS,
|
||||
"check_deploy_status": _pass},
|
||||
)
|
||||
task_id = _make_task("deploy")
|
||||
res = advance_stage(task_id, "deploy", "enduro-trails", "ET-011",
|
||||
"feature/ET-011-x", finished_agent="deployer")
|
||||
assert res.advanced is True
|
||||
assert res.to_stage == "done"
|
||||
assert _stage(task_id) == "done"
|
||||
assert res.enqueued_agent is None # no agent leaves deploy
|
||||
assert _jobs() == []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Architect conflict -> rollback to analysis + enqueue analyst
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -358,6 +449,177 @@ class TestAnalysisApprovedFlow:
|
||||
assert stage_engine.notify_approve_requested.called
|
||||
assert _jobs() == []
|
||||
|
||||
def test_approved_verdict_advances_analysis_to_architecture(self, monkeypatch):
|
||||
"""BUG 4: a human Approved STATUS (webhook path, finished_agent=None)
|
||||
must satisfy the analysis gate and advance analysis -> architecture,
|
||||
enqueuing the architect. The status-only approval must NOT re-run
|
||||
check_analysis_approved (which looks for an :approved: COMMENT and would
|
||||
otherwise wrongly block the advance).
|
||||
"""
|
||||
# Make check_analysis_approved FAIL if it is ever called: the webhook
|
||||
# path must bypass it entirely (status == approval). If the engine were
|
||||
# to re-run the gate, this would block the advance and fail the test.
|
||||
monkeypatch.setattr(
|
||||
stage_engine, "QG_CHECKS",
|
||||
{
|
||||
**stage_engine.QG_CHECKS,
|
||||
"check_analysis_approved": _fail("no :approved: comment"),
|
||||
},
|
||||
)
|
||||
# Guard: the approval-flow (launcher-only) must NOT be invoked here.
|
||||
flow = MagicMock()
|
||||
monkeypatch.setattr(stage_engine, "_handle_analysis_approved_flow", flow)
|
||||
|
||||
task_id = _make_task("analysis")
|
||||
res = advance_stage(
|
||||
task_id, "analysis", "enduro-trails", "ET-001",
|
||||
"feature/ET-001-x", finished_agent=None,
|
||||
)
|
||||
|
||||
assert res.advanced is True
|
||||
assert res.to_stage == "architecture"
|
||||
assert _stage(task_id) == "architecture"
|
||||
assert res.enqueued_agent == "architect"
|
||||
# Sanity: agent for analysis is architect, never analyst (no re-run loop).
|
||||
assert get_agent_for_stage("analysis") == "architect"
|
||||
jobs = _jobs()
|
||||
assert len(jobs) == 1
|
||||
assert jobs[0]["agent"] == "architect"
|
||||
# The launcher-only approval-flow was NOT called on the webhook path.
|
||||
flow.assert_not_called()
|
||||
|
||||
def test_launcher_path_does_not_advance_and_calls_flow(self, monkeypatch):
|
||||
"""Regression: the launcher path (finished_agent='analyst') still routes
|
||||
into _handle_analysis_approved_flow and does NOT advance.
|
||||
"""
|
||||
flow = MagicMock()
|
||||
monkeypatch.setattr(stage_engine, "_handle_analysis_approved_flow", flow)
|
||||
|
||||
task_id = _make_task("analysis")
|
||||
res = advance_stage(
|
||||
task_id, "analysis", "enduro-trails", "ET-001",
|
||||
"feature/ET-001-x", finished_agent="analyst",
|
||||
)
|
||||
|
||||
assert res.advanced is not True
|
||||
assert _stage(task_id) == "analysis"
|
||||
assert _jobs() == []
|
||||
flow.assert_called_once()
|
||||
|
||||
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ORCH-35: deploy-staging gate — rollback on staging failure
|
||||
# ---------------------------------------------------------------------------
|
||||
class TestStagingGate:
|
||||
"""deploy-staging -> deploy must be gated on check_staging_status.
|
||||
FAILED verdict rolls back to development (same as deploy БАГ-8 pattern:
|
||||
staging failure = code is bad, needs developer fix)."""
|
||||
|
||||
def test_staging_success_advances_to_deploy(self, monkeypatch):
|
||||
"""Happy path: staging SUCCESS -> advance to deploy (no agent launched)."""
|
||||
monkeypatch.setattr(
|
||||
stage_engine, "QG_CHECKS",
|
||||
{**stage_engine.QG_CHECKS, "check_staging_status": _pass},
|
||||
)
|
||||
task_id = _make_task("deploy-staging")
|
||||
res = advance_stage(
|
||||
task_id, "deploy-staging", "enduro-trails", "ET-035",
|
||||
"feature/ET-035-x", finished_agent="deployer",
|
||||
)
|
||||
assert res.advanced is True
|
||||
assert res.to_stage == "deploy"
|
||||
assert _stage(task_id) == "deploy"
|
||||
# deploy-staging has agent=deployer, so deployer is enqueued for deploy stage
|
||||
assert res.enqueued_agent == "deployer"
|
||||
jobs = _jobs()
|
||||
assert len(jobs) == 1
|
||||
assert jobs[0]["agent"] == "deployer"
|
||||
|
||||
def test_staging_failed_rolls_back_to_development(self, monkeypatch):
|
||||
"""ORCH-35: staging FAILED -> roll back to development, not to testing."""
|
||||
monkeypatch.setattr(
|
||||
stage_engine, "QG_CHECKS",
|
||||
{**stage_engine.QG_CHECKS,
|
||||
"check_staging_status": _fail("Staging status: FAILED")},
|
||||
)
|
||||
task_id = _make_task("deploy-staging")
|
||||
res = advance_stage(
|
||||
task_id, "deploy-staging", "enduro-trails", "ET-035",
|
||||
"feature/ET-035-x", finished_agent="deployer",
|
||||
)
|
||||
assert res.advanced is False
|
||||
assert res.rolled_back_to == "development"
|
||||
assert _stage(task_id) == "development" # NOT deploy, NOT testing
|
||||
assert res.alerted is True
|
||||
assert stage_engine.set_issue_blocked.called
|
||||
assert stage_engine.send_telegram.called
|
||||
|
||||
def test_staging_failed_does_not_reach_deploy(self, monkeypatch):
|
||||
"""Prod deploy is unreachable if staging gate is not green."""
|
||||
monkeypatch.setattr(
|
||||
stage_engine, "QG_CHECKS",
|
||||
{**stage_engine.QG_CHECKS,
|
||||
"check_staging_status": _fail("Staging log not found")},
|
||||
)
|
||||
task_id = _make_task("deploy-staging")
|
||||
res = advance_stage(
|
||||
task_id, "deploy-staging", "enduro-trails", "ET-035",
|
||||
"feature/ET-035-x", finished_agent="deployer",
|
||||
)
|
||||
assert res.advanced is False
|
||||
# Task must NOT be in deploy stage
|
||||
assert _stage(task_id) != "deploy"
|
||||
|
||||
def test_staging_missing_log_rolls_back(self, monkeypatch):
|
||||
"""Missing 15-staging-log.md -> gate fails -> rollback to development."""
|
||||
monkeypatch.setattr(
|
||||
stage_engine, "QG_CHECKS",
|
||||
{**stage_engine.QG_CHECKS,
|
||||
"check_staging_status": _fail("Staging log not found (15-staging-log.md)")},
|
||||
)
|
||||
task_id = _make_task("deploy-staging")
|
||||
res = advance_stage(
|
||||
task_id, "deploy-staging", "enduro-trails", "ET-035",
|
||||
"feature/ET-035-x", finished_agent="deployer",
|
||||
)
|
||||
assert res.advanced is False
|
||||
assert _stage(task_id) == "development"
|
||||
|
||||
def test_testing_to_deploy_staging_advance(self, monkeypatch):
|
||||
"""testing -> deploy-staging: deployer is enqueued (ORCH-35 chain check)."""
|
||||
monkeypatch.setattr(
|
||||
stage_engine, "QG_CHECKS",
|
||||
{**stage_engine.QG_CHECKS, "check_tests_passed": _pass},
|
||||
)
|
||||
task_id = _make_task("testing")
|
||||
res = advance_stage(
|
||||
task_id, "testing", "enduro-trails", "ET-035",
|
||||
"feature/ET-035-x", finished_agent="tester",
|
||||
)
|
||||
assert res.advanced is True
|
||||
assert res.to_stage == "deploy-staging"
|
||||
assert _stage(task_id) == "deploy-staging"
|
||||
assert res.enqueued_agent == "deployer"
|
||||
|
||||
def test_deploy_still_rolls_back_on_check_deploy_status_fail(self, monkeypatch):
|
||||
"""Existing БАГ-8 rollback must still work for deploy stage (regression guard)."""
|
||||
monkeypatch.setattr(
|
||||
stage_engine, "QG_CHECKS",
|
||||
{**stage_engine.QG_CHECKS,
|
||||
"check_deploy_status": _fail("Deploy status: FAILED")},
|
||||
)
|
||||
task_id = _make_task("deploy")
|
||||
res = advance_stage(
|
||||
task_id, "deploy", "enduro-trails", "ET-011",
|
||||
"feature/ET-011-x", finished_agent="deployer",
|
||||
)
|
||||
assert res.advanced is False
|
||||
assert res.rolled_back_to == "development"
|
||||
assert _stage(task_id) == "development"
|
||||
assert res.alerted is True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# launcher + plane both delegate to the engine
|
||||
|
||||
94
tests/test_stage_visibility.py
Normal file
94
tests/test_stage_visibility.py
Normal file
@@ -0,0 +1,94 @@
|
||||
"""Feature 3: stage visibility on the Plane board.
|
||||
|
||||
* PLANE_STATES carries the 6 new per-stage / verdict UUIDs.
|
||||
* STAGE_TO_STATE maps architecture/development/review/testing to their
|
||||
dedicated board statuses (not all -> In Progress anymore).
|
||||
* set_issue_stage_state(work_item_id, stage) PATCHes the correct state UUID
|
||||
for a visible stage, and is a no-op for stages without one (analysis/deploy).
|
||||
* Needs Input / In Review / Blocked remain higher priority: their explicit
|
||||
setters use their own state, never overwritten by the stage map.
|
||||
|
||||
httpx is mocked; no network.
|
||||
"""
|
||||
|
||||
import os
|
||||
|
||||
os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")
|
||||
os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")
|
||||
|
||||
from unittest.mock import patch, MagicMock # noqa: E402
|
||||
|
||||
from src import plane_sync as PS # noqa: E402
|
||||
|
||||
|
||||
EXPECTED_UUIDS = {
|
||||
"architecture": "3020bbb7-6122-4663-930c-0315ba8dfa3d",
|
||||
"development": "9920609b-f140-4e46-ab95-89acda8412c8",
|
||||
"review": "ba0d802c-5218-41d4-ab43-978b0ea123ed",
|
||||
"testing": "7855d807-b1bf-42ef-8dae-6cde0df92d02",
|
||||
"approved": "a519a341-dada-4a91-8910-7604f82b79c5",
|
||||
"rejected": "ba958f3c-5db5-461d-8f82-89425e413b97",
|
||||
}
|
||||
|
||||
|
||||
def test_plane_states_has_new_uuids():
|
||||
for key, uuid in EXPECTED_UUIDS.items():
|
||||
assert PS.PLANE_STATES[key] == uuid
|
||||
|
||||
|
||||
def test_stage_to_state_maps_visible_stages():
|
||||
assert PS.STAGE_TO_STATE["architecture"] == EXPECTED_UUIDS["architecture"]
|
||||
assert PS.STAGE_TO_STATE["development"] == EXPECTED_UUIDS["development"]
|
||||
assert PS.STAGE_TO_STATE["review"] == EXPECTED_UUIDS["review"]
|
||||
assert PS.STAGE_TO_STATE["testing"] == EXPECTED_UUIDS["testing"]
|
||||
# analysis / deploy stay on In Progress; done stays Done.
|
||||
assert PS.STAGE_TO_STATE["analysis"] == PS.PLANE_STATES["in_progress"]
|
||||
assert PS.STAGE_TO_STATE["deploy"] == PS.PLANE_STATES["in_progress"]
|
||||
assert PS.STAGE_TO_STATE["done"] == PS.PLANE_STATES["done"]
|
||||
|
||||
|
||||
def _patch_resolution(monkey_targets):
|
||||
"""Helper: patch find_issue_id + _resolve_project_id to skip the DB/network."""
|
||||
return monkey_targets
|
||||
|
||||
|
||||
@patch("src.plane_sync.httpx.patch")
|
||||
@patch("src.plane_sync.find_issue_id", return_value="issue-uuid")
|
||||
@patch("src.plane_sync._resolve_project_id", return_value="proj-1")
|
||||
def test_set_issue_stage_state_patches_correct_uuid(mock_proj, mock_find, mock_patch):
|
||||
resp = MagicMock(); resp.raise_for_status.return_value = None
|
||||
mock_patch.return_value = resp
|
||||
|
||||
PS.set_issue_stage_state("ET-1", "development")
|
||||
# the PATCH carried the development state UUID
|
||||
_, kwargs = mock_patch.call_args
|
||||
assert kwargs["json"]["state"] == EXPECTED_UUIDS["development"]
|
||||
|
||||
|
||||
@patch("src.plane_sync.httpx.patch")
|
||||
@patch("src.plane_sync.find_issue_id", return_value="issue-uuid")
|
||||
@patch("src.plane_sync._resolve_project_id", return_value="proj-1")
|
||||
def test_set_issue_stage_state_noop_for_analysis(mock_proj, mock_find, mock_patch):
|
||||
# analysis has no dedicated board status -> no PATCH at all.
|
||||
PS.set_issue_stage_state("ET-1", "analysis")
|
||||
mock_patch.assert_not_called()
|
||||
PS.set_issue_stage_state("ET-1", "deploy")
|
||||
mock_patch.assert_not_called()
|
||||
|
||||
|
||||
@patch("src.plane_sync.httpx.patch")
|
||||
@patch("src.plane_sync.find_issue_id", return_value="issue-uuid")
|
||||
@patch("src.plane_sync._resolve_project_id", return_value="proj-1")
|
||||
def test_priority_states_use_their_own_uuid(mock_proj, mock_find, mock_patch):
|
||||
"""Needs Input / In Review / Blocked are set explicitly and take priority."""
|
||||
resp = MagicMock(); resp.raise_for_status.return_value = None
|
||||
mock_patch.return_value = resp
|
||||
|
||||
PS.set_issue_needs_input("ET-1")
|
||||
assert mock_patch.call_args.kwargs["json"]["state"] == PS.PLANE_STATES["needs_input"]
|
||||
|
||||
PS.set_issue_in_review("ET-1")
|
||||
assert mock_patch.call_args.kwargs["json"]["state"] == PS.PLANE_STATES["in_review"]
|
||||
|
||||
PS.set_issue_blocked("ET-1")
|
||||
assert mock_patch.call_args.kwargs["json"]["state"] == PS.PLANE_STATES["blocked"]
|
||||
200
tests/test_status_only_verdict.py
Normal file
200
tests/test_status_only_verdict.py
Normal file
@@ -0,0 +1,200 @@
|
||||
"""Status-only verdict model (bug 3 fix).
|
||||
|
||||
The comment-based control mechanism (:approved: / :rejected: / answer-to-questions)
|
||||
was removed. The pipeline is driven SOLELY by Plane status changes. These tests
|
||||
lock in the new behaviour:
|
||||
|
||||
* test_inreview_comment_does_not_revert — bug 3 root: an In Review task,
|
||||
any comment arrives -> status NOT reverted, no agent launched.
|
||||
* test_any_comment_no_pipeline_action — :approved: / :rejected: / plain
|
||||
text comment -> no status change, no enqueue.
|
||||
* test_approved_status_advances_without_inprogress_reset — Approved status
|
||||
advances WITHOUT an intermediate set_issue_in_progress reset.
|
||||
* test_rejected_status_pulls_reason_from_comment — Rejected status pulls the
|
||||
reason from the issue's latest comment (mocked GET comments).
|
||||
"""
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
_test_db = os.path.join(tempfile.gettempdir(), "test_orchestrator_status_only.db")
|
||||
os.environ["ORCH_DB_PATH"] = _test_db
|
||||
os.environ.setdefault("ORCH_PLANE_WEBHOOK_SECRET", "")
|
||||
os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")
|
||||
os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")
|
||||
|
||||
import pytest # noqa: E402
|
||||
from unittest.mock import patch, AsyncMock # noqa: E402
|
||||
from fastapi.testclient import TestClient # noqa: E402
|
||||
|
||||
from src.main import app # noqa: E402
|
||||
from src.db import init_db, get_db # noqa: E402
|
||||
from src import projects as P # noqa: E402
|
||||
from src.projects import reload_projects # noqa: E402
|
||||
|
||||
ENDURO_PLANE_ID = "7a79f0a9-5278-49cd-9007-9a338f238f9c"
|
||||
APPROVED = "a519a341-dada-4a91-8910-7604f82b79c5"
|
||||
REJECTED = "ba958f3c-5db5-461d-8f82-89425e413b97"
|
||||
IN_REVIEW = "38fb1f64-aa1e-48a3-92e0-0b109679046b"
|
||||
|
||||
client = TestClient(app)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def setup(monkeypatch):
|
||||
monkeypatch.setattr(P.settings, "db_path", _test_db)
|
||||
import src.db as _db
|
||||
monkeypatch.setattr(_db.settings, "db_path", _test_db)
|
||||
if os.path.exists(_test_db):
|
||||
os.unlink(_test_db)
|
||||
init_db()
|
||||
monkeypatch.setattr("src.webhooks.plane.verify_plane_signature", lambda body, sig: True)
|
||||
registry_json = (
|
||||
f'[{{"plane_project_id": "{ENDURO_PLANE_ID}", "repo": "enduro-trails",'
|
||||
f' "work_item_prefix": "ET", "name": "enduro-trails"}}]'
|
||||
)
|
||||
monkeypatch.setattr(P.settings, "projects_json", registry_json)
|
||||
reload_projects()
|
||||
# Seed a task at the 'review' stage for plane_id 'r-1'.
|
||||
conn = get_db()
|
||||
conn.execute(
|
||||
"INSERT INTO tasks (plane_id, work_item_id, repo, branch, stage, plane_issue_id) "
|
||||
"VALUES (?, ?, ?, ?, ?, ?)",
|
||||
("r-1", "ET-700", "enduro-trails", "feature/ET-700-x", "review", "r-1"),
|
||||
)
|
||||
conn.commit()
|
||||
conn.close()
|
||||
yield
|
||||
reload_projects()
|
||||
if os.path.exists(_test_db):
|
||||
os.unlink(_test_db)
|
||||
|
||||
|
||||
class _FakeResp:
|
||||
def __init__(self, status_code, payload):
|
||||
self.status_code = status_code
|
||||
self._payload = payload
|
||||
|
||||
def json(self):
|
||||
return self._payload
|
||||
|
||||
|
||||
def _comment(text, plane_id="r-1"):
|
||||
return client.post("/webhook/plane", json={
|
||||
"event": "issue_comment", "action": "created",
|
||||
"data": {"work_item_id": plane_id, "comment_stripped": text,
|
||||
"project": ENDURO_PLANE_ID},
|
||||
})
|
||||
|
||||
|
||||
def _status(state_id, plane_id="r-1", old="prev"):
|
||||
return client.post("/webhook/plane", json={
|
||||
"event": "issue", "action": "updated",
|
||||
"data": {
|
||||
"id": plane_id, "name": "Status task", "project": ENDURO_PLANE_ID,
|
||||
"state": {"id": state_id, "name": "X", "group": "started"},
|
||||
},
|
||||
"activity": {"field": "state", "new_value": state_id, "old_value": old},
|
||||
})
|
||||
|
||||
|
||||
def _stage(plane_id="r-1"):
|
||||
conn = get_db()
|
||||
row = conn.execute("SELECT stage FROM tasks WHERE plane_id=?", (plane_id,)).fetchone()
|
||||
conn.close()
|
||||
return row[0] if row else None
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# Bug 3 root: In Review must not revert on a comment.
|
||||
# --------------------------------------------------------------------------- #
|
||||
@patch("src.webhooks.plane.enqueue_job")
|
||||
@patch("src.plane_sync.set_issue_in_progress")
|
||||
@patch("src.plane_sync._set_issue_state_direct")
|
||||
@patch("src.plane_sync.update_issue_state")
|
||||
def test_inreview_comment_does_not_revert(
|
||||
mock_update_state, mock_set_direct, mock_sip, mock_enqueue
|
||||
):
|
||||
"""Bug 3: task in In Review, ANY comment arrives -> status NOT reverted to
|
||||
In Progress, NO agent launched. The analyst's own 'waiting for approval'
|
||||
comment used to echo back and self-hit -> reverted In Review -> In Progress.
|
||||
"""
|
||||
# analyst's own echo comment
|
||||
resp = _comment("Готово, жду approved")
|
||||
assert resp.status_code == 200
|
||||
# no status changes whatsoever
|
||||
mock_sip.assert_not_called()
|
||||
mock_set_direct.assert_not_called()
|
||||
mock_update_state.assert_not_called()
|
||||
# no agent launched
|
||||
mock_enqueue.assert_not_called()
|
||||
# stage untouched
|
||||
assert _stage() == "review"
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# Any comment -> zero pipeline side-effects.
|
||||
# --------------------------------------------------------------------------- #
|
||||
@pytest.mark.parametrize("text", [":approved:", ":rejected: bad", "plain text", ""])
|
||||
@patch("src.webhooks.plane.enqueue_job")
|
||||
@patch("src.webhooks.plane._try_advance_stage", new_callable=AsyncMock)
|
||||
@patch("src.webhooks.plane._rollback_stage", new_callable=AsyncMock)
|
||||
@patch("src.plane_sync.set_issue_in_progress")
|
||||
@patch("src.plane_sync._set_issue_state_direct")
|
||||
def test_any_comment_no_pipeline_action(
|
||||
mock_set_direct, mock_sip, mock_rollback, mock_advance, mock_enqueue, text
|
||||
):
|
||||
resp = _comment(text)
|
||||
assert resp.status_code == 200
|
||||
mock_advance.assert_not_called()
|
||||
mock_rollback.assert_not_called()
|
||||
mock_sip.assert_not_called()
|
||||
mock_set_direct.assert_not_called()
|
||||
mock_enqueue.assert_not_called()
|
||||
assert _stage() == "review"
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# Approved status advances WITHOUT in_progress reset.
|
||||
# --------------------------------------------------------------------------- #
|
||||
@patch("src.plane_sync.set_issue_in_progress")
|
||||
@patch("src.webhooks.plane._try_advance_stage", new_callable=AsyncMock)
|
||||
def test_approved_status_advances_without_inprogress_reset(mock_advance, mock_sip):
|
||||
resp = _status(APPROVED)
|
||||
assert resp.status_code == 200
|
||||
mock_advance.assert_awaited_once()
|
||||
# work_item_id passed positionally
|
||||
assert "ET-700" in mock_advance.call_args.args
|
||||
# bug 3 (cause B): NO intermediate set_issue_in_progress before advance.
|
||||
mock_sip.assert_not_called()
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# Rejected status pulls reason from latest comment.
|
||||
# --------------------------------------------------------------------------- #
|
||||
@patch("src.webhooks.plane.httpx.get")
|
||||
@patch("src.webhooks.plane._rollback_stage", new_callable=AsyncMock)
|
||||
def test_rejected_status_pulls_reason_from_comment(mock_rollback, mock_get):
|
||||
mock_get.return_value = _FakeResp(200, {"results": [
|
||||
{"comment_stripped": "old comment", "created_at": "2026-06-03T09:00:00Z"},
|
||||
{"comment_html": "<p>Needs more test coverage</p>",
|
||||
"created_at": "2026-06-03T11:30:00Z"},
|
||||
]})
|
||||
resp = _status(REJECTED)
|
||||
assert resp.status_code == 200
|
||||
mock_rollback.assert_awaited_once()
|
||||
reason = mock_rollback.call_args.args[-1]
|
||||
# latest by created_at, HTML stripped
|
||||
assert "Needs more test coverage" in reason
|
||||
assert "<p>" not in reason
|
||||
|
||||
|
||||
@patch("src.webhooks.plane.httpx.get")
|
||||
@patch("src.webhooks.plane._rollback_stage", new_callable=AsyncMock)
|
||||
def test_rejected_status_no_comment_uses_fallback(mock_rollback, mock_get):
|
||||
mock_get.return_value = _FakeResp(200, {"results": []})
|
||||
resp = _status(REJECTED)
|
||||
assert resp.status_code == 200
|
||||
mock_rollback.assert_awaited_once()
|
||||
reason = mock_rollback.call_args.args[-1]
|
||||
assert "no reason comment" in reason
|
||||
243
tests/test_status_trigger.py
Normal file
243
tests/test_status_trigger.py
Normal file
@@ -0,0 +1,243 @@
|
||||
"""Feature 1: pipeline starts on status -> In Progress, not on creation.
|
||||
|
||||
* work_item.created / issue created -> NO task, NO branch, NO analyst.
|
||||
* issue updated -> In Progress (from backlog) -> task created + analyst enqueued.
|
||||
* a second In Progress update while the agent is busy -> NO duplicate, NO
|
||||
restart (busy-guard).
|
||||
* In Progress returned from Needs Input (agent idle) -> agent RELAUNCHED.
|
||||
|
||||
launcher / Gitea network are mocked. Real FastAPI endpoint via TestClient.
|
||||
"""
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
_test_db = os.path.join(tempfile.gettempdir(), "test_orchestrator_status_trigger.db")
|
||||
os.environ["ORCH_DB_PATH"] = _test_db
|
||||
os.environ.setdefault("ORCH_PLANE_WEBHOOK_SECRET", "")
|
||||
os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")
|
||||
os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")
|
||||
|
||||
import pytest # noqa: E402
|
||||
from unittest.mock import patch, AsyncMock # noqa: E402
|
||||
from fastapi.testclient import TestClient # noqa: E402
|
||||
|
||||
from src.main import app # noqa: E402
|
||||
from src.db import init_db, get_db # noqa: E402
|
||||
from src import projects as P # noqa: E402
|
||||
from src.projects import reload_projects # noqa: E402
|
||||
|
||||
ENDURO_PLANE_ID = "7a79f0a9-5278-49cd-9007-9a338f238f9c"
|
||||
IN_PROGRESS = "b873d9eb-993c-48cd-97ac-99a9b1623967"
|
||||
BACKLOG = "113b24f6-cce8-4be9-9a22-a359b9cf0122"
|
||||
|
||||
client = TestClient(app)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def setup(monkeypatch):
|
||||
monkeypatch.setattr(P.settings, "db_path", _test_db)
|
||||
import src.db as _db
|
||||
monkeypatch.setattr(_db.settings, "db_path", _test_db)
|
||||
if os.path.exists(_test_db):
|
||||
os.unlink(_test_db)
|
||||
init_db()
|
||||
monkeypatch.setattr("src.webhooks.plane.verify_plane_signature", lambda body, sig: True)
|
||||
registry_json = (
|
||||
f'[{{"plane_project_id": "{ENDURO_PLANE_ID}", "repo": "enduro-trails",'
|
||||
f' "work_item_prefix": "ET", "name": "enduro-trails"}}]'
|
||||
)
|
||||
monkeypatch.setattr(P.settings, "projects_json", registry_json)
|
||||
reload_projects()
|
||||
yield
|
||||
reload_projects()
|
||||
if os.path.exists(_test_db):
|
||||
os.unlink(_test_db)
|
||||
|
||||
|
||||
def _created(plane_id="st-created"):
|
||||
return client.post("/webhook/plane", json={
|
||||
"event": "issue", "action": "created",
|
||||
"data": {
|
||||
"id": plane_id, "name": "A valid backlog item title",
|
||||
"description_stripped": "A sufficiently long description for QG-0.",
|
||||
"project": ENDURO_PLANE_ID,
|
||||
"state": {"id": BACKLOG, "name": "Backlog", "group": "backlog"},
|
||||
},
|
||||
})
|
||||
|
||||
|
||||
def _to_in_progress(plane_id="st-1"):
|
||||
return client.post("/webhook/plane", json={
|
||||
"event": "issue", "action": "updated",
|
||||
"data": {
|
||||
"id": plane_id, "name": "A valid backlog item title",
|
||||
"description_stripped": "A sufficiently long description for QG-0.",
|
||||
"project": ENDURO_PLANE_ID,
|
||||
"state": {"id": IN_PROGRESS, "name": "In Progress", "group": "started"},
|
||||
},
|
||||
"activity": {"field": "state", "new_value": IN_PROGRESS, "old_value": BACKLOG},
|
||||
})
|
||||
|
||||
|
||||
def _count(plane_id):
|
||||
conn = get_db()
|
||||
n = conn.execute("SELECT COUNT(*) FROM tasks WHERE plane_id=?", (plane_id,)).fetchone()[0]
|
||||
conn.close()
|
||||
return n
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
@patch("src.webhooks.plane.enqueue_job")
|
||||
@patch("src.webhooks.plane._create_initial_docs", new_callable=AsyncMock)
|
||||
@patch("src.webhooks.plane._create_gitea_branch", new_callable=AsyncMock)
|
||||
def test_created_does_not_start_pipeline(mock_branch, mock_docs, mock_enqueue):
|
||||
resp = _created("st-created")
|
||||
assert resp.status_code == 200
|
||||
assert resp.json()["status"] == "accepted"
|
||||
# No task, no branch, no analyst enqueue.
|
||||
assert _count("st-created") == 0
|
||||
mock_branch.assert_not_called()
|
||||
mock_enqueue.assert_not_called()
|
||||
|
||||
|
||||
@patch("src.webhooks.plane.enqueue_job")
|
||||
@patch("src.webhooks.plane._create_initial_docs", new_callable=AsyncMock)
|
||||
@patch("src.webhooks.plane._create_gitea_branch", new_callable=AsyncMock)
|
||||
@patch("src.plane_sync.fetch_issue_sequence_id", return_value=5)
|
||||
def test_in_progress_starts_pipeline(mock_seq, mock_branch, mock_docs, mock_enqueue):
|
||||
mock_enqueue.return_value = 1
|
||||
resp = _to_in_progress("st-1")
|
||||
assert resp.status_code == 200
|
||||
assert resp.json()["status"] == "accepted"
|
||||
assert _count("st-1") == 1
|
||||
conn = get_db()
|
||||
task = conn.execute("SELECT * FROM tasks WHERE plane_id='st-1'").fetchone()
|
||||
conn.close()
|
||||
assert task["stage"] == "analysis"
|
||||
assert task["repo"] == "enduro-trails"
|
||||
mock_branch.assert_called_once()
|
||||
# analyst enqueued exactly once
|
||||
assert mock_enqueue.call_count == 1
|
||||
assert mock_enqueue.call_args.args[0] == "analyst"
|
||||
|
||||
|
||||
@patch("src.webhooks.plane.enqueue_job")
|
||||
@patch("src.webhooks.plane._create_initial_docs", new_callable=AsyncMock)
|
||||
@patch("src.webhooks.plane._create_gitea_branch", new_callable=AsyncMock)
|
||||
@patch("src.plane_sync.fetch_issue_sequence_id", return_value=5)
|
||||
def test_repeat_in_progress_while_job_active_does_not_relaunch(
|
||||
mock_seq, mock_branch, mock_docs, mock_enqueue
|
||||
):
|
||||
"""Status-only model busy-guard: a duplicate In Progress webhook that arrives
|
||||
while the stage agent still has a queued/running job must NOT relaunch the
|
||||
agent (no double launch).
|
||||
"""
|
||||
mock_enqueue.return_value = 1
|
||||
_to_in_progress("st-2")
|
||||
assert _count("st-2") == 1
|
||||
assert mock_enqueue.call_count == 1
|
||||
|
||||
# enqueue_job is mocked above, so no real job row exists. Seed an ACTIVE
|
||||
# (queued) job for the task so has_active_job_for_task() reports the agent as
|
||||
# busy -> the busy-guard fires.
|
||||
conn = get_db()
|
||||
task_id = conn.execute(
|
||||
"SELECT id FROM tasks WHERE plane_id='st-2'"
|
||||
).fetchone()[0]
|
||||
conn.execute(
|
||||
"INSERT INTO jobs (agent, repo, task_id, status) VALUES (?, ?, ?, 'queued')",
|
||||
("analyst", "enduro-trails", task_id),
|
||||
)
|
||||
conn.commit()
|
||||
conn.close()
|
||||
|
||||
# Second In Progress update. DISTINCT body (different activity old_value) so
|
||||
# webhook dedup does NOT short-circuit it — this exercises the busy-guard in
|
||||
# handle_status_start, not the delivery-dedup layer.
|
||||
resp = client.post("/webhook/plane", json={
|
||||
"event": "issue", "action": "updated",
|
||||
"data": {
|
||||
"id": "st-2", "name": "A valid backlog item title",
|
||||
"description_stripped": "A sufficiently long description for QG-0.",
|
||||
"project": ENDURO_PLANE_ID,
|
||||
"state": {"id": IN_PROGRESS, "name": "In Progress", "group": "started"},
|
||||
},
|
||||
"activity": {"field": "state", "new_value": IN_PROGRESS, "old_value": "some-other-state"},
|
||||
})
|
||||
assert resp.status_code == 200
|
||||
assert _count("st-2") == 1 # still exactly one task
|
||||
assert mock_enqueue.call_count == 1 # analyst NOT re-enqueued (busy-guard)
|
||||
|
||||
|
||||
@patch("src.webhooks.plane.add_comment", create=True)
|
||||
@patch("src.webhooks.plane.enqueue_job")
|
||||
@patch("src.webhooks.plane._create_initial_docs", new_callable=AsyncMock)
|
||||
@patch("src.webhooks.plane._create_gitea_branch", new_callable=AsyncMock)
|
||||
@patch("src.plane_sync.fetch_issue_sequence_id", return_value=5)
|
||||
def test_inprogress_from_needs_input_relaunches_analyst(
|
||||
mock_seq, mock_branch, mock_docs, mock_enqueue, mock_comment
|
||||
):
|
||||
"""Status-only answer-to-questions flow: an existing analysis task whose agent
|
||||
is IDLE (no active job — it went to Needs Input) is returned to In Progress
|
||||
-> the analyst is relaunched to read Slava's fresh comments.
|
||||
|
||||
+ double-webhook protection: a second In Progress while the relaunch job is
|
||||
active does NOT relaunch again.
|
||||
"""
|
||||
mock_enqueue.return_value = 1
|
||||
# First In Progress: starts the pipeline (creates task + enqueues analyst).
|
||||
_to_in_progress("st-ni")
|
||||
assert _count("st-ni") == 1
|
||||
assert mock_enqueue.call_count == 1
|
||||
|
||||
# The analyst finished and asked questions -> Needs Input. In our model that
|
||||
# means NO active job for the task (enqueue_job is mocked, so no job row).
|
||||
conn = get_db()
|
||||
task_id = conn.execute(
|
||||
"SELECT id FROM tasks WHERE plane_id='st-ni'"
|
||||
).fetchone()[0]
|
||||
has_job = conn.execute(
|
||||
"SELECT COUNT(*) FROM jobs WHERE task_id=? AND status IN ('queued','running')",
|
||||
(task_id,),
|
||||
).fetchone()[0]
|
||||
conn.close()
|
||||
assert has_job == 0 # agent idle
|
||||
|
||||
# Slava answers + returns the issue to In Progress (distinct body).
|
||||
resp = client.post("/webhook/plane", json={
|
||||
"event": "issue", "action": "updated",
|
||||
"data": {
|
||||
"id": "st-ni", "name": "A valid backlog item title",
|
||||
"description_stripped": "A sufficiently long description for QG-0.",
|
||||
"project": ENDURO_PLANE_ID,
|
||||
"state": {"id": IN_PROGRESS, "name": "In Progress", "group": "started"},
|
||||
},
|
||||
"activity": {"field": "state", "new_value": IN_PROGRESS, "old_value": "needs-input"},
|
||||
})
|
||||
assert resp.status_code == 200
|
||||
assert _count("st-ni") == 1 # no duplicate task
|
||||
assert mock_enqueue.call_count == 2 # analyst RELAUNCHED
|
||||
assert mock_enqueue.call_args.args[0] == "analyst"
|
||||
|
||||
# Seed an active job for the relaunch, then a SECOND In Progress webhook must
|
||||
# NOT relaunch again (busy-guard against double webhooks).
|
||||
conn = get_db()
|
||||
conn.execute(
|
||||
"INSERT INTO jobs (agent, repo, task_id, status) VALUES (?, ?, ?, 'running')",
|
||||
("analyst", "enduro-trails", task_id),
|
||||
)
|
||||
conn.commit()
|
||||
conn.close()
|
||||
resp2 = client.post("/webhook/plane", json={
|
||||
"event": "issue", "action": "updated",
|
||||
"data": {
|
||||
"id": "st-ni", "name": "A valid backlog item title",
|
||||
"description_stripped": "A sufficiently long description for QG-0.",
|
||||
"project": ENDURO_PLANE_ID,
|
||||
"state": {"id": IN_PROGRESS, "name": "In Progress", "group": "started"},
|
||||
},
|
||||
"activity": {"field": "state", "new_value": IN_PROGRESS, "old_value": "x-y-z"},
|
||||
})
|
||||
assert resp2.status_code == 200
|
||||
assert mock_enqueue.call_count == 2 # still 2 — busy-guard held
|
||||
138
tests/test_taskmd_description.py
Normal file
138
tests/test_taskmd_description.py
Normal file
@@ -0,0 +1,138 @@
|
||||
"""Tests for fix/taskmd-description (3 bugs at the analyst pipeline entry/exit):
|
||||
|
||||
BUG A: start_pipeline built the analyst .task.md WITHOUT the description body
|
||||
(only Title), so analyst received a ~101-byte file and reported the
|
||||
"business request is empty". task_desc must now carry the description.
|
||||
|
||||
BUG B: issue.updated ships only changed fields, so `name` is usually absent ->
|
||||
slug/branch became "untitled". start_pipeline must pull the real name
|
||||
from the Plane API (single fetch_issue_fields GET, above the slug build)
|
||||
so the branch slug is NOT "untitled".
|
||||
|
||||
BUG C: the analyst "artifacts ready" comment used the obsolete ":approved:"
|
||||
wording. Under the status-only model it must ask for the **Approved**
|
||||
status (not ":approved:", not "In Progress") and link the docs that
|
||||
actually exist.
|
||||
"""
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
_test_db = os.path.join(tempfile.gettempdir(), "test_orchestrator_taskmd_desc.db")
|
||||
os.environ["ORCH_DB_PATH"] = _test_db
|
||||
os.environ.setdefault("ORCH_PLANE_WEBHOOK_SECRET", "")
|
||||
os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")
|
||||
os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")
|
||||
|
||||
import pytest # noqa: E402
|
||||
from unittest.mock import patch, AsyncMock # noqa: E402
|
||||
from fastapi.testclient import TestClient # noqa: E402
|
||||
|
||||
from src.main import app # noqa: E402
|
||||
from src.db import init_db, get_db # noqa: E402
|
||||
from src import projects as P # noqa: E402
|
||||
from src.projects import reload_projects # noqa: E402
|
||||
|
||||
ENDURO_PLANE_ID = "7a79f0a9-5278-49cd-9007-9a338f238f9c"
|
||||
IN_PROGRESS = "b873d9eb-993c-48cd-97ac-99a9b1623967"
|
||||
BACKLOG = "113b24f6-cce8-4be9-9a22-a359b9cf0122"
|
||||
|
||||
client = TestClient(app)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def setup(monkeypatch):
|
||||
monkeypatch.setattr(P.settings, "db_path", _test_db)
|
||||
import src.db as _db
|
||||
monkeypatch.setattr(_db.settings, "db_path", _test_db)
|
||||
if os.path.exists(_test_db):
|
||||
os.unlink(_test_db)
|
||||
init_db()
|
||||
monkeypatch.setattr("src.webhooks.plane.verify_plane_signature", lambda body, sig: True)
|
||||
registry_json = (
|
||||
f'[{{"plane_project_id": "{ENDURO_PLANE_ID}", "repo": "enduro-trails",'
|
||||
f' "work_item_prefix": "ET", "name": "enduro-trails"}}]'
|
||||
)
|
||||
monkeypatch.setattr(P.settings, "projects_json", registry_json)
|
||||
reload_projects()
|
||||
yield
|
||||
reload_projects()
|
||||
if os.path.exists(_test_db):
|
||||
os.unlink(_test_db)
|
||||
|
||||
|
||||
def _task(plane_id):
|
||||
conn = get_db()
|
||||
row = conn.execute("SELECT * FROM tasks WHERE plane_id=?", (plane_id,)).fetchone()
|
||||
conn.close()
|
||||
return row
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# BUG A: description reaches the analyst .task.md
|
||||
# --------------------------------------------------------------------------- #
|
||||
@patch("src.webhooks.plane.enqueue_job", return_value=1)
|
||||
@patch("src.webhooks.plane._create_initial_docs", new_callable=AsyncMock)
|
||||
@patch("src.webhooks.plane._create_gitea_branch", new_callable=AsyncMock)
|
||||
@patch("src.plane_sync.fetch_issue_sequence_id", return_value=11)
|
||||
@patch("src.plane_sync.fetch_issue_fields",
|
||||
return_value=("ET-011 real title",
|
||||
"REAL BUSINESS REQUEST BODY: user wants GPX upload with "
|
||||
"validation and a results map."))
|
||||
def test_taskdesc_includes_description(
|
||||
mock_fields, mock_seq, mock_branch, mock_docs, mock_enqueue
|
||||
):
|
||||
resp = client.post("/webhook/plane", json={
|
||||
"event": "issue", "action": "updated",
|
||||
"data": {
|
||||
"id": "taskA",
|
||||
# status change payload: NO name, NO description (only changed field)
|
||||
"project": ENDURO_PLANE_ID,
|
||||
"state": {"id": IN_PROGRESS, "name": "In Progress", "group": "started"},
|
||||
},
|
||||
"activity": {"field": "state", "new_value": IN_PROGRESS, "old_value": BACKLOG},
|
||||
})
|
||||
assert resp.status_code == 200
|
||||
mock_enqueue.assert_called_once()
|
||||
# task_desc is the 3rd positional arg of enqueue_job(agent, repo, task_desc, ...)
|
||||
task_desc = mock_enqueue.call_args.args[2]
|
||||
assert "Description:" in task_desc
|
||||
# the actual description body (not just the Title) is in the file
|
||||
assert "REAL BUSINESS REQUEST BODY" in task_desc
|
||||
assert "results map" in task_desc
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# BUG B: name fetched from Plane API when payload is empty -> slug not untitled
|
||||
# --------------------------------------------------------------------------- #
|
||||
@patch("src.webhooks.plane.enqueue_job", return_value=1)
|
||||
@patch("src.webhooks.plane._create_initial_docs", new_callable=AsyncMock)
|
||||
@patch("src.webhooks.plane._create_gitea_branch", new_callable=AsyncMock)
|
||||
@patch("src.plane_sync.fetch_issue_sequence_id", return_value=11)
|
||||
@patch("src.plane_sync.fetch_issue_fields",
|
||||
return_value=("GPX upload feature",
|
||||
"A sufficiently long description so QG-0 passes cleanly."))
|
||||
def test_name_fetched_when_payload_empty(
|
||||
mock_fields, mock_seq, mock_branch, mock_docs, mock_enqueue
|
||||
):
|
||||
resp = client.post("/webhook/plane", json={
|
||||
"event": "issue", "action": "updated",
|
||||
"data": {
|
||||
"id": "taskB",
|
||||
# NO name, NO description in the payload (Plane status-change shape)
|
||||
"project": ENDURO_PLANE_ID,
|
||||
"state": {"id": IN_PROGRESS, "name": "In Progress", "group": "started"},
|
||||
},
|
||||
"activity": {"field": "state", "new_value": IN_PROGRESS, "old_value": BACKLOG},
|
||||
})
|
||||
assert resp.status_code == 200
|
||||
mock_fields.assert_called_once()
|
||||
row = _task("taskB")
|
||||
assert row is not None
|
||||
branch = row["branch"]
|
||||
# slug derived from the fetched name -> "gpx-upload-feature", NOT untitled
|
||||
assert "untitled" not in branch
|
||||
assert "gpx-upload-feature" in branch
|
||||
# Title in the analyst task file is the fetched name, not "untitled"
|
||||
task_desc = mock_enqueue.call_args.args[2]
|
||||
assert "Title: GPX upload feature" in task_desc
|
||||
518
tests/test_telegram_tracker.py
Normal file
518
tests/test_telegram_tracker.py
Normal file
@@ -0,0 +1,518 @@
|
||||
"""feat/telegram-live-tracker: tests for the live Telegram task tracker.
|
||||
|
||||
Covers (per DEV_TASK_TELEGRAM_TRACKER.md):
|
||||
* short_model_name: provider/claude- prefix trimming.
|
||||
* render_task_tracker: per-stage line format (in↓/out↑, model, cost, minutes),
|
||||
the "⏸️ Ревью БРД · твоё время" line, the 💰 totals, and the finish block
|
||||
(⏱️ three times + 🔗/📦).
|
||||
* first message -> sendMessage stores message_id; transition -> editMessageText.
|
||||
* fallback: editMessageText fails -> a NEW message is sent and the id updated.
|
||||
* which alerts go out SEPARATELY (approve-gate / deploy-fail / agent-fail /
|
||||
error) vs which do NOT (QG-pending / agent-start / stage-transition).
|
||||
|
||||
Isolated temp DB; no network (httpx is patched).
|
||||
"""
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")
|
||||
os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")
|
||||
|
||||
_test_db = os.path.join(tempfile.gettempdir(), "test_orchestrator_tracker.db")
|
||||
os.environ["ORCH_DB_PATH"] = _test_db
|
||||
|
||||
from unittest.mock import MagicMock, patch # noqa: E402
|
||||
|
||||
import pytest # noqa: E402
|
||||
|
||||
import src.db as db_module # noqa: E402
|
||||
from src.db import init_db, get_db # noqa: E402
|
||||
from src import notifications as N # noqa: E402
|
||||
from src import usage as U # noqa: E402
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def setup_db(monkeypatch):
|
||||
monkeypatch.setattr(db_module.settings, "db_path", _test_db, raising=False)
|
||||
if os.path.exists(_test_db):
|
||||
os.unlink(_test_db)
|
||||
init_db()
|
||||
# Re-enable send_telegram (conftest stubs it to a no-op); these tests patch
|
||||
# httpx / the lower-level helpers explicitly per case.
|
||||
yield
|
||||
if os.path.exists(_test_db):
|
||||
os.unlink(_test_db)
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# helpers to build a task + runs in the DB
|
||||
# --------------------------------------------------------------------------- #
|
||||
def _mk_task(stage="development", title="\u0422\u0440\u0435\u043a\u0438 \u0441 \u0437\u0443\u043c\u0430 z5",
|
||||
wid="ET-012", brd_start=None, brd_end=None):
|
||||
conn = get_db()
|
||||
cur = conn.execute(
|
||||
"INSERT INTO tasks (plane_id, work_item_id, repo, branch, stage, title, "
|
||||
"brd_review_started_at, brd_review_ended_at) "
|
||||
"VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
|
||||
("p1", wid, "enduro-trails", "feature/ET-012-x", stage, title,
|
||||
brd_start, brd_end),
|
||||
)
|
||||
tid = cur.lastrowid
|
||||
conn.commit()
|
||||
conn.close()
|
||||
return tid
|
||||
|
||||
|
||||
def _mk_run(task_id, agent, started, finished, in_tok, out_tok,
|
||||
cache_read=0, cache_creation=0, cost=0.0, model=None, exit_code=0):
|
||||
conn = get_db()
|
||||
cur = conn.execute(
|
||||
"INSERT INTO agent_runs (task_id, agent, started_at, finished_at, "
|
||||
"exit_code, input_tokens, output_tokens, cache_read_tokens, "
|
||||
"cache_creation_tokens, cost_usd, model) "
|
||||
"VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
|
||||
(task_id, agent, started, finished, exit_code, in_tok, out_tok,
|
||||
cache_read, cache_creation, cost, model),
|
||||
)
|
||||
rid = cur.lastrowid
|
||||
conn.commit()
|
||||
conn.close()
|
||||
return rid
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# short_model_name
|
||||
# --------------------------------------------------------------------------- #
|
||||
def test_short_model_name():
|
||||
assert U.short_model_name("tokenator/claude-opus-4-8") == "opus-4-8"
|
||||
assert U.short_model_name("vibecode/claude-sonnet-4.6") == "sonnet-4.6"
|
||||
assert U.short_model_name("claude-opus-4-8") == "opus-4-8"
|
||||
assert U.short_model_name("opus-4-8") == "opus-4-8"
|
||||
assert U.short_model_name(None) == ""
|
||||
assert U.short_model_name("") == ""
|
||||
|
||||
|
||||
def test_parse_usage_extracts_model_from_modelusage():
|
||||
blob = (
|
||||
'{"total_cost_usd":0.01,'
|
||||
'"usage":{"input_tokens":10,"output_tokens":5},'
|
||||
'"modelUsage":{"claude-opus-4-8":{"inputTokens":10,"outputTokens":5}}}'
|
||||
)
|
||||
u = U.parse_usage_from_text(blob)
|
||||
assert u["model"] == "claude-opus-4-8"
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# render_task_tracker
|
||||
# --------------------------------------------------------------------------- #
|
||||
def test_render_in_progress_stage_lines_and_totals():
|
||||
tid = _mk_task(stage="deploy", brd_start="2026-06-04 10:00:00",
|
||||
brd_end="2026-06-04 10:08:00")
|
||||
# Analysis: 10м, 1.1M in (mostly cache) / 39.6k out, $2.38, opus-4-8
|
||||
_mk_run(tid, "analyst", "2026-06-04 09:00:00", "2026-06-04 09:10:00",
|
||||
in_tok=1000, out_tok=39600, cache_read=1_100_000, cost=2.38,
|
||||
model="tokenator/claude-opus-4-8")
|
||||
_mk_run(tid, "architect", "2026-06-04 10:08:00", "2026-06-04 10:17:00",
|
||||
in_tok=500, out_tok=34400, cache_read=1_500_000, cost=2.24,
|
||||
model="tokenator/claude-opus-4-8")
|
||||
_mk_run(tid, "developer", "2026-06-04 10:17:00", "2026-06-04 10:28:00",
|
||||
in_tok=400, out_tok=45800, cache_read=8_400_000, cost=7.29,
|
||||
model="tokenator/claude-opus-4-8")
|
||||
_mk_run(tid, "reviewer", "2026-06-04 10:28:00", "2026-06-04 10:31:00",
|
||||
in_tok=300, out_tok=12900, cache_read=1_200_000, cost=1.53,
|
||||
model="vibecode/claude-sonnet-4.6")
|
||||
_mk_run(tid, "tester", "2026-06-04 10:31:00", "2026-06-04 10:36:00",
|
||||
in_tok=200, out_tok=19500, cache_read=1_200_000, cost=1.51,
|
||||
model="vibecode/claude-sonnet-4.6")
|
||||
# deployer started but not finished -> active "идёт" line.
|
||||
_mk_run(tid, "deployer", "2026-06-04 10:36:00", None,
|
||||
in_tok=0, out_tok=0, model=None, exit_code=None)
|
||||
|
||||
text = N.render_task_tracker(tid)
|
||||
|
||||
# Header in-progress
|
||||
assert text.startswith("\U0001f6e0\ufe0f ET-012 \u00b7 \u0422\u0440\u0435\u043a\u0438")
|
||||
# Per-stage format: in↓/out↑ · cost · model
|
||||
assert "\u2705 Analysis" in text
|
||||
assert "10\u043c" in text # analysis duration
|
||||
assert "39.6k\u2191" in text # analysis out
|
||||
assert "$2.38" in text
|
||||
assert "opus-4-8" in text
|
||||
assert "sonnet-4.6" in text # reviewer/tester model
|
||||
# BRD review line (human time, ended)
|
||||
assert "\u0420\u0435\u0432\u044c\u044e \u0411\u0420\u0414" in text
|
||||
assert "\u0442\u0432\u043e\u0451 \u0432\u0440\u0435\u043c\u044f" in text
|
||||
# Active stage
|
||||
assert "\U0001f504 Deploy" in text
|
||||
assert "\u0438\u0434\u0451\u0442" in text
|
||||
# Totals line present with 💰
|
||||
assert "\U0001f4b0" in text
|
||||
# In-progress: no final ⏱️ line
|
||||
assert "\u0412\u0441\u0435\u0433\u043e" not in text
|
||||
|
||||
|
||||
def test_render_brd_review_waiting_shows_hourglass():
|
||||
tid = _mk_task(stage="analysis", brd_start="2026-06-04 10:00:00",
|
||||
brd_end=None)
|
||||
_mk_run(tid, "analyst", "2026-06-04 09:00:00", "2026-06-04 09:10:00",
|
||||
in_tok=1000, out_tok=39600, cache_read=1_100_000, cost=2.38,
|
||||
model="tokenator/claude-opus-4-8")
|
||||
text = N.render_task_tracker(tid)
|
||||
assert "\u0420\u0435\u0432\u044c\u044e \u0411\u0420\u0414" in text
|
||||
assert "\u23f3" in text # hourglass while waiting
|
||||
|
||||
|
||||
def test_render_done_has_times_and_links():
|
||||
tid = _mk_task(stage="done", brd_start="2026-06-04 10:00:00",
|
||||
brd_end="2026-06-04 10:08:00")
|
||||
# set created/updated to compute wall clock
|
||||
conn = get_db()
|
||||
conn.execute(
|
||||
"UPDATE tasks SET created_at='2026-06-04 09:00:00', "
|
||||
"updated_at='2026-06-04 09:56:00' WHERE id=?", (tid,))
|
||||
conn.commit()
|
||||
conn.close()
|
||||
_mk_run(tid, "analyst", "2026-06-04 09:00:00", "2026-06-04 09:10:00",
|
||||
in_tok=1000, out_tok=39600, cache_read=1_100_000, cost=2.38,
|
||||
model="tokenator/claude-opus-4-8")
|
||||
_mk_run(tid, "deployer", "2026-06-04 09:50:00", "2026-06-04 09:56:00",
|
||||
in_tok=400, out_tok=22400, cache_read=1_600_000, cost=1.73,
|
||||
model="tokenator/claude-opus-4-8")
|
||||
|
||||
with patch("src.notifications.httpx") as _hx:
|
||||
# No PR found -> just "📦 deployed"
|
||||
_resp = MagicMock(status_code=200)
|
||||
_resp.json.return_value = []
|
||||
_hx.get.return_value = _resp
|
||||
text = N.render_task_tracker(tid)
|
||||
|
||||
assert text.startswith("\U0001f389 ET-012")
|
||||
assert "\u0413\u041e\u0422\u041e\u0412\u041e" in text
|
||||
# ⏱️ with three times
|
||||
assert "\u23f1\ufe0f" in text
|
||||
assert "\u0412\u0441\u0435\u0433\u043e" in text
|
||||
assert "\u0430\u0433\u0435\u043d\u0442\u044b" in text
|
||||
assert "\u0442\u0432\u043e\u0451" in text
|
||||
# 📦 deployed line
|
||||
assert "\U0001f4e6" in text
|
||||
|
||||
|
||||
def test_render_escapes_html_in_title():
|
||||
tid = _mk_task(stage="analysis", title="A <b>& B</b>")
|
||||
_mk_run(tid, "analyst", "2026-06-04 09:00:00", "2026-06-04 09:10:00",
|
||||
in_tok=10, out_tok=5, cost=0.0)
|
||||
text = N.render_task_tracker(tid)
|
||||
assert "<b>" in text
|
||||
assert "&" in text
|
||||
|
||||
|
||||
def test_render_omits_model_when_unknown():
|
||||
tid = _mk_task(stage="analysis")
|
||||
_mk_run(tid, "analyst", "2026-06-04 09:00:00", "2026-06-04 09:10:00",
|
||||
in_tok=10, out_tok=5, cost=0.0, model=None)
|
||||
text = N.render_task_tracker(tid)
|
||||
# No trailing " · <model>" — line ends at cost.
|
||||
line = [l for l in text.splitlines() if l.startswith("\u2705 Analysis")][0]
|
||||
assert line.rstrip().endswith("$0.00")
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# tracker send / edit / fallback
|
||||
# --------------------------------------------------------------------------- #
|
||||
def test_first_call_sends_message_and_stores_id(monkeypatch):
|
||||
tid = _mk_task(stage="analysis")
|
||||
_mk_run(tid, "analyst", "2026-06-04 09:00:00", None, in_tok=0, out_tok=0,
|
||||
exit_code=None)
|
||||
|
||||
sent = {}
|
||||
def _fake_send(text, disable_notification=False):
|
||||
sent["text"] = text
|
||||
sent["silent"] = disable_notification
|
||||
return 555
|
||||
monkeypatch.setattr(N, "send_telegram", _fake_send)
|
||||
monkeypatch.setattr(N, "edit_telegram", lambda *a, **k: (_ for _ in ()).throw(AssertionError("should not edit on first call")))
|
||||
|
||||
N.update_task_tracker(tid)
|
||||
|
||||
from src.db import get_tracker_message_id
|
||||
assert get_tracker_message_id(tid) == 555
|
||||
assert sent["silent"] is True # tracker is silent
|
||||
|
||||
|
||||
def test_second_call_edits_existing_message(monkeypatch):
|
||||
tid = _mk_task(stage="development")
|
||||
_mk_run(tid, "analyst", "2026-06-04 09:00:00", "2026-06-04 09:10:00",
|
||||
in_tok=10, out_tok=5, cost=0.1)
|
||||
from src.db import set_tracker_message_id
|
||||
set_tracker_message_id(tid, 777)
|
||||
|
||||
edited = {}
|
||||
monkeypatch.setattr(N, "edit_telegram",
|
||||
lambda mid, text: edited.update(mid=mid) or N.EDIT_OK)
|
||||
monkeypatch.setattr(N, "send_telegram",
|
||||
lambda *a, **k: (_ for _ in ()).throw(AssertionError("should not send when edit succeeds")))
|
||||
|
||||
N.update_task_tracker(tid)
|
||||
assert edited["mid"] == 777
|
||||
|
||||
|
||||
def test_fallback_to_new_message_when_edit_gone(monkeypatch):
|
||||
"""edit returns 'gone' (message deleted/too old) -> send NEW + update id."""
|
||||
tid = _mk_task(stage="development")
|
||||
_mk_run(tid, "analyst", "2026-06-04 09:00:00", "2026-06-04 09:10:00",
|
||||
in_tok=10, out_tok=5, cost=0.1)
|
||||
from src.db import set_tracker_message_id, get_tracker_message_id
|
||||
set_tracker_message_id(tid, 100)
|
||||
|
||||
monkeypatch.setattr(N, "edit_telegram", lambda mid, text: N.EDIT_GONE)
|
||||
monkeypatch.setattr(N, "send_telegram", lambda text, disable_notification=False: 200)
|
||||
|
||||
N.update_task_tracker(tid)
|
||||
assert get_tracker_message_id(tid) == 200 # id updated to the new message
|
||||
|
||||
|
||||
def test_not_modified_does_not_send_new_message(monkeypatch):
|
||||
"""edit returns 'not_modified' -> NO new message, id unchanged (no dupe)."""
|
||||
tid = _mk_task(stage="development")
|
||||
_mk_run(tid, "analyst", "2026-06-04 09:00:00", "2026-06-04 09:10:00",
|
||||
in_tok=10, out_tok=5, cost=0.1)
|
||||
from src.db import set_tracker_message_id, get_tracker_message_id
|
||||
set_tracker_message_id(tid, 100)
|
||||
|
||||
monkeypatch.setattr(N, "edit_telegram", lambda mid, text: N.EDIT_NOT_MODIFIED)
|
||||
monkeypatch.setattr(N, "send_telegram",
|
||||
lambda *a, **k: (_ for _ in ()).throw(AssertionError("must not send on not_modified")))
|
||||
|
||||
N.update_task_tracker(tid)
|
||||
assert get_tracker_message_id(tid) == 100 # unchanged, no duplicate
|
||||
|
||||
|
||||
def test_transient_edit_failure_does_not_send_new_message(monkeypatch):
|
||||
"""edit returns 'failed' (network/timeout/5xx) -> NO new message (no dupe)."""
|
||||
tid = _mk_task(stage="development")
|
||||
_mk_run(tid, "analyst", "2026-06-04 09:00:00", "2026-06-04 09:10:00",
|
||||
in_tok=10, out_tok=5, cost=0.1)
|
||||
from src.db import set_tracker_message_id, get_tracker_message_id
|
||||
set_tracker_message_id(tid, 100)
|
||||
|
||||
monkeypatch.setattr(N, "edit_telegram", lambda mid, text: N.EDIT_FAILED)
|
||||
monkeypatch.setattr(N, "send_telegram",
|
||||
lambda *a, **k: (_ for _ in ()).throw(AssertionError("must not send on transient failure")))
|
||||
|
||||
N.update_task_tracker(tid)
|
||||
assert get_tracker_message_id(tid) == 100 # unchanged, no duplicate
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# edit_telegram outcome classification (httpx mocked)
|
||||
# --------------------------------------------------------------------------- #
|
||||
def _edit_resp(ok, description=None):
|
||||
resp = MagicMock()
|
||||
body = {"ok": ok}
|
||||
if description is not None:
|
||||
body["description"] = description
|
||||
resp.json.return_value = body
|
||||
return resp
|
||||
|
||||
|
||||
def _patch_tg_creds(monkeypatch):
|
||||
monkeypatch.setattr(N._get_settings(), "telegram_bot_token", "T", raising=False)
|
||||
monkeypatch.setattr(N._get_settings(), "telegram_chat_id", "C", raising=False)
|
||||
|
||||
|
||||
def test_edit_telegram_ok(monkeypatch):
|
||||
_patch_tg_creds(monkeypatch)
|
||||
with patch("src.notifications.httpx") as hx:
|
||||
hx.post.return_value = _edit_resp(True)
|
||||
assert N.edit_telegram(1, "x") == N.EDIT_OK
|
||||
|
||||
|
||||
def test_edit_telegram_not_modified_is_success(monkeypatch):
|
||||
# 400 "message is not modified" -> success, not gone, no duplicate
|
||||
_patch_tg_creds(monkeypatch)
|
||||
with patch("src.notifications.httpx") as hx:
|
||||
hx.post.return_value = _edit_resp(
|
||||
False, "Bad Request: message is not modified: ...")
|
||||
assert N.edit_telegram(1, "x") == N.EDIT_NOT_MODIFIED
|
||||
|
||||
|
||||
def test_edit_telegram_exactly_the_same_is_not_modified(monkeypatch):
|
||||
_patch_tg_creds(monkeypatch)
|
||||
with patch("src.notifications.httpx") as hx:
|
||||
hx.post.return_value = _edit_resp(
|
||||
False, "Bad Request: specified new message content and reply markup "
|
||||
"are exactly the same")
|
||||
assert N.edit_telegram(1, "x") == N.EDIT_NOT_MODIFIED
|
||||
|
||||
|
||||
def test_edit_telegram_message_not_found_is_gone(monkeypatch):
|
||||
_patch_tg_creds(monkeypatch)
|
||||
with patch("src.notifications.httpx") as hx:
|
||||
hx.post.return_value = _edit_resp(
|
||||
False, "Bad Request: message to edit not found")
|
||||
assert N.edit_telegram(1, "x") == N.EDIT_GONE
|
||||
|
||||
|
||||
def test_edit_telegram_cant_be_edited_is_gone(monkeypatch):
|
||||
_patch_tg_creds(monkeypatch)
|
||||
with patch("src.notifications.httpx") as hx:
|
||||
hx.post.return_value = _edit_resp(
|
||||
False, "Bad Request: message can't be edited")
|
||||
assert N.edit_telegram(1, "x") == N.EDIT_GONE
|
||||
|
||||
|
||||
def test_edit_telegram_unknown_400_is_failed(monkeypatch):
|
||||
# unknown 400 -> failed (NOT gone) -> caller won't duplicate
|
||||
_patch_tg_creds(monkeypatch)
|
||||
with patch("src.notifications.httpx") as hx:
|
||||
hx.post.return_value = _edit_resp(
|
||||
False, "Bad Request: some other unexpected error")
|
||||
assert N.edit_telegram(1, "x") == N.EDIT_FAILED
|
||||
|
||||
|
||||
def test_edit_telegram_timeout_is_failed(monkeypatch):
|
||||
_patch_tg_creds(monkeypatch)
|
||||
with patch("src.notifications.httpx") as hx:
|
||||
hx.post.side_effect = Exception("read timeout")
|
||||
assert N.edit_telegram(1, "x") == N.EDIT_FAILED
|
||||
|
||||
|
||||
def test_edit_telegram_5xx_is_failed(monkeypatch):
|
||||
# Telegram 5xx still returns ok:false w/o gone/not_modified markers
|
||||
_patch_tg_creds(monkeypatch)
|
||||
with patch("src.notifications.httpx") as hx:
|
||||
hx.post.return_value = _edit_resp(False, "Internal Server Error")
|
||||
assert N.edit_telegram(1, "x") == N.EDIT_FAILED
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# render: repeated stage attempt shows "попытка N"
|
||||
# --------------------------------------------------------------------------- #
|
||||
_POPYTKA = "\u043f\u043e\u043f\u044b\u0442\u043a\u0430" # popytka
|
||||
|
||||
|
||||
def test_render_active_stage_shows_attempt_on_second_run():
|
||||
# Two reviewer runs while in review -> active line shows attempt 2.
|
||||
tid = _mk_task(stage="review")
|
||||
_mk_run(tid, "analyst", "2026-06-04 09:00:00", "2026-06-04 09:10:00",
|
||||
in_tok=10, out_tok=5, cost=0.1, model="tokenator/claude-opus-4-8")
|
||||
_mk_run(tid, "developer", "2026-06-04 09:10:00", "2026-06-04 09:20:00",
|
||||
in_tok=10, out_tok=5, cost=0.1, model="tokenator/claude-opus-4-8")
|
||||
# First review run finished (sent back to dev), second review run active.
|
||||
_mk_run(tid, "reviewer", "2026-06-04 09:20:00", "2026-06-04 09:25:00",
|
||||
in_tok=10, out_tok=5, cost=0.1, model="vibecode/claude-sonnet-4.6",
|
||||
exit_code=0)
|
||||
_mk_run(tid, "reviewer", "2026-06-04 09:30:00", None,
|
||||
in_tok=0, out_tok=0, exit_code=None)
|
||||
|
||||
text = N.render_task_tracker(tid)
|
||||
active = [l for l in text.splitlines()
|
||||
if l.startswith("\U0001f504") and "Review" in l][0]
|
||||
assert _POPYTKA in active
|
||||
assert "2" in active
|
||||
assert "\u0438\u0434\u0451\u0442" in active
|
||||
|
||||
|
||||
def test_render_active_stage_no_attempt_on_first_run():
|
||||
# Single reviewer run -> active line has NO attempt marker.
|
||||
tid = _mk_task(stage="review")
|
||||
_mk_run(tid, "analyst", "2026-06-04 09:00:00", "2026-06-04 09:10:00",
|
||||
in_tok=10, out_tok=5, cost=0.1, model="tokenator/claude-opus-4-8")
|
||||
_mk_run(tid, "developer", "2026-06-04 09:10:00", "2026-06-04 09:20:00",
|
||||
in_tok=10, out_tok=5, cost=0.1, model="tokenator/claude-opus-4-8")
|
||||
_mk_run(tid, "reviewer", "2026-06-04 09:20:00", None,
|
||||
in_tok=0, out_tok=0, exit_code=None)
|
||||
|
||||
text = N.render_task_tracker(tid)
|
||||
active = [l for l in text.splitlines()
|
||||
if l.startswith("\U0001f504") and "Review" in l][0]
|
||||
assert _POPYTKA not in active
|
||||
assert "\u0438\u0434\u0451\u0442" in active
|
||||
|
||||
|
||||
def test_render_finished_lines_unaffected_by_attempt_logic():
|
||||
# Completed (checkmark) lines never carry an attempt marker.
|
||||
tid = _mk_task(stage="review")
|
||||
_mk_run(tid, "analyst", "2026-06-04 09:00:00", "2026-06-04 09:10:00",
|
||||
in_tok=10, out_tok=5, cost=0.1, model="tokenator/claude-opus-4-8")
|
||||
# developer ran twice (retry) but is a FINISHED stage now.
|
||||
_mk_run(tid, "developer", "2026-06-04 09:10:00", "2026-06-04 09:15:00",
|
||||
in_tok=10, out_tok=5, cost=0.1, model="tokenator/claude-opus-4-8")
|
||||
_mk_run(tid, "developer", "2026-06-04 09:16:00", "2026-06-04 09:20:00",
|
||||
in_tok=10, out_tok=5, cost=0.1, model="tokenator/claude-opus-4-8")
|
||||
text = N.render_task_tracker(tid)
|
||||
for l in text.splitlines():
|
||||
if l.startswith("\u2705"):
|
||||
assert _POPYTKA not in l
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# which alerts are SEPARATE vs tracker-only
|
||||
# --------------------------------------------------------------------------- #
|
||||
def test_approve_gate_sends_separate_message_and_starts_brd_clock(monkeypatch):
|
||||
tid = _mk_task(stage="analysis")
|
||||
calls = []
|
||||
monkeypatch.setattr(N, "send_telegram",
|
||||
lambda text, disable_notification=False: calls.append((text, disable_notification)) or 1)
|
||||
monkeypatch.setattr(N, "update_task_tracker", lambda task_id: None)
|
||||
|
||||
N.notify_approve_requested(tid)
|
||||
|
||||
# exactly one SEPARATE (notifying) send for the approve gate
|
||||
assert len(calls) == 1
|
||||
assert calls[0][1] is False # notifying
|
||||
assert "Approved" in calls[0][0]
|
||||
# BRD clock started
|
||||
conn = get_db()
|
||||
row = conn.execute("SELECT brd_review_started_at FROM tasks WHERE id=?", (tid,)).fetchone()
|
||||
conn.close()
|
||||
assert row[0] is not None
|
||||
|
||||
|
||||
def test_error_sends_separate_message(monkeypatch):
|
||||
tid = _mk_task(stage="development")
|
||||
calls = []
|
||||
monkeypatch.setattr(N, "send_telegram",
|
||||
lambda text, disable_notification=False: calls.append((text, disable_notification)) or 1)
|
||||
N.notify_error(tid, "boom")
|
||||
assert len(calls) == 1
|
||||
assert calls[0][1] is False # notifying
|
||||
assert "ERROR" in calls[0][0]
|
||||
|
||||
|
||||
def test_stage_change_does_not_send_separate_message(monkeypatch):
|
||||
tid = _mk_task(stage="development")
|
||||
sent = []
|
||||
monkeypatch.setattr(N, "send_telegram",
|
||||
lambda text, disable_notification=False: sent.append(text) or 1)
|
||||
# tracker refresh is allowed (edit/send silent) but must NOT use send_telegram
|
||||
# for a separate notification; stub update to isolate.
|
||||
refreshed = []
|
||||
monkeypatch.setattr(N, "update_task_tracker", lambda task_id: refreshed.append(task_id))
|
||||
|
||||
N.notify_stage_change(tid, "development", "review")
|
||||
assert sent == [] # no separate message
|
||||
assert refreshed == [tid] # tracker refreshed instead
|
||||
|
||||
|
||||
def test_agent_started_does_not_send_separate_message(monkeypatch):
|
||||
tid = _mk_task(stage="analysis")
|
||||
sent = []
|
||||
monkeypatch.setattr(N, "send_telegram",
|
||||
lambda text, disable_notification=False: sent.append(text) or 1)
|
||||
refreshed = []
|
||||
monkeypatch.setattr(N, "update_task_tracker", lambda task_id: refreshed.append(task_id))
|
||||
|
||||
N.notify_agent_started(1, "analyst", tid)
|
||||
assert sent == []
|
||||
assert refreshed == [tid]
|
||||
|
||||
|
||||
def test_qg_failure_does_not_send_separate_message(monkeypatch):
|
||||
tid = _mk_task(stage="development")
|
||||
sent = []
|
||||
monkeypatch.setattr(N, "send_telegram",
|
||||
lambda text, disable_notification=False: sent.append(text) or 1)
|
||||
N.notify_qg_failure(tid, "development", "check_ci_green", "CI state: pending")
|
||||
assert sent == [] # QG-pending is log-only, never a separate ping
|
||||
309
tests/test_usage.py
Normal file
309
tests/test_usage.py
Normal file
@@ -0,0 +1,309 @@
|
||||
"""Feature 4: token / cost accounting tests.
|
||||
|
||||
Covers:
|
||||
* parse_usage_from_text on a REAL claude --output-format json result blob
|
||||
(captured live from CLI 2.1.142), including a leading text line.
|
||||
* parse on garbage / missing JSON -> None (never raises).
|
||||
* record_usage writes the columns; NULLs when usage is None.
|
||||
* fmt_tokens / fmt_cost formatting.
|
||||
* usage_comment string format.
|
||||
* task_usage_summary / task_summary_comment aggregate over agent_runs.
|
||||
|
||||
DB is an isolated temp file; no network or subprocess.
|
||||
"""
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")
|
||||
os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")
|
||||
|
||||
_test_db = os.path.join(tempfile.gettempdir(), "test_orchestrator_usage.db")
|
||||
os.environ["ORCH_DB_PATH"] = _test_db
|
||||
|
||||
import pytest # noqa: E402
|
||||
|
||||
from src import db as db_module # noqa: E402
|
||||
from src.db import init_db, get_db # noqa: E402
|
||||
from src import usage as U # noqa: E402
|
||||
|
||||
|
||||
# Real claude --output-format json result object (captured from CLI 2.1.142).
|
||||
REAL_RESULT_JSON = (
|
||||
'{"type":"result","subtype":"success","is_error":false,"duration_ms":1795,'
|
||||
'"num_turns":1,"result":"Hi!","session_id":"abc",'
|
||||
'"total_cost_usd":0.0560175,'
|
||||
'"usage":{"input_tokens":45231,"cache_creation_input_tokens":7418,'
|
||||
'"cache_read_input_tokens":18500,"output_tokens":12100,'
|
||||
'"service_tier":"standard"},'
|
||||
'"modelUsage":{"claude-opus-4-7":{"inputTokens":6,"outputTokens":7}},'
|
||||
'"permission_denials":[]}'
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def setup_db(monkeypatch):
|
||||
# get_db() reads settings.db_path live; pin it to our isolated DB.
|
||||
monkeypatch.setattr(db_module.settings, "db_path", _test_db, raising=False)
|
||||
if os.path.exists(_test_db):
|
||||
os.unlink(_test_db)
|
||||
init_db()
|
||||
yield
|
||||
if os.path.exists(_test_db):
|
||||
os.unlink(_test_db)
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# parsing
|
||||
# --------------------------------------------------------------------------- #
|
||||
def test_parse_real_result_json():
|
||||
u = U.parse_usage_from_text(REAL_RESULT_JSON)
|
||||
assert u is not None
|
||||
assert u["input_tokens"] == 45231
|
||||
assert u["output_tokens"] == 12100
|
||||
assert u["cache_read_tokens"] == 18500
|
||||
# FIX 2: cache_creation slice must now be parsed (was dropped before).
|
||||
assert u["cache_creation_tokens"] == 7418
|
||||
assert abs(u["cost_usd"] - 0.0560175) < 1e-9
|
||||
|
||||
|
||||
def test_parse_cache_creation_present():
|
||||
u = U.parse_usage_from_text(REAL_RESULT_JSON)
|
||||
assert u["cache_creation_tokens"] == 7418
|
||||
|
||||
|
||||
def test_parse_cache_creation_missing_defaults_zero():
|
||||
blob = (
|
||||
'{"total_cost_usd":0.01,'
|
||||
'"usage":{"input_tokens":10,"output_tokens":5,'
|
||||
'"cache_read_input_tokens":100}}'
|
||||
)
|
||||
u = U.parse_usage_from_text(blob)
|
||||
assert u["cache_creation_tokens"] == 0
|
||||
assert u["cache_read_tokens"] == 100
|
||||
|
||||
|
||||
def test_parse_with_leading_text():
|
||||
"""The agent may print text before the trailing JSON; we still find it."""
|
||||
text = "some agent stdout line\nanother line\n" + REAL_RESULT_JSON
|
||||
u = U.parse_usage_from_text(text)
|
||||
assert u is not None
|
||||
assert u["input_tokens"] == 45231
|
||||
assert u["output_tokens"] == 12100
|
||||
|
||||
|
||||
def test_parse_garbage_returns_none():
|
||||
assert U.parse_usage_from_text("not json at all { broken") is None
|
||||
assert U.parse_usage_from_text("") is None
|
||||
assert U.parse_usage_from_text(None) is None
|
||||
|
||||
|
||||
def test_parse_json_without_usage_returns_none():
|
||||
assert U.parse_usage_from_text('{"hello":"world"}') is None
|
||||
|
||||
|
||||
def test_parse_from_log_missing_file_returns_none():
|
||||
assert U.parse_usage_from_log("/no/such/file.log") is None
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# record_usage
|
||||
# --------------------------------------------------------------------------- #
|
||||
def _new_run(agent="developer", task_id=1):
|
||||
conn = get_db()
|
||||
cur = conn.execute("INSERT INTO agent_runs (task_id, agent) VALUES (?, ?)", (task_id, agent))
|
||||
rid = cur.lastrowid
|
||||
conn.commit()
|
||||
conn.close()
|
||||
return rid
|
||||
|
||||
|
||||
def test_record_usage_writes_columns():
|
||||
rid = _new_run()
|
||||
u = U.parse_usage_from_text(REAL_RESULT_JSON)
|
||||
U.record_usage(rid, u)
|
||||
conn = get_db()
|
||||
row = conn.execute(
|
||||
"SELECT input_tokens, output_tokens, cache_read_tokens, "
|
||||
"cache_creation_tokens, cost_usd "
|
||||
"FROM agent_runs WHERE id=?", (rid,)
|
||||
).fetchone()
|
||||
conn.close()
|
||||
assert row["input_tokens"] == 45231
|
||||
assert row["output_tokens"] == 12100
|
||||
assert row["cache_read_tokens"] == 18500
|
||||
# FIX 2: cache_creation column is now persisted.
|
||||
assert row["cache_creation_tokens"] == 7418
|
||||
assert abs(row["cost_usd"] - 0.0560175) < 1e-9
|
||||
|
||||
|
||||
def test_record_usage_none_writes_nulls():
|
||||
rid = _new_run()
|
||||
U.record_usage(rid, None) # must not raise
|
||||
conn = get_db()
|
||||
row = conn.execute("SELECT input_tokens, cost_usd FROM agent_runs WHERE id=?", (rid,)).fetchone()
|
||||
conn.close()
|
||||
assert row["input_tokens"] is None
|
||||
assert row["cost_usd"] is None
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# formatting
|
||||
# --------------------------------------------------------------------------- #
|
||||
def test_fmt_tokens():
|
||||
assert U.fmt_tokens(6) == "6"
|
||||
assert U.fmt_tokens(1234) == "1.2k"
|
||||
assert U.fmt_tokens(45231) == "45.2k"
|
||||
assert U.fmt_tokens(2_500_000) == "2.5M"
|
||||
assert U.fmt_tokens(None) == "0"
|
||||
|
||||
|
||||
def test_fmt_cost():
|
||||
assert U.fmt_cost(0.21) == "$0.21"
|
||||
assert U.fmt_cost(0.0560175) == "$0.06"
|
||||
assert U.fmt_cost(None) == "$0.00"
|
||||
|
||||
|
||||
def test_usage_comment_format():
|
||||
# No cache -> in_total == input_tokens, no cached breakdown shown.
|
||||
u = {"input_tokens": 45231, "output_tokens": 12100, "cost_usd": 0.21}
|
||||
c = U.usage_comment("developer", u)
|
||||
assert "Developer" in c
|
||||
assert "45.2k in" in c
|
||||
assert "cached" not in c
|
||||
assert "12.1k out" in c
|
||||
assert "$0.21" in c
|
||||
|
||||
|
||||
def test_usage_comment_shows_full_input_with_cached():
|
||||
"""FIX 2: in = input + cache_read + cache_creation, with cached breakdown."""
|
||||
u = {
|
||||
"input_tokens": 81,
|
||||
"cache_read_tokens": 8_400_000,
|
||||
"cache_creation_tokens": 100_000,
|
||||
"output_tokens": 45_800,
|
||||
"cost_usd": 7.29,
|
||||
}
|
||||
c = U.usage_comment("developer", u)
|
||||
# total in = 8_500_081 -> 8.5M ; cached = 8_500_000 -> 8.5M
|
||||
assert "8.5M in (8.5M cached)" in c
|
||||
assert "45.8k out" in c
|
||||
assert "$7.29" in c
|
||||
|
||||
|
||||
def test_usage_comment_no_cached_when_zero():
|
||||
u = {"input_tokens": 1234, "cache_read_tokens": 0,
|
||||
"cache_creation_tokens": 0, "output_tokens": 50, "cost_usd": 0.01}
|
||||
c = U.usage_comment("developer", u)
|
||||
assert "1.2k in" in c
|
||||
assert "cached" not in c
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# FIX 4: per-agent artifact links in finish comments
|
||||
# --------------------------------------------------------------------------- #
|
||||
def _ctx():
|
||||
return dict(repo="enduro-trails", branch="feature/ET-012-x",
|
||||
work_item_id="ET-012")
|
||||
|
||||
|
||||
def test_usage_comment_reviewer_links_review_doc():
|
||||
c = U.usage_comment("reviewer", {"input_tokens": 5}, **_ctx())
|
||||
assert "12-review.md" in c
|
||||
assert "ET-012" in c
|
||||
|
||||
|
||||
def test_usage_comment_tester_links_test_report():
|
||||
c = U.usage_comment("tester", {"input_tokens": 5}, **_ctx())
|
||||
assert "13-test-report.md" in c
|
||||
|
||||
|
||||
def test_usage_comment_deployer_links_deploy_log():
|
||||
c = U.usage_comment("deployer", {"input_tokens": 5}, **_ctx())
|
||||
assert "14-deploy-log.md" in c
|
||||
|
||||
|
||||
def test_usage_comment_developer_links_pr_and_branch():
|
||||
c = U.usage_comment("developer", {"input_tokens": 5}, pr_number=7, **_ctx())
|
||||
assert "pulls/7" in c
|
||||
assert "feature/ET-012-x" in c
|
||||
|
||||
|
||||
def test_usage_comment_architect_links_adr():
|
||||
c = U.usage_comment("architect", {"input_tokens": 5}, **_ctx())
|
||||
assert "06-adr" in c
|
||||
|
||||
|
||||
def test_usage_comment_no_links_without_context():
|
||||
"""Without repo/branch context, no links are appended (no crash)."""
|
||||
c = U.usage_comment("reviewer", {"input_tokens": 5})
|
||||
assert "12-review.md" not in c
|
||||
assert "http" not in c
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# task summary
|
||||
# --------------------------------------------------------------------------- #
|
||||
def test_task_summary_aggregates_over_agents():
|
||||
# two runs for the same task: developer + tester
|
||||
for agent, ti, to, cost in [("developer", 1000, 200, 0.10), ("tester", 500, 100, 0.05)]:
|
||||
rid = _new_run(agent=agent, task_id=42)
|
||||
U.record_usage(rid, {"input_tokens": ti, "output_tokens": to,
|
||||
"cache_read_tokens": 0, "cost_usd": cost})
|
||||
|
||||
s = U.task_usage_summary(42)
|
||||
assert s["total_in"] == 1500
|
||||
assert s["total_out"] == 300
|
||||
assert abs(s["total_cost"] - 0.15) < 1e-9
|
||||
agents = {a for a, *_ in s["per_agent"]}
|
||||
assert agents == {"developer", "tester"}
|
||||
|
||||
comment = U.task_summary_comment(42)
|
||||
assert "1.5k" in comment # total in
|
||||
assert "$0.15" in comment # total cost
|
||||
assert "Developer" in comment
|
||||
assert "Tester" in comment
|
||||
|
||||
|
||||
def test_task_summary_sums_all_three_input_components():
|
||||
"""FIX 2: total_in = SUM(input + cache_read + cache_creation); total_cached too."""
|
||||
rid = _new_run(agent="developer", task_id=77)
|
||||
U.record_usage(rid, {
|
||||
"input_tokens": 100,
|
||||
"cache_read_tokens": 2000,
|
||||
"cache_creation_tokens": 900,
|
||||
"output_tokens": 50,
|
||||
"cost_usd": 0.10,
|
||||
})
|
||||
rid2 = _new_run(agent="tester", task_id=77)
|
||||
U.record_usage(rid2, {
|
||||
"input_tokens": 10,
|
||||
"cache_read_tokens": 500,
|
||||
"cache_creation_tokens": 0,
|
||||
"output_tokens": 5,
|
||||
"cost_usd": 0.05,
|
||||
})
|
||||
s = U.task_usage_summary(77)
|
||||
# total_in = (100+2000+900) + (10+500+0) = 3510
|
||||
assert s["total_in"] == 3510
|
||||
# total_cached = (2000+900) + (500+0) = 3400
|
||||
assert s["total_cached"] == 3400
|
||||
assert s["total_out"] == 55
|
||||
comment = U.task_summary_comment(77)
|
||||
assert "cached" in comment
|
||||
|
||||
|
||||
def test_task_summary_handles_null_cache_creation():
|
||||
"""Pre-existing rows (NULL cache_creation) must not break aggregation."""
|
||||
rid = _new_run(agent="developer", task_id=88)
|
||||
conn = get_db()
|
||||
conn.execute(
|
||||
"UPDATE agent_runs SET input_tokens=100, cache_read_tokens=200, "
|
||||
"cache_creation_tokens=NULL, output_tokens=10, cost_usd=0.01 WHERE id=?",
|
||||
(rid,),
|
||||
)
|
||||
conn.commit()
|
||||
conn.close()
|
||||
s = U.task_usage_summary(88) # must not raise
|
||||
assert s["total_in"] == 300 # 100 + 200 + (NULL->0)
|
||||
assert s["total_cached"] == 200
|
||||
171
tests/test_verdict_status.py
Normal file
171
tests/test_verdict_status.py
Normal file
@@ -0,0 +1,171 @@
|
||||
"""Status-only verdict model: verdict statuses Approved / Rejected.
|
||||
|
||||
* issue updated -> Approved : calls _try_advance_stage, with NO intermediate
|
||||
set_issue_in_progress reset (bug 3 fix).
|
||||
* issue updated -> Rejected : calls _rollback_stage, with the reason pulled
|
||||
from the issue's latest comment.
|
||||
* COMMENTS NEVER trigger the pipeline: a :approved: / :rejected: comment is a
|
||||
pure no-op (the comment-based control mechanism was removed).
|
||||
|
||||
We mock the shared engine entry points (_try_advance_stage / _rollback_stage)
|
||||
and assert they fire ONLY for the status trigger, never for a comment.
|
||||
"""
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
_test_db = os.path.join(tempfile.gettempdir(), "test_orchestrator_verdict.db")
|
||||
os.environ["ORCH_DB_PATH"] = _test_db
|
||||
os.environ.setdefault("ORCH_PLANE_WEBHOOK_SECRET", "")
|
||||
os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")
|
||||
os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")
|
||||
|
||||
import pytest # noqa: E402
|
||||
from unittest.mock import patch, AsyncMock # noqa: E402
|
||||
from fastapi.testclient import TestClient # noqa: E402
|
||||
|
||||
from src.main import app # noqa: E402
|
||||
from src.db import init_db, get_db # noqa: E402
|
||||
from src import projects as P # noqa: E402
|
||||
from src.projects import reload_projects # noqa: E402
|
||||
|
||||
ENDURO_PLANE_ID = "7a79f0a9-5278-49cd-9007-9a338f238f9c"
|
||||
APPROVED = "a519a341-dada-4a91-8910-7604f82b79c5"
|
||||
REJECTED = "ba958f3c-5db5-461d-8f82-89425e413b97"
|
||||
|
||||
client = TestClient(app)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def setup(monkeypatch):
|
||||
monkeypatch.setattr(P.settings, "db_path", _test_db)
|
||||
import src.db as _db
|
||||
monkeypatch.setattr(_db.settings, "db_path", _test_db)
|
||||
if os.path.exists(_test_db):
|
||||
os.unlink(_test_db)
|
||||
init_db()
|
||||
monkeypatch.setattr("src.webhooks.plane.verify_plane_signature", lambda body, sig: True)
|
||||
registry_json = (
|
||||
f'[{{"plane_project_id": "{ENDURO_PLANE_ID}", "repo": "enduro-trails",'
|
||||
f' "work_item_prefix": "ET", "name": "enduro-trails"}}]'
|
||||
)
|
||||
monkeypatch.setattr(P.settings, "projects_json", registry_json)
|
||||
reload_projects()
|
||||
# Seed a task at the 'review' stage for plane_id 'v-1'.
|
||||
conn = get_db()
|
||||
conn.execute(
|
||||
"INSERT INTO tasks (plane_id, work_item_id, repo, branch, stage, plane_issue_id) "
|
||||
"VALUES (?, ?, ?, ?, ?, ?)",
|
||||
("v-1", "ET-500", "enduro-trails", "feature/ET-500-x", "review", "v-1"),
|
||||
)
|
||||
conn.commit()
|
||||
conn.close()
|
||||
yield
|
||||
reload_projects()
|
||||
if os.path.exists(_test_db):
|
||||
os.unlink(_test_db)
|
||||
|
||||
|
||||
def _status(state_id, plane_id="v-1", old="prev"):
|
||||
return client.post("/webhook/plane", json={
|
||||
"event": "issue", "action": "updated",
|
||||
"data": {
|
||||
"id": plane_id, "name": "Verdict task", "project": ENDURO_PLANE_ID,
|
||||
"state": {"id": state_id, "name": "X", "group": "started"},
|
||||
},
|
||||
"activity": {"field": "state", "new_value": state_id, "old_value": old},
|
||||
})
|
||||
|
||||
|
||||
def _comment(text, plane_id="v-1"):
|
||||
return client.post("/webhook/plane", json={
|
||||
"event": "issue_comment", "action": "created",
|
||||
"data": {"work_item_id": plane_id, "comment_stripped": text,
|
||||
"project": ENDURO_PLANE_ID},
|
||||
})
|
||||
|
||||
|
||||
class _FakeResp:
|
||||
def __init__(self, status_code, payload):
|
||||
self.status_code = status_code
|
||||
self._payload = payload
|
||||
|
||||
def json(self):
|
||||
return self._payload
|
||||
|
||||
|
||||
def _comments_response(comments):
|
||||
return _FakeResp(200, {"results": comments})
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# Approved status -> advance (no in_progress reset)
|
||||
# --------------------------------------------------------------------------- #
|
||||
@patch("src.plane_sync.set_issue_in_progress")
|
||||
@patch("src.webhooks.plane._try_advance_stage", new_callable=AsyncMock)
|
||||
def test_approved_status_advances(mock_advance, mock_sip):
|
||||
resp = _status(APPROVED)
|
||||
assert resp.status_code == 200
|
||||
mock_advance.assert_awaited_once()
|
||||
# advanced the right task (ET-500 at review)
|
||||
args = mock_advance.call_args.args
|
||||
assert "ET-500" in args # work_item_id is passed positionally
|
||||
# bug 3 fix: handle_verdict no longer resets the status to In Progress.
|
||||
mock_sip.assert_not_called()
|
||||
|
||||
|
||||
@patch("src.plane_sync.set_issue_in_progress")
|
||||
@patch("src.webhooks.plane._rollback_stage", new_callable=AsyncMock)
|
||||
@patch("src.webhooks.plane._try_advance_stage", new_callable=AsyncMock)
|
||||
def test_approved_comment_is_noop(mock_advance, mock_rollback, mock_sip):
|
||||
"""Status-only model: a :approved: comment NEVER advances the pipeline."""
|
||||
resp = _comment(":approved:")
|
||||
assert resp.status_code == 200
|
||||
mock_advance.assert_not_called()
|
||||
mock_rollback.assert_not_called()
|
||||
mock_sip.assert_not_called()
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# Rejected status -> rollback (reason from latest comment)
|
||||
# --------------------------------------------------------------------------- #
|
||||
@patch("src.webhooks.plane.httpx.get")
|
||||
@patch("src.webhooks.plane._rollback_stage", new_callable=AsyncMock)
|
||||
def test_rejected_status_rolls_back(mock_rollback, mock_get):
|
||||
mock_get.return_value = _comments_response(
|
||||
[{"comment_stripped": "ADR missing tradeoffs",
|
||||
"created_at": "2026-06-03T10:00:00Z"}]
|
||||
)
|
||||
resp = _status(REJECTED)
|
||||
assert resp.status_code == 200
|
||||
mock_rollback.assert_awaited_once()
|
||||
# reason pulled from the latest comment
|
||||
reason = mock_rollback.call_args.args[-1]
|
||||
assert "ADR missing tradeoffs" in reason
|
||||
|
||||
|
||||
@patch("src.webhooks.plane.httpx.get")
|
||||
@patch("src.plane_sync.set_issue_in_progress")
|
||||
@patch("src.webhooks.plane._rollback_stage", new_callable=AsyncMock)
|
||||
@patch("src.webhooks.plane._try_advance_stage", new_callable=AsyncMock)
|
||||
def test_rejected_comment_is_noop(mock_advance, mock_rollback, mock_sip, mock_get):
|
||||
"""Status-only model: a :rejected: comment NEVER rolls back the pipeline."""
|
||||
resp = _comment(":rejected: bad ADR")
|
||||
assert resp.status_code == 200
|
||||
mock_advance.assert_not_called()
|
||||
mock_rollback.assert_not_called()
|
||||
mock_sip.assert_not_called()
|
||||
mock_get.assert_not_called()
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# Unknown verdict status -> no-op
|
||||
# --------------------------------------------------------------------------- #
|
||||
@patch("src.webhooks.plane._rollback_stage", new_callable=AsyncMock)
|
||||
@patch("src.webhooks.plane._try_advance_stage", new_callable=AsyncMock)
|
||||
def test_other_status_no_verdict_action(mock_advance, mock_rollback):
|
||||
# In Review status is not a verdict -> neither advance nor rollback.
|
||||
resp = _status("38fb1f64-aa1e-48a3-92e0-0b109679046b") # in_review
|
||||
assert resp.status_code == 200
|
||||
mock_advance.assert_not_called()
|
||||
mock_rollback.assert_not_called()
|
||||
@@ -211,14 +211,21 @@ def test_gitea_fallback_hash_when_no_delivery_header():
|
||||
@patch("src.webhooks.plane._create_gitea_branch", new_callable=AsyncMock)
|
||||
@patch("src.webhooks.plane._create_initial_docs", new_callable=AsyncMock)
|
||||
def test_plane_fallback_hash_dedup(mock_docs, mock_branch, mock_enqueue):
|
||||
"""Repeated identical Plane body -> first accepted+enqueue, repeat duplicate."""
|
||||
"""Repeated identical Plane body -> first accepted+enqueue, repeat duplicate.
|
||||
|
||||
Feature 1: the pipeline now starts on a status change to In Progress, not on
|
||||
creation, so this drives the dedup test with an 'issue updated' event.
|
||||
"""
|
||||
IN_PROGRESS = "b873d9eb-993c-48cd-97ac-99a9b1623967"
|
||||
body = {
|
||||
"event": "work_item.created",
|
||||
"event": "issue",
|
||||
"action": "updated",
|
||||
"data": {
|
||||
"id": "pd-001",
|
||||
"name": "Dedup plane task",
|
||||
"description_stripped": "A sufficiently long description for QG-0 to pass.",
|
||||
"project": "proj-1",
|
||||
"state": {"id": IN_PROGRESS, "name": "In Progress", "group": "started"},
|
||||
},
|
||||
}
|
||||
r1 = client.post("/webhook/plane", json=body)
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import pytest
|
||||
import asyncio
|
||||
import os
|
||||
import tempfile
|
||||
from unittest.mock import patch, MagicMock, AsyncMock
|
||||
@@ -53,13 +54,19 @@ def test_status_endpoint():
|
||||
assert "active_tasks" in resp.json()
|
||||
|
||||
|
||||
@patch("src.plane_sync.add_comment")
|
||||
@patch("src.plane_sync.fetch_issue_sequence_id", return_value=None)
|
||||
@patch("src.plane_sync.fetch_issue_fields", return_value=("Test task", "This is a detailed test description for the task"))
|
||||
@patch("src.webhooks.plane._create_gitea_branch", new_callable=AsyncMock)
|
||||
@patch("src.webhooks.plane._create_initial_docs", new_callable=AsyncMock)
|
||||
def test_plane_webhook_creates_task(mock_docs, mock_branch):
|
||||
"""work_item.created → task in DB with stage=analysis."""
|
||||
def test_plane_webhook_creates_task(mock_docs, mock_branch, mock_fetch_fields, mock_fetch_seq, mock_add_comment):
|
||||
"""work_item.created (via In Progress status) → task in DB with stage=analysis."""
|
||||
resp = client.post("/webhook/plane", json={
|
||||
"event": "work_item.created",
|
||||
"data": {"id": "test-123", "name": "Test task", "project": "proj-1"}
|
||||
"event": "issue", "action": "updated",
|
||||
"data": {
|
||||
"id": "test-123", "name": "Test task", "project": "proj-1",
|
||||
"state": {"id": "b873d9eb-993c-48cd-97ac-99a9b1623967", "name": "In Progress", "group": "started"},
|
||||
}
|
||||
})
|
||||
assert resp.status_code == 200
|
||||
assert resp.json()["status"] == "accepted"
|
||||
@@ -74,17 +81,37 @@ def test_plane_webhook_creates_task(mock_docs, mock_branch):
|
||||
assert "feature/" in task["branch"]
|
||||
|
||||
|
||||
@patch("src.plane_sync.add_comment")
|
||||
@patch("src.plane_sync.fetch_issue_sequence_id", return_value=None)
|
||||
@patch("src.plane_sync.fetch_issue_fields",
|
||||
side_effect=[
|
||||
("First task", "This is a detailed description for the first task item"),
|
||||
("Second task", "This is a detailed description for the second task item"),
|
||||
])
|
||||
@patch("src.webhooks.plane._create_gitea_branch", new_callable=AsyncMock)
|
||||
@patch("src.webhooks.plane._create_initial_docs", new_callable=AsyncMock)
|
||||
def test_plane_webhook_generates_sequential_ids(mock_docs, mock_branch):
|
||||
"""Multiple work items get sequential IDs."""
|
||||
def test_plane_webhook_generates_sequential_ids(
|
||||
mock_docs, mock_branch, mock_fetch_fields, mock_fetch_seq, mock_add_comment
|
||||
):
|
||||
"""Multiple In Progress transitions get sequential IDs (ET-001, ET-002)."""
|
||||
in_progress_state = {
|
||||
"id": "b873d9eb-993c-48cd-97ac-99a9b1623967",
|
||||
"name": "In Progress",
|
||||
"group": "started",
|
||||
}
|
||||
client.post("/webhook/plane", json={
|
||||
"event": "work_item.created",
|
||||
"data": {"id": "item-1", "name": "First task", "project": "proj-1"}
|
||||
"event": "issue", "action": "updated",
|
||||
"data": {
|
||||
"id": "item-1", "name": "First task", "project": "proj-1",
|
||||
"state": in_progress_state,
|
||||
}
|
||||
})
|
||||
client.post("/webhook/plane", json={
|
||||
"event": "work_item.created",
|
||||
"data": {"id": "item-2", "name": "Second task", "project": "proj-1"}
|
||||
"event": "issue", "action": "updated",
|
||||
"data": {
|
||||
"id": "item-2", "name": "Second task", "project": "proj-1",
|
||||
"state": in_progress_state,
|
||||
}
|
||||
})
|
||||
|
||||
conn = get_db()
|
||||
@@ -95,27 +122,32 @@ def test_plane_webhook_generates_sequential_ids(mock_docs, mock_branch):
|
||||
assert ids[1] == "ET-002"
|
||||
|
||||
|
||||
APPROVED_STATE = "a519a341-dada-4a91-8910-7604f82b79c5"
|
||||
REJECTED_STATE = "ba958f3c-5db5-461d-8f82-89425e413b97"
|
||||
|
||||
|
||||
@patch("src.webhooks.plane._create_gitea_branch", new_callable=AsyncMock)
|
||||
@patch("src.webhooks.plane._create_initial_docs", new_callable=AsyncMock)
|
||||
@patch("src.webhooks.plane.launcher")
|
||||
def test_plane_approved_advances_stage(mock_launcher, mock_docs, mock_branch, tmp_path, monkeypatch):
|
||||
"""Comment :approved: at stage=analysis → advance to architecture."""
|
||||
"""Status-only model: Approved STATUS at stage=analysis -> advance to
|
||||
architecture. A comment never triggers this.
|
||||
"""
|
||||
# Patch repos_dir for QG check
|
||||
monkeypatch.setattr("src.qg.checks.settings.repos_dir", str(tmp_path))
|
||||
|
||||
# Create task first
|
||||
client.post("/webhook/plane", json={
|
||||
"event": "work_item.created",
|
||||
"data": {"id": "adv-001", "name": "Advance test", "project": "proj-1"}
|
||||
})
|
||||
|
||||
# Get the task to find work_item_id
|
||||
# Seed an analysis task directly (creation no longer makes a task post-PR#11).
|
||||
conn = get_db()
|
||||
task = conn.execute("SELECT * FROM tasks WHERE plane_id = 'adv-001'").fetchone()
|
||||
conn.execute(
|
||||
"INSERT INTO tasks (plane_id, work_item_id, repo, branch, stage, plane_issue_id) "
|
||||
"VALUES (?, ?, ?, ?, ?, ?)",
|
||||
("adv-001", "ET-001", "enduro-trails", "feature/ET-001-x", "analysis", "adv-001"),
|
||||
)
|
||||
conn.commit()
|
||||
conn.close()
|
||||
work_item_id = task["work_item_id"]
|
||||
work_item_id = "ET-001"
|
||||
|
||||
# Create required analysis files
|
||||
# Create required analysis files so the analysis QG passes.
|
||||
wi_dir = tmp_path / "enduro-trails" / "docs" / "work-items" / work_item_id
|
||||
wi_dir.mkdir(parents=True)
|
||||
(wi_dir / "01-brd.md").write_text("# BRD")
|
||||
@@ -123,16 +155,15 @@ def test_plane_approved_advances_stage(mock_launcher, mock_docs, mock_branch, tm
|
||||
(wi_dir / "03-acceptance-criteria.md").write_text("# AC")
|
||||
(wi_dir / "04-test-plan.yaml").write_text("tests: []")
|
||||
|
||||
# Mock launcher
|
||||
mock_launcher.launch.return_value = 1
|
||||
|
||||
# Send approved comment
|
||||
# Send Approved STATUS change.
|
||||
resp = client.post("/webhook/plane", json={
|
||||
"event": "comment.created",
|
||||
"event": "issue", "action": "updated",
|
||||
"data": {
|
||||
"work_item_id": "adv-001",
|
||||
"comment": "Looks good :approved:"
|
||||
}
|
||||
"id": "adv-001", "name": "Advance test", "project": "proj-1",
|
||||
"state": {"id": APPROVED_STATE, "name": "Approved", "group": "completed"},
|
||||
},
|
||||
})
|
||||
assert resp.status_code == 200
|
||||
|
||||
@@ -143,29 +174,39 @@ def test_plane_approved_advances_stage(mock_launcher, mock_docs, mock_branch, tm
|
||||
assert task["stage"] == "architecture"
|
||||
|
||||
|
||||
@patch("src.webhooks.plane.httpx.get")
|
||||
@patch("src.webhooks.plane._create_gitea_branch", new_callable=AsyncMock)
|
||||
@patch("src.webhooks.plane._create_initial_docs", new_callable=AsyncMock)
|
||||
def test_plane_rejected_rolls_back(mock_docs, mock_branch):
|
||||
"""Comment :rejected: rolls back stage."""
|
||||
# Create task
|
||||
client.post("/webhook/plane", json={
|
||||
"event": "work_item.created",
|
||||
"data": {"id": "rej-001", "name": "Reject test", "project": "proj-1"}
|
||||
})
|
||||
def test_plane_rejected_rolls_back(mock_docs, mock_branch, mock_get):
|
||||
"""Status-only model: Rejected STATUS rolls back stage. A comment never
|
||||
triggers this; the reason is pulled from the latest comment.
|
||||
"""
|
||||
class _R:
|
||||
status_code = 200
|
||||
@staticmethod
|
||||
def json():
|
||||
return {"results": [
|
||||
{"comment_stripped": "missing ADR", "created_at": "2026-06-03T10:00:00Z"}
|
||||
]}
|
||||
mock_get.return_value = _R()
|
||||
|
||||
# Manually set stage to architecture
|
||||
# Seed an architecture task directly.
|
||||
conn = get_db()
|
||||
conn.execute("UPDATE tasks SET stage = 'architecture' WHERE plane_id = 'rej-001'")
|
||||
conn.execute(
|
||||
"INSERT INTO tasks (plane_id, work_item_id, repo, branch, stage, plane_issue_id) "
|
||||
"VALUES (?, ?, ?, ?, ?, ?)",
|
||||
("rej-001", "ET-002", "enduro-trails", "feature/ET-002-x", "architecture", "rej-001"),
|
||||
)
|
||||
conn.commit()
|
||||
conn.close()
|
||||
|
||||
# Send rejected comment
|
||||
# Send Rejected STATUS change.
|
||||
resp = client.post("/webhook/plane", json={
|
||||
"event": "comment.created",
|
||||
"event": "issue", "action": "updated",
|
||||
"data": {
|
||||
"work_item_id": "rej-001",
|
||||
"comment": "Not ready :rejected:"
|
||||
}
|
||||
"id": "rej-001", "name": "Reject test", "project": "proj-1",
|
||||
"state": {"id": REJECTED_STATE, "name": "Rejected", "group": "cancelled"},
|
||||
},
|
||||
})
|
||||
assert resp.status_code == 200
|
||||
|
||||
@@ -187,8 +228,9 @@ def test_gitea_webhook_push():
|
||||
assert resp.json()["status"] == "accepted"
|
||||
|
||||
|
||||
@patch("src.webhooks.gitea.plane_notify_stage")
|
||||
@patch("src.webhooks.gitea.launcher")
|
||||
def test_gitea_push_with_adr_advances_stage(mock_launcher):
|
||||
def test_gitea_push_with_adr_advances_stage(mock_launcher, mock_plane_notify):
|
||||
"""Push with ADR files at architecture stage → advance to development."""
|
||||
mock_launcher.launch.return_value = 1
|
||||
|
||||
@@ -220,7 +262,7 @@ def test_gitea_push_with_adr_advances_stage(mock_launcher):
|
||||
task = conn.execute("SELECT * FROM tasks WHERE plane_id = 'push-001'").fetchone()
|
||||
conn.close()
|
||||
assert task["stage"] == "development"
|
||||
mock_launcher.launch.assert_called_once()
|
||||
mock_plane_notify.assert_called_once()
|
||||
|
||||
|
||||
@patch("src.webhooks.gitea.check_ci_green")
|
||||
@@ -258,6 +300,46 @@ def test_gitea_ci_success_advances_to_review(mock_launcher, mock_ci):
|
||||
assert task["stage"] == "review"
|
||||
|
||||
|
||||
@patch("src.webhooks.gitea.notify_qg_failure")
|
||||
@patch("src.webhooks.gitea.launcher")
|
||||
def test_gitea_ci_failure_on_development_notifies_qg_failure(mock_launcher, mock_notify):
|
||||
"""BUG 6: CI failure at development is now the authoritative QG gate failing.
|
||||
|
||||
It must notify QG failure (not silently suppress) and must NOT advance the stage.
|
||||
"""
|
||||
conn = get_db()
|
||||
conn.execute(
|
||||
"INSERT INTO tasks (plane_id, work_item_id, repo, branch, stage) VALUES (?, ?, ?, ?, ?)",
|
||||
("ci-fail-001", "ET-011", "enduro-trails", "feature/ET-011-test", "development"),
|
||||
)
|
||||
conn.commit()
|
||||
conn.close()
|
||||
|
||||
resp = client.post(
|
||||
"/webhook/gitea",
|
||||
json={
|
||||
"state": "failure",
|
||||
"branches": [{"name": "feature/ET-011-test"}],
|
||||
"repository": {"name": "enduro-trails"},
|
||||
},
|
||||
headers={"X-Gitea-Event": "status"},
|
||||
)
|
||||
assert resp.status_code == 200
|
||||
|
||||
# QG failure was reported for the development stage with check_ci_green.
|
||||
assert mock_notify.called
|
||||
args, kwargs = mock_notify.call_args
|
||||
call = list(args) + list(kwargs.values())
|
||||
assert "development" in call
|
||||
assert "check_ci_green" in call
|
||||
|
||||
# Stage did NOT advance.
|
||||
conn = get_db()
|
||||
task = conn.execute("SELECT * FROM tasks WHERE plane_id = 'ci-fail-001'").fetchone()
|
||||
conn.close()
|
||||
assert task["stage"] == "development"
|
||||
|
||||
|
||||
def test_gitea_webhook_pr():
|
||||
"""PR event is accepted."""
|
||||
resp = client.post(
|
||||
@@ -287,3 +369,158 @@ def test_plane_webhook_event_logged():
|
||||
conn.close()
|
||||
assert event is not None
|
||||
assert event["source"] == "plane"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# BUG 7: red CI on development must bounce the task back to the developer
|
||||
# (capped retries, symmetric to review REQUEST_CHANGES). These are pure-logic
|
||||
# tests: they invoke handle_ci_status() directly with mocked helpers so they do
|
||||
# not pass through the TestClient HMAC barrier (baseline 401s are off-limits).
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _ci_failure_payload():
|
||||
return {
|
||||
"state": "failure",
|
||||
"branches": [{"name": "feature/ET-011-test"}],
|
||||
"repository": {"name": "enduro-trails"},
|
||||
}
|
||||
|
||||
|
||||
def _mock_db_with_retry_count(count):
|
||||
"""Build a get_db() mock whose retry_count query returns `count`."""
|
||||
conn = MagicMock()
|
||||
conn.execute.return_value.fetchone.return_value = {"cnt": count}
|
||||
return conn
|
||||
|
||||
|
||||
@patch("src.webhooks.gitea.notify_error")
|
||||
@patch("src.webhooks.gitea.notify_qg_failure")
|
||||
@patch("src.webhooks.gitea.enqueue_job")
|
||||
@patch("src.webhooks.gitea.update_task_stage")
|
||||
@patch("src.webhooks.gitea.get_db")
|
||||
@patch("src.webhooks.gitea.get_task_by_repo_branch")
|
||||
@patch("src.webhooks.gitea.get_project_by_repo")
|
||||
def test_ci_failure_development_retries_developer_under_limit(
|
||||
mock_proj, mock_task, mock_get_db, mock_update_stage,
|
||||
mock_enqueue, mock_qg, mock_err,
|
||||
):
|
||||
"""retry_count < MAX_DEV_RETRIES → relaunch developer, stage untouched."""
|
||||
from src.webhooks.gitea import handle_ci_status
|
||||
|
||||
mock_proj.return_value = {"repo": "enduro-trails"}
|
||||
mock_task.return_value = {
|
||||
"id": 1, "stage": "development", "work_item_id": "ET-011",
|
||||
}
|
||||
mock_get_db.return_value = _mock_db_with_retry_count(0)
|
||||
mock_enqueue.return_value = 42
|
||||
|
||||
asyncio.run(handle_ci_status(_ci_failure_payload()))
|
||||
|
||||
# QG failure was still reported (Slava sees both the failure and the retry).
|
||||
assert mock_qg.called
|
||||
# developer was re-enqueued.
|
||||
assert mock_enqueue.called
|
||||
assert mock_enqueue.call_args[0][0] == "developer"
|
||||
# No escalation.
|
||||
assert not mock_err.called
|
||||
# Stage stays on development — no update_task_stage in the CI-failure path.
|
||||
assert not mock_update_stage.called
|
||||
|
||||
|
||||
@patch("src.webhooks.gitea.notify_error")
|
||||
@patch("src.webhooks.gitea.notify_qg_failure")
|
||||
@patch("src.webhooks.gitea.enqueue_job")
|
||||
@patch("src.webhooks.gitea.update_task_stage")
|
||||
@patch("src.webhooks.gitea.get_db")
|
||||
@patch("src.webhooks.gitea.get_task_by_repo_branch")
|
||||
@patch("src.webhooks.gitea.get_project_by_repo")
|
||||
def test_ci_failure_development_escalates_at_limit(
|
||||
mock_proj, mock_task, mock_get_db, mock_update_stage,
|
||||
mock_enqueue, mock_qg, mock_err,
|
||||
):
|
||||
"""retry_count >= MAX_DEV_RETRIES → escalate via notify_error, no relaunch."""
|
||||
from src.webhooks.gitea import handle_ci_status, MAX_DEV_RETRIES
|
||||
|
||||
mock_proj.return_value = {"repo": "enduro-trails"}
|
||||
mock_task.return_value = {
|
||||
"id": 1, "stage": "development", "work_item_id": "ET-011",
|
||||
}
|
||||
mock_get_db.return_value = _mock_db_with_retry_count(MAX_DEV_RETRIES)
|
||||
|
||||
asyncio.run(handle_ci_status(_ci_failure_payload()))
|
||||
|
||||
# QG failure still reported.
|
||||
assert mock_qg.called
|
||||
# developer NOT re-enqueued at the cap.
|
||||
assert not mock_enqueue.called
|
||||
# Escalation message mentions CI failure.
|
||||
assert mock_err.called
|
||||
err_msg = " ".join(str(a) for a in mock_err.call_args[0])
|
||||
assert "Max developer retries" in err_msg
|
||||
assert "after CI failure" in err_msg
|
||||
# Stage untouched.
|
||||
assert not mock_update_stage.called
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# BUG 8 (second door): a merged-PR webhook must NOT fake-complete a task that is
|
||||
# still in the deploy stage. On `deploy` done is gated by the deployer's verdict
|
||||
# (check_deploy_status via advance_stage), not by the merge event. For every
|
||||
# other stage the merge->done behaviour is preserved. Pure-logic tests: invoke
|
||||
# handle_pr() directly with mocked helpers (no HMAC barrier).
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _merged_pr_payload(branch="feature/ET-012-x"):
|
||||
return {
|
||||
"action": "closed",
|
||||
"pull_request": {
|
||||
"merged": True,
|
||||
"number": 7,
|
||||
"head": {"ref": branch},
|
||||
},
|
||||
"repository": {"name": "enduro-trails"},
|
||||
}
|
||||
|
||||
|
||||
@patch("src.webhooks.gitea.notify_stage_change")
|
||||
@patch("src.webhooks.gitea.update_task_stage")
|
||||
@patch("src.webhooks.gitea.get_task_by_repo_branch")
|
||||
@patch("src.webhooks.gitea.get_project_by_repo")
|
||||
def test_merge_on_deploy_stage_does_not_set_done(
|
||||
mock_proj, mock_task, mock_update_stage, mock_notify,
|
||||
):
|
||||
"""FIX 1: merge at deploy stage is ignored — done is gated by deployer verdict."""
|
||||
from src.webhooks.gitea import handle_pr
|
||||
|
||||
mock_proj.return_value = {"repo": "enduro-trails"}
|
||||
mock_task.return_value = {
|
||||
"id": 1, "stage": "deploy", "work_item_id": "ET-012",
|
||||
}
|
||||
|
||||
asyncio.run(handle_pr(_merged_pr_payload()))
|
||||
|
||||
# The merge-driven done path must NOT run on deploy.
|
||||
assert not mock_update_stage.called
|
||||
assert not mock_notify.called
|
||||
|
||||
|
||||
@patch("src.webhooks.gitea.notify_stage_change")
|
||||
@patch("src.webhooks.gitea.update_task_stage")
|
||||
@patch("src.webhooks.gitea.get_task_by_repo_branch")
|
||||
@patch("src.webhooks.gitea.get_project_by_repo")
|
||||
def test_merge_on_non_deploy_stage_sets_done(
|
||||
mock_proj, mock_task, mock_update_stage, mock_notify,
|
||||
):
|
||||
"""FIX 1: merge behaviour is preserved for non-deploy stages (e.g. review)."""
|
||||
from src.webhooks.gitea import handle_pr
|
||||
|
||||
mock_proj.return_value = {"repo": "enduro-trails"}
|
||||
mock_task.return_value = {
|
||||
"id": 2, "stage": "review", "work_item_id": "ET-013",
|
||||
}
|
||||
|
||||
asyncio.run(handle_pr(_merged_pr_payload(branch="feature/ET-013-x")))
|
||||
|
||||
# Non-deploy stages still get the merge-driven done.
|
||||
mock_update_stage.assert_called_once_with(2, "done")
|
||||
assert mock_notify.called
|
||||
|
||||
Reference in New Issue
Block a user