diff --git a/.env.example b/.env.example index a5f54b3..40dd85f 100644 --- a/.env.example +++ b/.env.example @@ -95,9 +95,14 @@ ORCH_IMAGE_FRESHNESS_REPOS= # GRACE_DEFAULT_S -> default "stuck" threshold on tasks.updated_at (seconds). # GRACE_OVERRIDES_JSON -> per-stage thresholds, e.g. {"development":300}; bad JSON -> default. # NOTIFY_UNBLOCK -> send a Telegram message when a stuck task is unblocked. +# SKIP_BLOCKED_ENABLED -> ORCH-060 F-1 Guard 2: skip reconciling issues a human moved +# to Blocked / Needs Input (per-candidate Plane state lookup). +# false mutes ONLY the networked Guard 2; Guard 1 (escalated by +# developer retries, local+deterministic) is always active. ORCH_RECONCILE_ENABLED=true ORCH_RECONCILE_PLANE_ENABLED=true ORCH_RECONCILE_INTERVAL_S=120 ORCH_RECONCILE_GRACE_DEFAULT_S=600 ORCH_RECONCILE_GRACE_OVERRIDES_JSON= ORCH_RECONCILE_NOTIFY_UNBLOCK=true +ORCH_RECONCILE_SKIP_BLOCKED_ENABLED=true diff --git a/CHANGELOG.md b/CHANGELOG.md index a8830f2..a3670e8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ - Цепочка стадий: `... testing → deploy-staging → deploy → done` (была без `deploy-staging`). ### Fixed +- **Reconciler (F-1) больше не разблокирует escalated / Blocked / Needs-Input задачи** (ORCH-060): sweeper потерянных webhook (ORCH-053) не отличал «застряла из-за потерянного события» от «исчерпала лимит developer-ретраев и ждёт человека» — если CI зелёный, а reviewer слал REQUEST_CHANGES до `MAX_DEVELOPER_RETRIES`, каждый тик F-1 видел зелёный `check_ci_green` и доигрывал `development → review` → reviewer снова REQUEST_CHANGES → откат (стадия не меняется, escalated в `gitea.py` лишь шлёт `notify_error`) → следующий тик снова разблокировал. Бесконечная петля (инцидент ET-013: 10 разблокировок за ночь, лишние запуски агентов/токены, спам в Telegram, паразитная нагрузка общего self-hosting-инстанса). 
В `Reconciler._reconcile_gate_task` (`src/reconciler.py`) ПОСЛЕ существующих гардов (`analysis` carve-out, нет гейта, активный job, grace) и ДО пред-оценки гейта добавлены два пред-гарда с ранним `return` (молчаливый skip — без `advance`, без инкремента `unblocked_total`, без нотификаций): **Guard 1 (escalated, детерминированный, без сети, проверяется первым)** — `developer_retry_count(task_id) >= MAX_DEVELOPER_RETRIES`; приватный `stage_engine._developer_retry_count` повышен до публичного `developer_retry_count` (единый источник истины по подсчёту ретраев `agent_runs`, приватное имя сохранено как алиас), граница берётся из `stage_engine.MAX_DEVELOPER_RETRIES` (не хардкод `3`). **Guard 2 (явный человеческий Plane-статус, Вариант A — без миграции БД)** — новый never-raise хелпер `plane_sync.fetch_issue_state(issue_id, project_id) -> str|None` (тот же endpoint/headers, что `fetch_issue_sequence_id`) + `Reconciler._is_blocked_or_needs_input(task)`: резолв проекта (`projects.get_project_by_repo`) → `get_project_states(pid)` → сверка текущего state issue с `blocked`/`needs_input`; любая ошибка/`None`/нерезолвленный проект → консервативный skip (`True`: не-разблокировать безопаснее). F-2 по существу не менялся: Blocked/Needs Input не входят в опрашиваемый набор `{in_progress, approved, rejected}` → не доигрываются (зафиксировано регресс-тестом). Новый под-флаг `ORCH_RECONCILE_SKIP_BLOCKED_ENABLED` (true) гасит ТОЛЬКО сетевой Guard 2 (escape hatch при Plane-outage); Guard 1 всегда активен. Схема БД, `STAGE_TRANSITIONS`, `QG_CHECKS`, never-raise на единицу работы, `analysis` carve-out и kill-switch'и (`reconcile_enabled`/`reconcile_plane_enabled`) не менялись. ADR `docs/work-items/ORCH-060/06-adr/ADR-001-reconciler-skip-escalated.md`. Тесты: `tests/test_reconciler.py` (TC-01…TC-11 + регресс ORCH-053). 
- **Re-deploy после отката больше не зависает на `deploy`; `.env.example` дополнен** (ORCH-036, review-fix): sentinel-маркеры самодеплоя (`approve-requested`/`initiated`/`result`) ключуются по стабильному `work_item_id`, поэтому при FAILED-деплое и откате БАГ-8 (`deploy → development`) они оставались на диске — после фикса developer-ом и повторного захода задачи на `deploy` Фаза B по idempotency-guard видела STALE `initiated` и становилась no-op: detached-хук не перезапускался, finalizer не ставился, задача висела на `deploy` навсегда (нарушался retry-контракт стадии, AC-4/AC-10; устаревший `result` к тому же был бы перечитан новым finalizer'ом). Добавлен `self_deploy.clear_state(repo, work_item_id)` (never-raise, idempotent, рекурсивное удаление `/.deploy-state-//`), вызывается в ветке БАГ-8-отката `check_deploy_status` FAILED (`src/stage_engine.py`) и дополнительно в начале Фазы A (`_handle_self_deploy_phase_a`) — каждый новый прод-деплой-проход стартует с чистого состояния. Отдельно: канонический `.env.example` (CLAUDE.md правило №8, ТЗ §2.6) дополнен полным блоком новых дескрипторов `ORCH_SELF_DEPLOY_*` / `ORCH_DEPLOY_*` (плейсхолдеры, секреты не коммитятся) по образцу merge-gate ORCH-043. Контракты `STAGE_TRANSITIONS` / `QG_CHECKS` / `_parse_deploy_status` / БАГ-8 / merge-gate не тронуты. Тесты: `tests/test_deploy_rollback.py::test_tc11_re_deploy_after_rollback_not_wedged`, `tests/test_deploy_hook_mapping.py::test_clear_state_removes_all_markers_and_is_idempotent`. 
- **Контейнер и агенты бегут под uid хоста (1000:1000), не root** (ORCH-040): оба сервиса в `docker-compose.yml` (`orchestrator`, `orchestrator-staging`) получили `user: "1000:1000"` (slin) — устраняет корень проблемы, при которой Claude-CLI агенты, запускаемые через `subprocess.Popen` внутри root-контейнера, создавали все артефакты конвейера (git worktree `/repos/_wt/...`, коммиты в `docs/work-items/...`) с владельцем `root:root` на хосте, из-за чего `git pull`/`git reset` под slin падали с `insufficient permission for adding an object` и каждый деплой требовал ручного `chown`. Теперь файлы сразу `slin:slin`. Доступ к docker.sock сохранён через `group_add: ["999"]` (МИНА 1 — НЕ удалена). SSH-маунт приведён к единому HOME агента: target `/root/.ssh` → `/home/slin/.ssh` (`/home/slin/.orchestrator-ssh:/home/slin/.ssh:ro`), синхронно с `HOME=/home/slin`, который launcher форсит в env Popen и git_env — устранён скрытый рассинхрон SSH-маунта с форсимым HOME. `src/agents/launcher.py` и `Dockerfile` НЕ менялись (numeric uid работает без записи в `/etc/passwd`; `safe.directory '*'` уже покрывает git над bind-mount). Требует host-prerequisites Owner (P-1…P-4, вне кода): блокер P-1 — `chown -R 1000:1000 /home/slin/.claude` для доступа uid 1000 к claude creds (иначе preflight заворачивает конвейер); прод-рестарт self — только в окно тишины (общий инстанс с enduro-trails), страховка — staging-гейт (adr-0003). ADR `docs/work-items/ORCH-040/06-adr/ADR-001-run-agents-as-host-uid.md`, глобальный `docs/architecture/adr/adr-0005-container-runs-as-host-uid.md`; INFRA.md обновлён (рантайм-uid, volumes/SSH target, host-prerequisites). Тесты: `tests/test_orch040_compose.py`. 
- **Staging-чек B6 читает реестр из окружения работающего staging-инстанса** (ORCH-048): блок B6 «Registry: sandbox present, prod ET/ORCH absent» в `scripts/staging_check.py` давал **ложный FAIL** (`prod-ET=YES(BAD!)`, `prod-ORCH=YES(BAD!)`) при фактически исправной изоляции — единственный чек suite, который не ходил к инстансу по HTTP, а импортировал `src.projects` локально через host-path хак `sys.path.insert(0, "/repos/orchestrator")` + `importlib.reload`, строя реестр из `ORCH_PROJECTS_JSON` **process-env запускающего процесса**. При фактическом запуске деплоером с хоста переменная не задана → дефолт `_DEFAULT_PROJECTS` (ET+ORCH) → ложный FAIL → лишний откат `deploy-staging → development`. Решение (вариант «в», ADR-001): host-path хак удалён; suite канонически запускается ВНУТРИ контейнера `orchestrator-staging` через `docker exec … python3 /repos/orchestrator/scripts/staging_check.py` (`scripts/` доступен только через bind-mount, `import src.projects` резолвится через `PYTHONPATH=/app` из кода контейнера, env — `.env.staging`) → B6 читает реестр именно работающего инстанса, без HTTP-bootstrap и «курицы-яйца». Логика вердикта вынесена в чистую `_evaluate_b6(known) -> (passed, detail)` (инвариант `passed ⟺ SANDBOX ∈ known ∧ PROD_ET ∉ known ∧ PROD_ORCH ∉ known`, формат detail сохранён) + `_known_project_ids_from_registry()` / `_run_b6()` с детерминированным FAIL при недоступности источника (не ложный PASS, не необработанное исключение). Синхронно обновлены `.openclaw/agents/deployer.md` (команда стадии через `docker exec`) и `docs/operations/STAGING_CHECK.md`. `src/projects.py`, `.env*` и прочие чеки A/B4/B5/C не тронуты; реестр `QG_CHECKS` и `check_staging_status` (ADR-0003) не менялись. ADR `docs/work-items/ORCH-048/06-adr/ADR-001-b6-registry-via-in-container-run.md`. Тесты: `tests/test_staging_check_b6.py`. 
diff --git a/README.md b/README.md index 01982a1..e7de2cd 100644 --- a/README.md +++ b/README.md @@ -135,6 +135,7 @@ uvicorn src.main:app --reload --port 8500 | `ORCH_RECONCILE_GRACE_DEFAULT_S` | Порог «застряла» по `tasks.updated_at`, сек | `600` | | `ORCH_RECONCILE_GRACE_OVERRIDES_JSON` | Per-stage пороги, напр. `{"development":300}` | `""` | | `ORCH_RECONCILE_NOTIFY_UNBLOCK` | Telegram при разблокировке застрявшей задачи | `true` | +| `ORCH_RECONCILE_SKIP_BLOCKED_ENABLED` | F-1 Guard 2 (ORCH-060): пропуск задач в Plane-статусе Blocked / Needs Input; `false` глушит только сетевой Guard 2 (Guard 1 escalated всегда активен) | `true` | ## Очередь задач (ORCH-1 / F-2b) diff --git a/docs/architecture/README.md b/docs/architecture/README.md index 91da3e3..1cc866f 100644 --- a/docs/architecture/README.md +++ b/docs/architecture/README.md @@ -201,4 +201,4 @@ never-raise на единицу работы; тишина при синхрон Схема БД, потоки данных, resilience-слой, детали Dockerfile — [internals.md](internals.md). --- -*Актуально на 2026-06-07. Обновлять при изменении src/stages.py, src/qg/checks.py, src/main.py. Статусы доработок: ORCH-036 (исполняемый самодеплой `deploy`, adr-0007) — реализовано; ORCH-043 (merge-gate, adr-0006) — design, ветка feature/ORCH-043; ORCH-053 (reconciler, adr-0007, src/reconciler.py) — реализовано; ORCH-060 (F-1 skip escalated/Blocked/Needs-Input, `docs/work-items/ORCH-060/06-adr/ADR-001`) — architecture, ветка feature/ORCH-060; ORCH-058 (провенанс staging-образа: check_staging_image_fresh + staging_check свежего образа + хук-guard, adr-0008) — реализовано в ветке feature/ORCH-058 (обновлять также при изменении src/image_freshness.py, scripts/orchestrator-deploy-hook.sh, Dockerfile).* +*Актуально на 2026-06-07. Обновлять при изменении src/stages.py, src/qg/checks.py, src/main.py. 
Статусы доработок: ORCH-036 (исполняемый самодеплой `deploy`, adr-0007) — реализовано; ORCH-043 (merge-gate, adr-0006) — design, ветка feature/ORCH-043; ORCH-053 (reconciler, adr-0007, src/reconciler.py) — реализовано; ORCH-060 (F-1 skip escalated/Blocked/Needs-Input, `docs/work-items/ORCH-060/06-adr/ADR-001`) — реализовано в ветке feature/ORCH-060 (Guard 1 `developer_retry_count>=MAX_DEVELOPER_RETRIES` + Guard 2 `plane_sync.fetch_issue_state` Blocked/Needs-Input, флаг `ORCH_RECONCILE_SKIP_BLOCKED_ENABLED`); ORCH-058 (провенанс staging-образа: check_staging_image_fresh + staging_check свежего образа + хук-guard, adr-0008) — реализовано в ветке feature/ORCH-058 (обновлять также при изменении src/image_freshness.py, scripts/orchestrator-deploy-hook.sh, Dockerfile).* diff --git a/src/config.py b/src/config.py index dd30d4a..c2781b2 100644 --- a/src/config.py +++ b/src/config.py @@ -234,12 +234,20 @@ class Settings(BaseSettings): # JSON -> default (mirrors agent_timeout_overrides_json). # reconcile_notify_unblock -> send a Telegram message when a stuck task is # unblocked (F-4 observability). + # reconcile_skip_blocked_enabled -> ORCH-060 Guard 2: skip F-1 reconciliation of + # issues a human moved to Blocked / Needs Input + # (per-candidate Plane state lookup). Disabling it + # mutes ONLY the networked Guard 2; Guard 1 + # (escalated-by-retries, local + deterministic) is + # always active. Manual escape hatch during a Plane + # outage. 
reconcile_enabled: bool = True
     reconcile_interval_s: int = 120
     reconcile_plane_enabled: bool = True
     reconcile_grace_default_s: int = 600
     reconcile_grace_overrides_json: str = ""
     reconcile_notify_unblock: bool = True
+    reconcile_skip_blocked_enabled: bool = True
 
     # Telegram notifications
     telegram_bot_token: str = ""
diff --git a/src/plane_sync.py b/src/plane_sync.py
index 8bf1d85..f6ed56f 100644
--- a/src/plane_sync.py
+++ b/src/plane_sync.py
@@ -278,6 +278,33 @@ def fetch_issue_sequence_id(issue_id: str, project_id: str) -> int | None:
         return None
 
 
+def fetch_issue_state(issue_id: str, project_id: str) -> str | None:
+    """ORCH-060 (F-1 Guard 2): GET the Plane issue and return its current state uuid.
+
+    Used by the reconciler to honour an explicit human gate: an issue a person
+    moved to **Blocked** / **Needs Input** must not be auto-unblocked by the
+    sweeper. Reuses the exact GET issue-detail endpoint / shared token already
+    used by ``fetch_issue_sequence_id`` / ``fetch_issue_fields``.
+
+    Plane returns ``state`` as a bare uuid string; older shapes may nest it as a
+    ``{"id": ...}`` dict — both are handled.
+
+    Returns None on network error, non-2xx, an unparsable body, or a missing/empty
+    ``state`` — never raises; the caller applies its conservative fallback.
+    """
+    url = f"{PLANE_BASE}/workspaces/{WORKSPACE}/projects/{project_id}/issues/{issue_id}/"
+    try:
+        resp = httpx.get(url, headers=PLANE_HEADERS, timeout=10)
+        resp.raise_for_status()
+        state = resp.json().get("state")
+        if isinstance(state, dict):
+            state = state.get("id")
+        return str(state) if state else None  # empty / missing state -> None
+    except Exception as e:  # broad on purpose: this helper must never raise
+        logger.warning(f"fetch_issue_state failed for {issue_id}: {e}")
+        return None
+
+
 import re as _re
 
 
diff --git a/src/reconciler.py b/src/reconciler.py
index a70695c..6d65baa 100644
--- a/src/reconciler.py
+++ b/src/reconciler.py
@@ -19,7 +19,12 @@ handlers a webhook would use:
   canonical quality gate; green -> advance through the unchanged
   ``stage_engine.advance_stage(..., finished_agent=None)``; red -> silence (no
   advance, no notification). ``analysis`` is NOT reconciled here (human
-  gate; owned by F-2).
+  gate; owned by F-2). **ORCH-060:** before the gate is even evaluated, F-1
+  skips (silently) tasks that are waiting for a human — Guard 1: escalated by
+  developer retries (``developer_retry_count >= MAX_DEVELOPER_RETRIES``,
+  deterministic, local; closes the ET-013 bounce loop) checked first, then
+  Guard 2: an explicit Plane ``Blocked`` / ``Needs Input`` state (Variant A —
+  networked, never-raise -> conservative skip).
 
 * **F-2 plane-side** (``reconcile_plane_once``): poll the Plane API per
   project (``list_issues_by_state``) and replay In Progress / Approved /
@@ -49,9 +54,13 @@ from .db import (
     get_task_by_plane_id,
     has_active_job_for_task,
 )
-from .stage_engine import advance_if_gate_passed
+from .stage_engine import (
+    advance_if_gate_passed,
+    developer_retry_count,
+    MAX_DEVELOPER_RETRIES,
+)
 from .stages import get_qg_for_stage
-from .plane_sync import get_project_states, list_issues_by_state
+from .plane_sync import fetch_issue_state, get_project_states, list_issues_by_state
 from .webhooks.plane import handle_status_start, handle_verdict
 from .notifications import send_telegram
 from . 
import projects
@@ -162,6 +171,17 @@ class Reconciler:
         age_s = task.get("age_s") or 0
         if age_s < grace_for_stage(stage):
             return
+        # ORCH-060 Guard 1: escalated tasks (developer retries reached the cap) are
+        # terminal — they wait for a human, not the sweeper. Without this, a task
+        # whose CI is green but whose reviewer kept sending REQUEST_CHANGES until the
+        # cap would be re-unblocked every tick (incident ET-013, infinite bounce).
+        # Deterministic, local SQL, no network — and checked FIRST (cheapest).
+        if developer_retry_count(task_id) >= MAX_DEVELOPER_RETRIES:
+            return
+        # ORCH-060 Guard 2: respect an explicit human gate (Blocked / Needs Input).
+        # Networked; runs after Guard 1 so escalated tasks never hit Plane.
+        if self._is_blocked_or_needs_input(task):
+            return
         result = advance_if_gate_passed(
             task_id,
             stage,
@@ -172,6 +192,43 @@ class Reconciler:
         if result is not None and getattr(result, "advanced", False):
             self._note_unblock(task.get("work_item_id") or str(task_id), stage)
 
+    def _is_blocked_or_needs_input(self, task: dict) -> bool:
+        """ORCH-060 Guard 2: is this issue in an explicit human Plane gate?
+
+        Variant A (no schema migration): resolve the task's Plane project, fetch
+        the issue's current state uuid and compare against the project's
+        ``blocked`` / ``needs_input`` states. ``tasks`` has no status column, so
+        the live Plane state is the source of truth.
+
+        **Never-raise, conservative fallback.** Any error / unresolved project /
+        missing issue id / missing state -> return ``True`` (treat as "possibly
+        blocked" -> skip): NOT unblocking a task is always safe, whereas wrongly
+        unblocking a human-gated task re-introduces the bounce we are trying to
+        kill. The sub-flag ``reconcile_skip_blocked_enabled`` disables ONLY this
+        networked guard (escape hatch for a Plane outage); Guard 1 stays active.
+        """
+        if not settings.reconcile_skip_blocked_enabled:
+            return False
+        try:
+            proj = projects.get_project_by_repo(task.get("repo") or "")
+            if proj is None:
+                return True  # cannot resolve the project -> conservative skip
+            issue_id = task.get("plane_id") or task.get("plane_issue_id") or ""
+            if not issue_id:
+                return True  # no issue id -> no malformed ".../issues//" call; skip
+            pid = proj.plane_project_id
+            states = get_project_states(pid)
+            cur = fetch_issue_state(issue_id, pid)
+            if cur is None:
+                return True  # Plane unreachable / no state -> conservative skip
+            return cur in {states.get("blocked"), states.get("needs_input")}
+        except Exception as e:  # noqa: BLE001 - never break the tick
+            logger.warning(
+                f"reconciler Guard 2: blocked-check failed for task "
+                f"{task.get('id')}, skipping conservatively: {e}"
+            )
+            return True
+
     # -- F-2: plane-side ---------------------------------------------------
     def reconcile_plane_once(self) -> None:
         """One F-2 pass: poll Plane per project and replay missed transitions."""
diff --git a/src/stage_engine.py b/src/stage_engine.py
index c9bf7b2..9cc3b1a 100644
--- a/src/stage_engine.py
+++ b/src/stage_engine.py
@@ -142,8 +142,14 @@ def _check_review_approved_by_branch(check_fn, repo: str, work_item_id: str, bra
         return False, f"Error finding PR: {e}"
 
 
-def _developer_retry_count(task_id: int) -> int:
-    """How many developer runs have already happened for this task."""
+def developer_retry_count(task_id: int) -> int:
+    """How many developer runs have already happened for this task.
+
+    Single source of truth for the developer-retry count: the rollback path
+    (REQUEST_CHANGES / test-fail / merge-gate) and the ORCH-060 reconciler guard
+    both read the count from here, so the SQL is never duplicated. A task is
+    considered *escalated* once this reaches ``MAX_DEVELOPER_RETRIES``.
+    """
     conn = get_db()
     n = conn.execute(
         "SELECT COUNT(*) FROM agent_runs WHERE task_id=? 
AND agent='developer'",
@@ -153,6 +159,10 @@ def _developer_retry_count(task_id: int) -> int:
     return n
 
 
+# Backward-compat private alias — existing internal call sites keep working.
+_developer_retry_count = developer_retry_count
+
+
 def advance_stage(
     task_id: int,
     current_stage: str,
diff --git a/tests/test_reconciler.py b/tests/test_reconciler.py
index ea9332c..8e47314 100644
--- a/tests/test_reconciler.py
+++ b/tests/test_reconciler.py
@@ -114,6 +114,47 @@ def _green_ci(monkeypatch, value=(True, "CI green")):
     return m
 
 
+# --- ORCH-060 fixtures / helpers -------------------------------------------
+# State uuids the default "not blocked" fixture maps Blocked / Needs Input to.
+_BLOCKED_UUID = "blocked-state-uuid"
+_NEEDS_INPUT_UUID = "needs-input-state-uuid"
+
+
+@pytest.fixture(autouse=True)
+def plane_state_not_blocked(monkeypatch):
+    """ORCH-060 Guard 2 boundary: by default Plane says the issue is NOT in a
+    human gate, so the F-1 happy path runs deterministically offline (no real
+    httpx call). Tests that exercise Guard 2 override ``fetch_issue_state`` to
+    return ``_BLOCKED_UUID`` / ``_NEEDS_INPUT_UUID`` (or raise)."""
+    monkeypatch.setattr(
+        reconciler_mod, "fetch_issue_state",
+        MagicMock(return_value="some-non-gated-state"),
+    )
+    monkeypatch.setattr(
+        reconciler_mod, "get_project_states",
+        MagicMock(return_value={
+            "blocked": _BLOCKED_UUID,
+            "needs_input": _NEEDS_INPUT_UUID,
+        }),
+    )
+    monkeypatch.setattr(
+        reconciler_mod.projects, "get_project_by_repo",
+        MagicMock(return_value=MagicMock(plane_project_id="proj-test")),
+    )
+
+
+def _add_dev_runs(task_id, n, agent="developer"):
+    """Insert ``n`` agent_runs rows for ``agent`` to model N developer retries (ORCH-060)."""
+    conn = get_db()
+    for _ in range(n):
+        conn.execute(
+            "INSERT INTO agent_runs (task_id, agent) VALUES (?, ?)",
+            (task_id, agent),
+        )
+    conn.commit()
+    conn.close()
+
+
 # ---------------------------------------------------------------------------
 # TC-01: happy path — stuck development task is advanced to review
 # ---------------------------------------------------------------------------
@@ -377,3 +418,265 @@ def test_tc21_daemon_thread_lifecycle(monkeypatch):
 
     rec.stop(timeout=5.0)
     assert not first_thread.is_alive()
+
+
+# ===========================================================================
+# ORCH-060: F-1 skips escalated (max developer retries) / Blocked / Needs Input
+# ===========================================================================
+
+# ---------------------------------------------------------------------------
+# TC-01 (AC-1): escalated dev task (exactly MAX_DEVELOPER_RETRIES dev runs) at a
+#               green gate is NOT unblocked — stays development, no job, count 0.
+# ---------------------------------------------------------------------------
+def test_tc060_01_escalated_at_limit_skipped(monkeypatch):
+    _green_ci(monkeypatch)
+    task_id = _make_task("development", age_s=3600)
+    _add_dev_runs(task_id, stage_engine.MAX_DEVELOPER_RETRIES)
+
+    rec = Reconciler()
+    rec.reconcile_gate_once()
+
+    assert _stage_of(task_id) == "development"
+    assert rec.unblocked_total == 0
+    assert _jobs_for(task_id, "reviewer") == []
+
+
+# ---------------------------------------------------------------------------
+# TC-02 (AC-2): more dev runs than the cap (cap + 2) -> also skipped (>= boundary).
+# ---------------------------------------------------------------------------
+def test_tc060_02_over_limit_skipped(monkeypatch):
+    _green_ci(monkeypatch)
+    task_id = _make_task("development", age_s=3600)
+    _add_dev_runs(task_id, stage_engine.MAX_DEVELOPER_RETRIES + 2)
+
+    rec = Reconciler()
+    rec.reconcile_gate_once()
+
+    assert _stage_of(task_id) == "development"
+    assert rec.unblocked_total == 0
+
+
+# ---------------------------------------------------------------------------
+# TC-03 (AC-3): regression — retry < cap (cap - 1 runs) still advances to review.
+# ---------------------------------------------------------------------------
+def test_tc060_03_under_limit_still_advances(monkeypatch):
+    _green_ci(monkeypatch)
+    task_id = _make_task("development", age_s=3600)
+    _add_dev_runs(task_id, stage_engine.MAX_DEVELOPER_RETRIES - 1)
+
+    rec = Reconciler()
+    rec.reconcile_gate_once()
+
+    assert _stage_of(task_id) == "review"
+    assert rec.unblocked_total == 1
+
+
+# ---------------------------------------------------------------------------
+# TC-04 (AC-4): twins — one at the cap (skip), one at cap-1 (advance). Exactly
+#               one advances.
+# ---------------------------------------------------------------------------
+def test_tc060_04_boundary_exactly_one_advances(monkeypatch):
+    _green_ci(monkeypatch)
+    at_limit = _make_task("development", branch="feature/ET-200-a",
+                          wi="ET-200", age_s=3600)
+    below = _make_task("development", branch="feature/ET-201-b",
+                       wi="ET-201", age_s=3600)
+    _add_dev_runs(at_limit, stage_engine.MAX_DEVELOPER_RETRIES)
+    _add_dev_runs(below, stage_engine.MAX_DEVELOPER_RETRIES - 1)
+
+    rec = Reconciler()
+    rec.reconcile_gate_once()
+
+    assert _stage_of(at_limit) == "development"  # skipped
+    assert _stage_of(below) == "review"          # advanced
+    assert rec.unblocked_total == 1
+
+
+# ---------------------------------------------------------------------------
+# TC-05 (AC-5): explicit Plane Blocked, no developer runs (retry < cap) -> skipped.
+# ---------------------------------------------------------------------------
+def test_tc060_05_blocked_skipped(monkeypatch):
+    _green_ci(monkeypatch)
+    monkeypatch.setattr(
+        reconciler_mod, "fetch_issue_state",
+        MagicMock(return_value=_BLOCKED_UUID),
+    )
+    task_id = _make_task("development", age_s=3600)
+
+    rec = Reconciler()
+    rec.reconcile_gate_once()
+
+    assert _stage_of(task_id) == "development"
+    assert rec.unblocked_total == 0
+
+
+# ---------------------------------------------------------------------------
+# TC-06 (AC-6): explicit Plane Needs Input (retry < cap) -> skipped.
+# ---------------------------------------------------------------------------
+def test_tc060_06_needs_input_skipped(monkeypatch):
+    _green_ci(monkeypatch)
+    monkeypatch.setattr(
+        reconciler_mod, "fetch_issue_state",
+        MagicMock(return_value=_NEEDS_INPUT_UUID),
+    )
+    task_id = _make_task("development", age_s=3600)
+
+    rec = Reconciler()
+    rec.reconcile_gate_once()
+
+    assert _stage_of(task_id) == "development"
+    assert rec.unblocked_total == 0
+
+
+# ---------------------------------------------------------------------------
+# TC-07 (AC-7): no spam — escalated task triggers no unblock log / telegram /
+#               QG-failure notification, across several (3) ticks.
+# ---------------------------------------------------------------------------
+def test_tc060_07_escalated_no_spam(monkeypatch, caplog):
+    _green_ci(monkeypatch)
+    monkeypatch.setattr(reconciler_mod.settings, "reconcile_notify_unblock", True)
+    tg = MagicMock()
+    monkeypatch.setattr(reconciler_mod, "send_telegram", tg)
+
+    task_id = _make_task("development", wi="ET-210", age_s=3600)
+    _add_dev_runs(task_id, stage_engine.MAX_DEVELOPER_RETRIES)
+
+    rec = Reconciler()
+    with caplog.at_level("INFO", logger="orchestrator.reconciler"):
+        for _ in range(3):
+            rec.reconcile_gate_once()
+
+    assert "разблокирована" not in caplog.text
+    tg.assert_not_called()
+    stage_engine.notify_qg_failure.assert_not_called()  # NOTE(review): needs an existing fixture to mock it — confirm
+    assert rec.unblocked_total == 0
+
+
+# ---------------------------------------------------------------------------
+# TC-08 (AC-8): the gate (check_ci_green) is NOT even evaluated for an escalated
+#               task — Guard 1 skips before the pre-evaluation.
+# ---------------------------------------------------------------------------
+def test_tc060_08_no_gate_call_on_escalated(monkeypatch):
+    ci = _green_ci(monkeypatch)
+    task_id = _make_task("development", age_s=3600)
+    _add_dev_runs(task_id, stage_engine.MAX_DEVELOPER_RETRIES)
+
+    Reconciler().reconcile_gate_once()
+
+    ci.assert_not_called()
+
+
+# ---------------------------------------------------------------------------
+# TC-09 (AC-9): F-2 never replays Blocked / Needs Input — those states are not
+#               in the polled set, so the handlers are never invoked.
+# ---------------------------------------------------------------------------
+def test_tc060_09_f2_does_not_replay_blocked(monkeypatch):
+    states = {
+        "in_progress": "IP", "approved": "AP", "rejected": "RJ",
+        "blocked": "BL", "needs_input": "NI",
+    }
+    monkeypatch.setattr(
+        reconciler_mod, "get_project_states", MagicMock(return_value=states)
+    )
+    captured = {}
+
+    def fake_list(pid, state_uuids):
+        captured["states"] = list(state_uuids)
+        # Stub: record which state uuids F-2 asks for and return no issues; a
+        # Blocked / Needs Input issue can only surface if its uuid is requested.
+        return []
+
+    monkeypatch.setattr(reconciler_mod, "list_issues_by_state", fake_list)
+    hss = MagicMock()
+    hv = MagicMock()
+    monkeypatch.setattr(reconciler_mod, "handle_status_start", hss)
+    monkeypatch.setattr(reconciler_mod, "handle_verdict", hv)
+    monkeypatch.setattr(
+        reconciler_mod.projects, "PROJECTS",
+        [MagicMock(repo="enduro-trails", plane_project_id="P")],
+    )
+
+    rec = Reconciler()
+    rec.reconcile_plane_once()
+
+    assert "BL" not in captured["states"]
+    assert "NI" not in captured["states"]
+    hss.assert_not_called()
+    hv.assert_not_called()
+    assert rec.unblocked_total == 0
+
+
+# ---------------------------------------------------------------------------
+# TC-10 (AC-10): never-raise — a Guard 2 lookup that raises for one task is
+#                isolated (that task is conservatively skipped); a neighbour
+#                still advances and the tick does not blow up.
+# ---------------------------------------------------------------------------
+def test_tc060_10_guard2_never_raise(monkeypatch):
+    _green_ci(monkeypatch)
+    bad = _make_task("development", branch="feature/ET-220-bad",
+                     wi="ET-220", age_s=3600)
+    ok = _make_task("development", branch="feature/ET-221-ok",
+                    wi="ET-221", age_s=3600)
+
+    def flaky(issue_id, project_id):
+        if issue_id == "plane-ET-220":
+            raise RuntimeError("plane boom")
+        return "some-non-gated-state"
+
+    monkeypatch.setattr(
+        reconciler_mod, "fetch_issue_state", MagicMock(side_effect=flaky)
+    )
+
+    rec = Reconciler()
+    rec.reconcile_gate_once()  # must not raise
+
+    assert _stage_of(bad) == "development"  # conservative skip
+    assert _stage_of(ok) == "review"        # neighbour advanced
+    assert rec.unblocked_total == 1
+
+
+# ---------------------------------------------------------------------------
+# TC-11 (AC-11): the cutoff comes from MAX_DEVELOPER_RETRIES, not a literal 3.
+#                Patching the constant to 2 makes a 2-run task escalate (it would
+#                have advanced under a hardcoded 3).
+# ---------------------------------------------------------------------------
+def test_tc060_11_limit_from_constant(monkeypatch):
+    _green_ci(monkeypatch)
+    monkeypatch.setattr(reconciler_mod, "MAX_DEVELOPER_RETRIES", 2)  # the name reconciler imported
+    task_id = _make_task("development", age_s=3600)
+    _add_dev_runs(task_id, 2)  # == patched cap -> skip
+
+    rec = Reconciler()
+    rec.reconcile_gate_once()
+
+    assert _stage_of(task_id) == "development"
+    assert rec.unblocked_total == 0
+
+
+# ---------------------------------------------------------------------------
+# AC-10 extra: the sub-flag reconcile_skip_blocked_enabled=False mutes ONLY
+#              Guard 2 (a Blocked task would then be reconciled), while Guard 1
+#              (escalated) stays active.
+# ---------------------------------------------------------------------------
+def test_tc060_subflag_disables_only_guard2(monkeypatch):
+    _green_ci(monkeypatch)
+    monkeypatch.setattr(
+        reconciler_mod.settings, "reconcile_skip_blocked_enabled", False
+    )
+    monkeypatch.setattr(
+        reconciler_mod, "fetch_issue_state",
+        MagicMock(return_value=_BLOCKED_UUID),
+    )
+    # Guard 2 disabled -> a Blocked task with retry < cap advances again.
+    blocked = _make_task("development", branch="feature/ET-230-a",
+                         wi="ET-230", age_s=3600)
+    # Guard 1 stays active regardless of the sub-flag.
+    escalated = _make_task("development", branch="feature/ET-231-b",
+                           wi="ET-231", age_s=3600)
+    _add_dev_runs(escalated, stage_engine.MAX_DEVELOPER_RETRIES)
+
+    rec = Reconciler()
+    rec.reconcile_gate_once()
+
+    assert _stage_of(blocked) == "review"         # Guard 2 muted
+    assert _stage_of(escalated) == "development"  # Guard 1 still skips