"""Central orchestrator configuration.

Every field of :class:`Settings` can be overridden through an ``ORCH_``-prefixed
environment variable (or the same key in a local ``.env`` file); the defaults
below are used when no override is present.
"""

from typing import Any

from pydantic import field_validator
from pydantic_settings import BaseSettings, SettingsConfigDict

# Single source of truth for the QG-0 title-length limit: used both as the
# field default and as the validator's graceful fallback.
_QG0_TITLE_MAX_DEFAULT = 200


class Settings(BaseSettings):
    """Typed orchestrator settings, populated from ``ORCH_*`` env vars / ``.env``."""

    # pydantic v2 style config (replaces the deprecated nested `class Config`).
    model_config = SettingsConfigDict(env_prefix="ORCH_", env_file=".env")

    # ------------------------------------------------------------------ Plane
    plane_api_url: str = "http://localhost:8091"
    # ORCH-017: external (browser) web URL of Plane for clickable issue links in
    # notifications, e.g. https://plane.example.org. Falls back to plane_api_url,
    # but a loopback fallback (localhost/127.0.0.1) is treated as "no web URL" and
    # the Plane link is omitted (see notifications._build_plane_issue_link).
    plane_web_url: str = ""
    plane_api_token: str = ""
    plane_workspace_slug: str = ""
    plane_webhook_secret: str = ""
    plane_project_id: str = ""
    # Per-agent Plane bot tokens (feat: per-agent comment authorship).
    # When set, add_comment posts under the matching bot so Plane shows the
    # real author (Analyst/Architect/...). Empty -> fallback to plane_api_token.
    plane_bot_analyst: str = ""
    plane_bot_architect: str = ""
    plane_bot_developer: str = ""
    plane_bot_reviewer: str = ""
    plane_bot_tester: str = ""
    plane_bot_deployer: str = ""
    plane_bot_stream: str = ""

    # ------------------------------------------------------------------ Gitea
    gitea_url: str = "http://localhost:3000"
    # External URL for clickable links in comments; falls back to gitea_url.
    gitea_public_url: str = ""
    gitea_token: str = ""
    gitea_webhook_secret: str = ""
    gitea_owner: str = "admin"
    default_repo: str = "enduro-trails"
    # ORCH-6: multi-repo project registry. JSON array of
    # {plane_project_id, repo, work_item_prefix, name}.
    # Empty -> built-in default registry in src/projects.py.
    projects_json: str = ""

    # ------------------------------------------------------------- Claude CLI
    claude_bin: str = "/opt/claude-code/bin/claude.exe"
    repos_dir: str = "/repos"
    host_repos_dir: str = "/home/slin/repos"
    # ORCH-2 / S-4: isolated worktree per task/branch.
    worktrees_dir: str = "/repos/_wt"
    # ORCH-087: base dir for per-run agent logs (one *.log file per run). Lifted
    # out of the hardcoded '/app/data/runs' so tests (and any non-container host)
    # can point it at a writable path; default preserves the container layout.
    runs_dir: str = "/app/data/runs"

    # --------------------------------------------------------------------- DB
    db_path: str = "/app/data/orchestrator.db"

    # ORCH-1 (F-2b): persistent job queue / background worker.
    #   max_concurrency     -> max agent jobs running in parallel
    #                          (env ORCH_MAX_CONCURRENCY)
    #   queue_poll_interval -> worker loop poll seconds
    #                          (env ORCH_QUEUE_POLL_INTERVAL)
    max_concurrency: int = 1
    queue_poll_interval: float = 2.0

    # ORCH-1b (resilience): preflight + 429/rate-limit + backoff + circuit breaker.
    #   preflight_cache_ttl    -> cache the cheap CLI/network preflight result
    #                             (seconds); the worker does NOT re-run
    #                             `claude --version` more often than this
    #                             (env ORCH_PREFLIGHT_CACHE_TTL).
    #   backoff_base_seconds   -> base for exponential transient backoff.
    #   backoff_max_seconds    -> ceiling for the transient backoff.
    #   transient_max_attempts -> retry budget for transient (429/overload/network)
    #                             failures, separate from code-fault `attempts`.
    #   breaker_threshold      -> consecutive transient failures that OPEN the
    #                             breaker.
    #   breaker_pause_seconds  -> how long the breaker stays open before half-open.
    preflight_cache_ttl: int = 45
    backoff_base_seconds: int = 10
    backoff_max_seconds: int = 600
    transient_max_attempts: int = 5
    breaker_threshold: int = 3
    breaker_pause_seconds: int = 300

    # ORCH-7 (M-2): agent timeout + graceful kill.
    #   agent_timeout_seconds        -> default per-agent wall-clock budget; the
    #                                   watchdog kills the run after this
    #                                   (env ORCH_AGENT_TIMEOUT_SECONDS).
    #   agent_kill_grace_seconds     -> pause between SIGTERM and SIGKILL so claude
    #                                   can flush artifacts before the hard kill
    #                                   (env ORCH_AGENT_KILL_GRACE_SECONDS).
    #   agent_timeout_overrides_json -> optional per-agent override JSON object,
    #                                   e.g. {"reviewer": 3600, "architect": 2700}
    #                                   (env ORCH_AGENT_TIMEOUT_OVERRIDES_JSON).
    agent_timeout_seconds: int = 1800
    agent_kill_grace_seconds: int = 20
    agent_timeout_overrides_json: str = ""

    # ORCH-41: per-agent LLM model. Empty -> agent_model_default. Resolution order:
    #   project-override (projects_json agent_models) > per-agent env
    #   (ORCH_AGENT_MODEL_<AGENT>) > agent_model_default > CLI default (no --model
    #   flag).
    # Default is 4-8 because 4-7 == 4-8 in price (Slava 05.06); do NOT hardcode
    # the version anywhere else.
    agent_model_default: str = "claude-opus-4-8"
    agent_model_analyst: str = ""
    agent_model_architect: str = ""
    agent_model_developer: str = ""
    agent_model_reviewer: str = ""
    agent_model_tester: str = ""
    agent_model_deployer: str = ""
    # ORCH-41: per-agent effort / reasoning level: low|medium|high|xhigh|max.
    # Empty -> agent_effort_default. Same resolution order as model. Default split
    # (ORCH-081/ORCH-52h): thinking agents (analyst/architect/reviewer) -> high;
    # developer -> xhigh (coding/agentic role, Opus 4.8 canon); mechanical agents
    # (tester/deployer) -> medium. These class-defaults are ALSO the per-role floor
    # used by resolve_agent_effort when the env is empty (single source of truth).
    agent_effort_default: str = "high"
    agent_effort_analyst: str = "high"
    agent_effort_architect: str = "high"
    agent_effort_developer: str = "xhigh"
    agent_effort_reviewer: str = "high"
    agent_effort_tester: str = "medium"
    agent_effort_deployer: str = "medium"
    # ORCH-41: optional per-agent fallback model used when the primary is
    # overloaded (--fallback-model, works with --print). Empty -> no flag.
    agent_fallback_model: str = ""

    # L-2: run-log rotation. Old per-run logs (*.log in the runs directory) are
    # pruned at app startup (best-effort). A *.log is removed if it is older than
    # log_keep_days OR not within the log_keep_max most-recent logs (whichever
    # hits first). Only *.log files are touched; the active run log is skipped.
    #   log_keep_days -> max age in days (env ORCH_LOG_KEEP_DAYS).
    #   log_keep_max  -> max number of newest logs to retain
    #                    (env ORCH_LOG_KEEP_MAX).
    log_keep_days: int = 30
    log_keep_max: int = 500

    # ORCH-045: quality-gate CI poll/retry. check_ci_green polls the Gitea
    # combined commit status up to ci_poll_max_attempts times, sleeping
    # ci_poll_interval_s between attempts, to ride out a transient pending
    # state right after the developer push (race fix, see ORCH-017).
    #   ci_poll_max_attempts -> max status polls (env ORCH_CI_POLL_MAX_ATTEMPTS)
    #   ci_poll_interval_s   -> seconds between polls
    #                           (env ORCH_CI_POLL_INTERVAL_S)
    ci_poll_max_attempts: int = 12
    ci_poll_interval_s: int = 10

    # ORCH-043: merge-gate (auto-rebase + re-test + merge-lock) on the
    # deploy-staging -> deploy edge. A deterministic sub-gate (no LLM) that
    # brings the branch up to date with the CURRENT origin/main, re-tests it,
    # and serialises merges so two green branches can't break main.
    #   merge_gate_enabled       -> global kill-switch; False -> no-op pass for the
    #                               whole gate (staged rollout,
    #                               env ORCH_MERGE_GATE_ENABLED).
    #   merge_gate_repos         -> CSV of repos where the gate is REAL; empty
    #                               means only the self-hosting repo
    #                               (orchestrator). Other repos -> conditional
    #                               no-op (mirrors ORCH-35 staging).
    #   merge_retest_timeout_s   -> wall-clock budget for the post-rebase re-test.
    #   merge_retest_target      -> pytest target for the re-test (portability
    #                               across repos).
    #   merge_lock_timeout_s     -> max lease age; an older lease is reclaimed
    #                               (crash backstop).
    #   merge_defer_delay_s      -> delay before re-running the gate when the lock
    #                               is busy.
    #   merge_defer_max_attempts -> defer retries before escalation (avoids
    #                               livelock).
    merge_gate_enabled: bool = True
    merge_gate_repos: str = ""
    merge_retest_timeout_s: int = 600
    merge_retest_target: str = "tests/"
    merge_lock_timeout_s: int = 300
    merge_defer_delay_s: int = 60
    merge_defer_max_attempts: int = 5

    # ORCH-036: executable self-deploy (deploy stage drives the host hook).
    # The `deploy` stage for the self-hosting repo is turned into a REAL prod
    # restart via a detached host process, gated by a manual approve. Three-phase
    # design (ADR-001): A=approve-request, B=initiate (human Approved), C=finalizer
    # maps the hook exit-code -> deploy_status. Non-self repos are unaffected.
    #
    #   self_deploy_enabled           -> global kill-switch; False -> no Phase
    #                                    A/B/C interception (the legacy synchronous
    #                                    deployer path runs for everyone,
    #                                    env ORCH_SELF_DEPLOY_ENABLED).
    #   self_deploy_repos             -> CSV of repos where executable self-deploy
    #                                    is REAL; empty -> only the self-hosting
    #                                    repo (orchestrator). Mirrors
    #                                    merge_gate_repos.
    #   deploy_require_manual_approve -> require a human Approved before the prod
    #                                    restart (BR-5). Default true; NOT toggled
    #                                    in ORCH-36 (AC-12). false -> Phase A
    #                                    initiates immediately (structural branch,
    #                                    off by default).
    #   deploy_finalize_delay_s       -> delay before the first finalize poll; must
    #                                    be > the hook health-loop (~60s) so the
    #                                    verdict usually exists on the first poll.
    #   deploy_finalize_max_attempts  -> bounded finalize-defer budget
    #                                    (anti-livelock).
    # ssh / hook target (detached prod restart; real values live on the host):
    #   deploy_ssh_user / deploy_ssh_host -> ssh target for the host hook
    #                                        (INFRA P-2).
    #   deploy_hook_script                -> path to the hook ON THE HOST
    #                                        (relative to repo).
    #   deploy_host_repo_path             -> orchestrator clone path on the host.
    # prod overrides passed to the hook for build-once (retag staging image -> prod):
    #   deploy_prod_source_image    -> image validated on staging (retagged, no
    #                                  rebuild).
    #   deploy_prod_target_service / _port / _image / _compose_profile
    #                               -> prod profile.
    #   deploy_prod_prev_image_file -> prod prev-image snapshot (separate from
    #                                  staging).
    self_deploy_enabled: bool = True
    self_deploy_repos: str = ""
    deploy_require_manual_approve: bool = True
    deploy_finalize_delay_s: int = 90
    deploy_finalize_max_attempts: int = 10
    deploy_ssh_user: str = "slin"
    deploy_ssh_host: str = ""
    deploy_hook_script: str = "scripts/orchestrator-deploy-hook.sh"
    deploy_host_repo_path: str = "/home/slin/repos/orchestrator"
    deploy_prod_source_image: str = "orchestrator-orchestrator-staging"
    deploy_prod_target_service: str = "orchestrator"
    deploy_prod_target_port: int = 8500
    deploy_prod_target_image: str = "orchestrator-orchestrator"
    deploy_prod_compose_profile: str = ""
    deploy_prod_prev_image_file: str = ".deploy-prev-image-prod"

    # ORCH-058: staging-image provenance before the BUILD-ONCE retag to prod.
    # Closes the INV-FRESH gap (ADR-001): the BUILD-ONCE retag (ORCH-36) promotes
    # the staging image to prod WITHOUT a rebuild, assuming the staging image is
    # fresh — a guarantee the pipeline never had (a stale image could be silently
    # promoted, LESSONS_ORCH-036 §4). Two complementary layers, self-hosting only:
    #   A (liveness): the QG sub-check check_staging_image_fresh rebuilds the
    #     staging image from the VALIDATED commit (worktree HEAD after merge-gate)
    #     and recreates 8501 on the deploy-staging -> deploy edge, so we validate
    #     and promote ONE artefact.
    #   B (safety): build_deploy_command passes EXPECTED_REVISION and the hook
    #     fail-closes (exit 1) if SOURCE_IMAGE's revision label != EXPECTED_REVISION
    #     before `docker tag`, making a silent stale promote structurally
    #     impossible.
    #
    #   image_freshness_enabled -> SINGLE kill-switch for the WHOLE feature (A + B
    #                              together; never "B without A" = a deadlock).
    #                              False -> legacy ORCH-36 behaviour (BUILD-ONCE,
    #                              no guard, no EXPECTED_REVISION).
    #                              Env ORCH_IMAGE_FRESHNESS_ENABLED.
    #   image_freshness_repos   -> CSV of repos where the feature is REAL; empty ->
    #                              only the self-hosting repo (orchestrator).
    #                              Mirrors self_deploy_repos / merge_gate_repos.
    image_freshness_enabled: bool = True
    image_freshness_repos: str = ""

    # ORCH-022: security-gate (secret-scanning + dependency audit) on the
    # deploy-staging -> deploy edge, run FIRST among the edge sub-gates (cheap to
    # fail before the expensive rebase/rebuild). Deterministic (no LLM): gitleaks
    # (offline secret-scan) + pip-audit (OSV/PyPI dependency audit), verdict in the
    # versioned 17-security-report.md frontmatter; FAIL -> rollback to development +
    # developer-retry (cap MAX_DEVELOPER_RETRIES). See ADR-001-security-gate.md.
    #   security_gate_enabled          -> SINGLE kill-switch; False -> pipeline 1:1
    #                                     as before ORCH-022 for everyone.
    #                                     Env ORCH_SECURITY_GATE_ENABLED.
    #   security_gate_repos            -> CSV of repos where the gate is REAL;
    #                                     empty -> only the self-hosting repo
    #                                     (orchestrator). Mirrors merge_gate_repos /
    #                                     image_freshness_repos.
    #   security_dep_block_severity    -> CVE severity threshold that BLOCKS
    #                                     (CRITICAL > HIGH > MEDIUM > LOW); below it
    #                                     / UNKNOWN -> a warning only (anti-loop
    #                                     ADR-001 Р-4).
    #   security_scan_timeout_s        -> per external scanner call timeout
    #                                     (mirrors merge_retest_timeout_s).
    #   security_dep_audit_fail_closed -> strict mode: an unreachable CVE feed ->
    #                                     FAIL instead of the default fail-open +
    #                                     warning (Р-3). Default False (anti-loop
    #                                     ORCH-061).
    #   security_secrets_block         -> a found secret blocks (always True by
    #                                     default; the offline secrets guarantee is
    #                                     unconditional, BR-2).
    security_gate_enabled: bool = True
    security_gate_repos: str = ""
    security_dep_block_severity: str = "HIGH"
    security_scan_timeout_s: int = 300
    security_dep_audit_fail_closed: bool = False
    security_secrets_block: bool = True

    # ORCH-061: tolerate KNOWN sandbox-infra FAILs (C9a/C9b) in the staging suite.
    # The self-hosting deploy-staging stage looped because scripts/staging_check.py
    # exited non-zero on ANY failed check, so two infra-only failures (sandbox bot
    # accounts not members of the sandbox Plane project) produced staging_status:
    # FAILED -> rollback deploy-staging -> development -> loop.
    #   True  -> a run whose ONLY failures are allowlisted sandbox-infra checks
    #            (C9a/C9b) is waived to SUCCESS; ANY real pipeline check that fails
    #            still fails closed -> FAILED -> rollback (safety net intact, FR-4).
    #   False -> 1:1 pre-ORCH-061 strict behaviour: any FAIL -> FAILED -> rollback.
    # Default True (mirrors merge_gate_enabled / image_freshness_enabled /
    # self_deploy_enabled): the safety net holds regardless of the flag; the flag
    # exists to instantly restore legacy strictness without a code redeploy. Lives
    # in .env.staging (ORCH_ prefix) so it is reachable inside orchestrator-staging.
    # Env ORCH_STAGING_INFRA_TOLERANCE_ENABLED.
    staging_infra_tolerance_enabled: bool = True

    # ORCH-053: stuck-task reconciler (sweeper for lost webhooks). A background
    # daemon thread reconciles the "source of truth (gate / Plane) != task stage"
    # drift left behind by a dropped webhook (502 on rebuild, no Plane/Gitea
    # retries, unresolved sha->branch).
    # See docs/architecture/adr/adr-0007-reconciler.md.
    #   reconcile_enabled              -> global kill-switch (self-hosting safety,
    #                                     staged rollout,
    #                                     env ORCH_RECONCILE_ENABLED).
    #   reconcile_interval_s           -> background sweep period (seconds).
    #   reconcile_plane_enabled        -> separate flag for the F-2 Plane-API poll
    #                                     so only the plane branch can be muted.
    #   reconcile_grace_default_s      -> default "stuck" threshold on
    #                                     tasks.updated_at.
    #   reconcile_grace_overrides_json -> JSON object of per-stage thresholds, e.g.
    #                                     {"analysis": 1800, "development": 300}.
    #                                     Invalid JSON -> default (mirrors
    #                                     agent_timeout_overrides_json).
    #   reconcile_notify_unblock       -> send a Telegram message when a stuck task
    #                                     is unblocked (F-4 observability).
    #   reconcile_skip_blocked_enabled -> ORCH-060 Guard 2: skip F-1 reconciliation
    #                                     of issues a human moved to Blocked /
    #                                     Needs Input (per-candidate Plane state
    #                                     lookup). Disabling it mutes ONLY the
    #                                     networked Guard 2; Guard 1
    #                                     (escalated-by-retries, local +
    #                                     deterministic) is always active. Manual
    #                                     escape hatch during a Plane outage.
    reconcile_enabled: bool = True
    reconcile_interval_s: int = 120
    reconcile_plane_enabled: bool = True
    reconcile_grace_default_s: int = 600
    reconcile_grace_overrides_json: str = ""
    reconcile_notify_unblock: bool = True
    reconcile_skip_blocked_enabled: bool = True

    # ORCH-068: TTL for the per-project Plane states cache (_STATES_CACHE in
    # plane_sync). Historically the cache lived for the whole process lifetime,
    # so a status added to Plane after start was never seen without a restart
    # ("stale set -> no pipeline action"). With a TTL the entry self-heals by
    # re-fetching /states/ after it expires (invalidation reuses the existing
    # reload_project_states() primitive — no duplicated reset logic).
    #   plane_states_ttl_s (env ORCH_PLANE_STATES_TTL_S):
    #     >0 -> seconds before a cache entry is re-fetched (default 300 = 5 min);
    #      0 -> disable TTL -> strictly the previous lifetime cache (back-compat
    #           escape hatch). get_project_states return shape is unchanged.
    plane_states_ttl_s: int = 300

    # ORCH-021: post-deploy production monitoring + degradation reaction. After
    # the terminal deploy->done transition for an applicable repo, a reserved-agent
    # `post-deploy-monitor` job (no LLM, modelled on deploy-finalizer) probes prod
    # over a window and reacts to a degradation the restart-time health-check
    # missed (class "green deploy, red prod", precedent ET-8). State is kept in
    # sentinel files (.post-deploy-state-* paths), no DB migration. See
    # docs/architecture/adr/adr-0010-post-deploy-monitor.md.
    #   post_deploy_monitor_enabled -> global kill-switch (BR-8); False -> the
    #                                  pipeline is 1:1 as before ORCH-021 (no arm).
    #   post_deploy_repos           -> CSV of repos where monitoring is REAL; empty
    #                                  -> only the self-hosting repo
    #                                  (orchestrator). Mirrors self_deploy_repos /
    #                                  merge_gate_repos.
    #   post_deploy_window_s        -> observation window length (~15 min, BR-1).
    #   post_deploy_interval_s      -> seconds between probe ticks.
    #   post_deploy_fail_threshold  -> N CONSECUTIVE health failures -> DEGRADED.
    #   post_deploy_5xx_threshold   -> window 5xx ratio above this -> DEGRADED.
    #   post_deploy_auto_rollback   -> globally allow auto-rollback; True acts ONLY
    #                                  for non-self repos. For self-hosting the
    #                                  reaction is ALWAYS ALERT_ONLY (BR-5) — a
    #                                  tick NEVER restarts the prod orchestrator
    #                                  container.
    #   post_deploy_base_url        -> base URL of the observed prod instance.
    # Rollback target params reuse the existing deploy_prod_* settings (no dupes).
    post_deploy_monitor_enabled: bool = True
    post_deploy_repos: str = ""
    post_deploy_window_s: int = 900
    post_deploy_interval_s: int = 30
    post_deploy_fail_threshold: int = 3
    post_deploy_5xx_threshold: float = 0.5
    post_deploy_auto_rollback: bool = False
    post_deploy_base_url: str = "http://localhost:8500"

    # ORCH-065: job-reaper + proactive merge-lease reclaim. A background daemon
    # thread (modelled on the reconciler) makes "the monitor thread / process died
    # while a job/lease was held" self-heal WITHOUT a restart. Status (done/queued/
    # failed) is otherwise only ever set by launcher._monitor_agent -> _finalize_job
    # inside the live process; a death there left the jobs row 'running' forever and
    # (at max_concurrency=1) wedged the queue of EVERY project (incidents 07.06:
    # jobs 236/239/242/254). The same thread proactively reclaims a stale/dead
    # merge-lease (ORCH-043) instead of waiting for the lazy TTL on the next
    # foreign acquire. See
    # docs/architecture/adr/adr-0011-job-reaper-lease-reclaim.md.
    #   reaper_enabled          -> global kill-switch (false -> strictly prior
    #                              behaviour; only the startup requeue_running_jobs
    #                              remains).
    #   reaper_interval_s       -> background scan period (seconds).
    #   reaper_dead_ticks       -> Tier-1: consecutive ticks a job's pid must be
    #                              dead before it is reaped (>=2
    #                              anti-false-positive; a live long-running agent
    #                              is NEVER reaped).
    #   reaper_max_running_s    -> Tier-3 backstop ceiling: a job 'running' longer
    #                              than this is reaped even when liveness is
    #                              unknowable. MUST be > max agent_timeout + grace
    #                              so a legit agent is safe.
    #   reaper_finalize_grace_s -> Tier-2 anti-false-positive: a LIVE monitor
    #                              writes agent_runs.exit_code FIRST, THEN does git
    #                              commit/push + PR + Plane usage comments
    #                              (seconds..minutes) and only then _finalize_job.
    #                              The agent pid is already dead in that window, so
    #                              pid cannot tell "monitor died" from "monitor
    #                              still finalizing". A job is reaped via Tier-2
    #                              only once exit_code has been recorded for at
    #                              least this many seconds (MUST be > the max
    #                              finalization window).
    #   lease_reclaim_enabled   -> kill-switch for the proactive stale/dead lease
    #                              reclaim (false -> only the legacy lazy TTL
    #                              reclaim in acquire).
    #   (reuse) merge_lock_timeout_s -> lease TTL; merge_gate_repos -> reclaim
    #                                   scope.
    reaper_enabled: bool = True
    reaper_interval_s: int = 60
    reaper_dead_ticks: int = 2
    reaper_max_running_s: int = 3600
    reaper_finalize_grace_s: int = 300
    lease_reclaim_enabled: bool = True

    # ORCH-071: merge-verify under-gate on the `deploy -> done` edge. For the
    # self-hosting repo the `deploy` stage runs the DETERMINISTIC self-deploy path
    # (Phase A/B/C), where the LLM `deployer` agent — historically the ONLY actor
    # that merged the feature PR into `main` — never runs. Result: a "green" deploy
    # could reach `done` while the PR stayed `open` (phantom merge, postmortem
    # LESSONS_2026-06-08). This under-gate (an inline hook in advance_stage, NOT a
    # new STAGE_TRANSITIONS edge or registered QG) runs a deterministic merge-actor
    # + post-deploy verification before `done`: not-merged -> alert + HOLD (no
    # done), merged -> normal advance. Mirrors merge_gate_* / image_freshness_*
    # rollout.
    #   merge_verify_enabled   -> global kill-switch; False -> strictly the prior
    #                             behaviour (no merge/verify),
    #                             env ORCH_MERGE_VERIFY_ENABLED.
    #   merge_verify_repos     -> CSV of repos where the under-gate is REAL; empty
    #                             -> only the self-hosting repo (orchestrator).
    #                             Mirrors merge_gate_repos / self_deploy_repos.
    #   merge_pr_timeout_s     -> per Gitea merge/list HTTP call timeout.
    #   merge_verify_timeout_s -> git fetch/merge-base timeout for the ancestor
    #                             check.
    merge_verify_enabled: bool = True
    merge_verify_repos: str = ""
    merge_pr_timeout_s: int = 60
    merge_verify_timeout_s: int = 60

    # ORCH-026: intra-repo merge serialisation (Level A) + declarative task
    # dependencies (Level B). Level A reuses the ORCH-043/065 merge-lease window
    # (no new mechanism) — the merge-lease already serialises
    # "merge -> main-updated" per repo; the ONLY new behaviour is an unconditional
    # pre-merge rebase. Level B adds a new ADDITIVE job_deps table + a NOT EXISTS
    # gate in claim_next_job. Both features are inert without data (no applicable
    # repo / no declared deps) -> zero regression for enduro-trails.
    #   premerge_rebase_always -> Level A (A-2): when True, check_branch_mergeable
    #                             ALWAYS rebases the task branch onto the CURRENT
    #                             origin/main UNDER the merge-lease (not only when
    #                             branch_is_behind_main) — a deterministic
    #                             anti-phantom that does not depend on the ancestor
    #                             check's precision. auto_rebase_onto_main is a
    #                             cheap no-op on an already up-to-date branch (rc 0,
    #                             push up-to-date, CI not retriggered). Scope =
    #                             merge_gate_repos (empty -> self-hosting).
    #                             Kill-switch (False -> exactly the ORCH-043
    #                             behaviour: rebase only when behind).
    #                             Env ORCH_PREMERGE_REBASE_ALWAYS.
    #   task_deps_enabled      -> Level B (B-2): global kill-switch for the
    #                             scheduler dependency gate. False ->
    #                             claim_next_job is 1:1 as ORCH-1 (the NOT EXISTS
    #                             clause is omitted). Inert when job_deps is empty.
    #                             Env ORCH_TASK_DEPS_ENABLED.
    #   task_deps_source       -> declaration source: db|plane|hybrid (default db).
    #                             The scheduler ALWAYS reads the DB cache
    #                             (offline-safe hot path); plane/hybrid
    #                             additionally ingest Plane `blocked-by` relations
    #                             into job_deps at task creation.
    #                             Env ORCH_TASK_DEPS_SOURCE.
    premerge_rebase_always: bool = True
    task_deps_enabled: bool = True
    task_deps_source: str = "db"

    # ORCH-088 (Stage 1, serial e2e): per-repo serial gate. A new task's analyst-job
    # does NOT enter analysis (no branch cut, no analyst agent) while the same repo
    # has another unfinished task (tasks.stage != 'done') OR the repo is frozen
    # (repo_freeze). The gate lives in claim_next_job (offline-safe hot path, like
    # the ORCH-026 dep-gate) + the branch cut is deferred from start_pipeline to the
    # analyst-job claim (launcher) so the branch base is always a fresh origin/main
    # that already contains the predecessor (anti-stale-base, AC-6). All additive,
    # never-raise, restart-safe; STAGE_TRANSITIONS / QG_CHECKS unchanged. See
    # docs/work-items/ORCH-088/06-adr/ADR-001-serial-gate.md.
    #   serial_gate_enabled        -> kill-switch (env ORCH_SERIAL_GATE_ENABLED).
    #                                 False -> claim_next_job AND start_pipeline
    #                                 are 1:1 as before ORCH-088 (clause omitted,
    #                                 branch cut in start_pipeline) — zero
    #                                 regression (AC-7).
    #   serial_gate_repos          -> CSV scope (env ORCH_SERIAL_GATE_REPOS). Empty
    #                                 -> applies to ALL registered repos (D5);
    #                                 non-empty -> only the listed repos. Repo
    #                                 tokens are sanitised (^[A-Za-z0-9._-]+$)
    #                                 before being embedded in SQL.
    #   serial_gate_freeze_enabled -> independent toggle for the FR-5
    #                                 rollback-freeze layer
    #                                 (env ORCH_SERIAL_GATE_FREEZE_ENABLED). False
    #                                 -> freeze is neither set (post-deploy
    #                                 DEGRADED) nor consulted in the claim gate.
    serial_gate_enabled: bool = True
    serial_gate_repos: str = ""
    serial_gate_freeze_enabled: bool = True

    # ORCH-073 (ADR-001 Р-4): main-integrity regression guard. After the
    # merge-verify under-gate confirms the deployed SHA is an ancestor of
    # origin/main (FR-1), a secondary deterministic (no-LLM) guard checks that a
    # declarative set of markers for recently-merged tasks
    # (MAIN_REGRESSION_MARKERS in merge_gate.py) is still present in origin/main —
    # i.e. a CHANGELOG-rebase or phantom-merge did not silently roll back a
    # neighbouring task's code. A missing marker (deterministic count==0) ->
    # ALERT + HOLD (task stays on `deploy`, NOT done); an infra/git error on the
    # grep itself -> fail-OPEN (do not block done; SHA-in-main remains the primary
    # gate).
    #   regression_guard_enabled -> kill-switch
    #                               (env ORCH_REGRESSION_GUARD_ENABLED); reuses the
    #                               merge_verify_applies scope (self-hosting /
    #                               merge_verify_repos), so non-self repos are a
    #                               no-op.
    regression_guard_enabled: bool = True

    # ORCH-082 (ADR-001 Р-5): guarantee an open code-PR BEFORE the deterministic
    # merge_pr inside the merge-verify under-gate. The pipeline never guaranteed the
    # branch had an open PR (head==branch, base==main) at merge time — PRs are
    # created ONLY on the developer path with a fresh worktree commit
    # (launcher._ensure_pr), so a branch (e.g. after a manual main restore / a
    # bounce with no new commits) could reach merge-verify PR-less -> merge_pr
    # returns "no open PR" -> a FALSE HOLD that ORCH-073 fail-closed correctly
    # catches but should never have to. The idempotent leaf-actor
    # merge_gate.ensure_open_pr creates/finds the code-PR BEFORE merge_pr;
    # ORCH-073's SHA-in-main proof is untouched and stays authoritative.
    #   merge_verify_autocreate_pr_enabled -> kill-switch (env
    #     ORCH_MERGE_VERIFY_AUTOCREATE_PR_ENABLED). False -> exactly the
    #     pre-ORCH-082 behaviour (no auto-create; "no open PR" -> HOLD as before).
    #     Reuses the merge_verify_applies scope (self-hosting / merge_verify_repos)
    #     — no separate *_repos, since auto-create is semantically inseparable from
    #     merge-verify.
    merge_verify_autocreate_pr_enabled: bool = True

    # ------------------------------------------------- Telegram notifications
    telegram_bot_token: str = ""
    telegram_chat_id: str = ""
    # ORCH-042: task live-tracker mode.
    #   bump (DEFAULT since ORCH-067) -> on update the old message is deleted and
    #        the card is re-sent to the bottom of the chat (deleteMessage +
    #        sendMessage + repoint message_id), silently (disable_notification).
    #   edit -> the card is edited in place (editMessageText); available via
    #        ORCH_TRACKER_MODE=edit.
    # One card per task in both modes. An unknown/empty value is treated as edit
    # (see notifications).
    tracker_mode: str = "bump"
    # ORCH-067 (ADR Р-2/Р-3/Р-4): best-effort live overlay for the card's status
    # line. Fills in the Plane-status branches that cannot be told apart offline
    # from tasks.stage (Needs Input / Blocked / Rejected / Cancelled / Deploying /
    # Monitoring after Deploy) by reading the LIVE Plane status with a short
    # timeout and a TTL cache. The offline core (stage -> status, In Review from
    # the brd-clock) always works without network; the overlay only complements it
    # and NEVER blocks the pipeline.
    #   tracker_live_status           -> kill-switch (False -> offline core only).
    #   tracker_live_status_ttl_s     -> TTL of the per-issue live-uuid cache
    #                                    (hot-path protection).
    #   tracker_live_status_timeout_s -> timeout of a single live GET on the
    #                                    render path.
    tracker_live_status: bool = True
    tracker_live_status_ttl_s: int = 60
    tracker_live_status_timeout_s: int = 3
    # ORCH-087 (BR-G5, ADR-001 Р-6): cap for the human BRD-review time shown on the
    # done card ("твоё {review}", i.e. "your {review}"). The brd_review clock can
    # stay open for hours on a desync (In Review -> Backlog), which made the
    # reported human time show anomalous stalls (ORCH-087: 392m). Above this cap
    # the value is shown capped with a "~" marker so an abnormal stall is never
    # presented as real human review time. Env ORCH_TRACKER_BRD_REVIEW_CAP_S;
    # default 7200s (2h). 0/negative -> no cap.
    tracker_brd_review_cap_s: int = 7200

    # ORCH-069: QG-0 upper title-length limit (entry gate _qg0_errors). The 80-char
    # cap was a hygiene limit, not structural (slug is cut to [:30] independently,
    # DB title TEXT is unbounded). Configurable via env ORCH_QG0_TITLE_MAX; default
    # 200 (was hardcoded 80). Invalid/empty value -> default (graceful, no crash).
    qg0_title_max: int = _QG0_TITLE_MAX_DEFAULT

    @field_validator("qg0_title_max", mode="before")
    @classmethod
    def _qg0_title_max_default(cls, v: Any) -> int:
        """Coerce the raw ``qg0_title_max`` input to an int, never raising.

        Graceful by design (ORCH-069 AC-3): the process must not crash on
        startup because of a bad env value (self-hosting safety).

        Args:
            v: Raw value before pydantic's own validation (env string, init
                kwarg, or ``None``).

        Returns:
            ``int(v)`` when the conversion succeeds; otherwise the default
            limit (``_QG0_TITLE_MAX_DEFAULT``) for ``None``, an empty or
            whitespace-only string, or any value ``int()`` cannot convert.
        """
        if v is None or (isinstance(v, str) and not v.strip()):
            return _QG0_TITLE_MAX_DEFAULT
        try:
            return int(v)
        except (TypeError, ValueError, OverflowError):
            # OverflowError: int(float("inf")) — without it the "never raises"
            # contract was broken for non-finite floats.
            return _QG0_TITLE_MAX_DEFAULT


# Module-level singleton: instantiated at import time, reading env / .env once.
settings = Settings()