FND/F1a: add a versioned read-only JSON endpoint GET /metrics that exposes the
orchestrator's own raw state for the future observability sidecar F1b — active
task stages, job queue, agent-liveness (pid/runtime/cpu_ticks), and cost/tokens.
The orchestrator emits ONLY raw signal it alone knows; thresholds/alerts/history
live in the separate sidecar (observer separated from observed, BRD §1).
- src/metrics.py: new leaf collector build_metrics() (never-raise per section,
serial_gate.snapshot() pattern); envelope schema_version/generated_at/clk_tck +
stages/queue/agents/cost. _read_cpu_ticks(pid) reads utime+stime from
/proc/<pid>/stat (null on None/dead/non-Linux pid — never raises).
- src/main.py: thin @app.get("/metrics") wrapper (style of GET /queue).
- src/db.py: read-only helpers get_running_agents() (dedicated SELECT, not an
extension of the hot-path get_running_jobs()), agent_cost_totals(),
queue_retry_stats(); job_status_counts() default dict gains the cancelled key.
- src/config.py: metrics_endpoint_enabled kill-switch (default True), env
ORCH_METRICS_ENABLED via explicit validation_alias so the documented switch
actually controls the flag.
- docs: README API table row + CHANGELOG entry (contract section already added
by architect); .env.example ORCH_METRICS_ENABLED.
Strictly read-only / never-raise: STAGE_TRANSITIONS / QG_CHECKS / check_* /
machine-verdict keys / DB schema untouched; /health, /status and /queue stay byte-for-byte identical.
Tests: tests/test_metrics.py (TC-01..TC-11) + env-alias tests in test_config.py.
Full suite green (1482).
Refs: ORCH-099
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
340 lines
14 KiB
Python
340 lines
14 KiB
Python
"""ORCH-042: Settings.tracker_mode config field.
|
|
|
|
AC-1: tracker_mode defaults to "bump" (flipped from "edit" by ORCH-067) and is read from env ORCH_TRACKER_MODE.
|
|
Settings is a Pydantic BaseSettings reading env at instantiation, so each case
|
|
builds a FRESH Settings() (the process-wide singleton is not mutated).
|
|
"""
|
|
|
|
from src.config import Settings
|
|
|
|
|
|
def test_tracker_mode_defaults_to_bump(monkeypatch):
    """ORCH-067 (TC-01 / AC-1): with no env var set, tracker_mode is "bump".

    The default was flipped from edit to bump, so out of the box the card is
    re-created at the bottom of the chat; edit remains selectable through
    ORCH_TRACKER_MODE=edit.
    """
    # Drop any inherited value so the assertion observes the built-in default.
    monkeypatch.delenv("ORCH_TRACKER_MODE", raising=False)
    fresh = Settings()
    assert fresh.tracker_mode == "bump"
|
|
|
|
|
|
def test_tracker_mode_reads_env_edit(monkeypatch):
    """ORCH-067 (AC-4): ORCH_TRACKER_MODE=edit still selects edit mode."""
    monkeypatch.setenv("ORCH_TRACKER_MODE", "edit")
    fresh = Settings()
    assert fresh.tracker_mode == "edit"
|
|
|
|
|
|
def test_tracker_mode_reads_env_bump(monkeypatch):
    """TC-01 / AC-1: ORCH_TRACKER_MODE=bump yields tracker_mode == "bump"."""
    monkeypatch.setenv("ORCH_TRACKER_MODE", "bump")
    fresh = Settings()
    assert fresh.tracker_mode == "bump"
|
|
|
|
|
|
def test_tracker_mode_reads_env_arbitrary(monkeypatch):
    """AC-1/AC-2 split: Settings stores the env value verbatim.

    Mode RESOLUTION (anything other than "bump" means edit) is done in
    notifications, not in the config field, so an unknown value must survive
    untouched here.
    """
    monkeypatch.setenv("ORCH_TRACKER_MODE", "garbage")
    fresh = Settings()
    assert fresh.tracker_mode == "garbage"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# ORCH-043 / TC-25: merge-gate settings defaults + env override.
|
|
# ---------------------------------------------------------------------------
|
|
_MERGE_ENV = (
|
|
"ORCH_MERGE_GATE_ENABLED",
|
|
"ORCH_MERGE_GATE_REPOS",
|
|
"ORCH_MERGE_RETEST_TIMEOUT_S",
|
|
"ORCH_MERGE_RETEST_TARGET",
|
|
"ORCH_MERGE_LOCK_TIMEOUT_S",
|
|
"ORCH_MERGE_DEFER_DELAY_S",
|
|
"ORCH_MERGE_DEFER_MAX_ATTEMPTS",
|
|
)
|
|
|
|
|
|
def test_merge_gate_settings_defaults(monkeypatch):
    """TC-25 / AC-10: merge-gate fields hold their documented defaults without env."""
    # Clear every merge-gate variable so nothing inherited leaks into Settings().
    for env_name in _MERGE_ENV:
        monkeypatch.delenv(env_name, raising=False)

    cfg = Settings()

    assert cfg.merge_gate_enabled is True
    observed = (
        cfg.merge_gate_repos,
        cfg.merge_retest_timeout_s,
        cfg.merge_retest_target,
        cfg.merge_lock_timeout_s,
        cfg.merge_defer_delay_s,
        cfg.merge_defer_max_attempts,
    )
    assert observed == ("", 600, "tests/", 300, 60, 5)
|
|
|
|
|
|
def test_merge_gate_settings_env_override(monkeypatch):
    """TC-25 / AC-10: every merge-gate field follows its ORCH_* env var."""
    overrides = {
        "ORCH_MERGE_GATE_ENABLED": "false",
        "ORCH_MERGE_GATE_REPOS": "orchestrator,enduro-trails",
        "ORCH_MERGE_RETEST_TIMEOUT_S": "120",
        "ORCH_MERGE_RETEST_TARGET": "tests/unit",
        "ORCH_MERGE_LOCK_TIMEOUT_S": "90",
        "ORCH_MERGE_DEFER_DELAY_S": "5",
        "ORCH_MERGE_DEFER_MAX_ATTEMPTS": "9",
    }
    for env_name, raw_value in overrides.items():
        monkeypatch.setenv(env_name, raw_value)

    cfg = Settings()

    assert cfg.merge_gate_enabled is False
    assert cfg.merge_gate_repos == "orchestrator,enduro-trails"
    assert cfg.merge_retest_timeout_s == 120
    assert cfg.merge_retest_target == "tests/unit"
    assert cfg.merge_lock_timeout_s == 90
    assert cfg.merge_defer_delay_s == 5
    assert cfg.merge_defer_max_attempts == 9
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# ORCH-053 / TC-22: reconcile_* settings defaults + env override.
|
|
# ---------------------------------------------------------------------------
|
|
_RECONCILE_ENV = (
|
|
"ORCH_RECONCILE_ENABLED",
|
|
"ORCH_RECONCILE_INTERVAL_S",
|
|
"ORCH_RECONCILE_PLANE_ENABLED",
|
|
"ORCH_RECONCILE_GRACE_DEFAULT_S",
|
|
"ORCH_RECONCILE_GRACE_OVERRIDES_JSON",
|
|
"ORCH_RECONCILE_NOTIFY_UNBLOCK",
|
|
)
|
|
|
|
|
|
def test_reconcile_settings_defaults(monkeypatch):
    """TC-22 / AC-13: reconcile_* fields hold their documented defaults without env."""
    # Clear every reconcile variable so nothing inherited leaks into Settings().
    for env_name in _RECONCILE_ENV:
        monkeypatch.delenv(env_name, raising=False)

    cfg = Settings()

    assert cfg.reconcile_enabled is True
    assert cfg.reconcile_interval_s == 120
    assert cfg.reconcile_plane_enabled is True
    assert cfg.reconcile_grace_default_s == 600
    assert cfg.reconcile_grace_overrides_json == ""
    assert cfg.reconcile_notify_unblock is True
|
|
|
|
|
|
def test_reconcile_settings_env_override(monkeypatch):
    """TC-22 / AC-13: every reconcile_* field follows its ORCH_* env var."""
    overrides = {
        "ORCH_RECONCILE_ENABLED": "false",
        "ORCH_RECONCILE_INTERVAL_S": "300",
        "ORCH_RECONCILE_PLANE_ENABLED": "false",
        "ORCH_RECONCILE_GRACE_DEFAULT_S": "900",
        "ORCH_RECONCILE_GRACE_OVERRIDES_JSON": '{"development": 300}',
        "ORCH_RECONCILE_NOTIFY_UNBLOCK": "false",
    }
    for env_name, raw_value in overrides.items():
        monkeypatch.setenv(env_name, raw_value)

    cfg = Settings()

    assert cfg.reconcile_enabled is False
    assert cfg.reconcile_interval_s == 300
    assert cfg.reconcile_plane_enabled is False
    assert cfg.reconcile_grace_default_s == 900
    # The overrides field is compared as the raw JSON string, exactly as set.
    assert cfg.reconcile_grace_overrides_json == '{"development": 300}'
    assert cfg.reconcile_notify_unblock is False
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# ORCH-058 / TC-13: image-freshness settings defaults + env override.
|
|
# ---------------------------------------------------------------------------
|
|
_FRESH_ENV = (
|
|
"ORCH_IMAGE_FRESHNESS_ENABLED",
|
|
"ORCH_IMAGE_FRESHNESS_REPOS",
|
|
)
|
|
|
|
|
|
def test_image_freshness_settings_defaults(monkeypatch):
    """TC-13 / AC-9: kill-switch is ON by default and the repo CSV is empty
    (self-hosting only)."""
    for env_name in _FRESH_ENV:
        monkeypatch.delenv(env_name, raising=False)

    cfg = Settings()

    assert cfg.image_freshness_enabled is True
    assert cfg.image_freshness_repos == ""
|
|
|
|
|
|
def test_image_freshness_settings_env_override(monkeypatch):
    """TC-13 / AC-9: both image-freshness fields follow their ORCH_* env vars."""
    overrides = {
        "ORCH_IMAGE_FRESHNESS_ENABLED": "false",
        "ORCH_IMAGE_FRESHNESS_REPOS": "orchestrator,enduro-trails",
    }
    for env_name, raw_value in overrides.items():
        monkeypatch.setenv(env_name, raw_value)

    cfg = Settings()

    assert cfg.image_freshness_enabled is False
    assert cfg.image_freshness_repos == "orchestrator,enduro-trails"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# ORCH-061 / TC-09: staging_infra_tolerance_enabled kill-switch (AC-7).
|
|
# ---------------------------------------------------------------------------
|
|
def test_staging_infra_tolerance_defaults_true(monkeypatch):
    """TC-09 / AC-7: the kill-switch is ON when no env var is set.

    ON is the safe default (the safety net holds regardless); the flag exists
    so legacy strictness can be restored instantly.
    """
    monkeypatch.delenv("ORCH_STAGING_INFRA_TOLERANCE_ENABLED", raising=False)
    fresh = Settings()
    assert fresh.staging_infra_tolerance_enabled is True
|
|
|
|
|
|
def test_staging_infra_tolerance_env_override_false(monkeypatch):
    """TC-09 / AC-7: ORCH_STAGING_INFRA_TOLERANCE_ENABLED=false turns the flag off.

    Off means strict behaviour, 1:1 with pre-ORCH-061: an infra-only FAIL
    rolls back again.
    """
    monkeypatch.setenv("ORCH_STAGING_INFRA_TOLERANCE_ENABLED", "false")
    fresh = Settings()
    assert fresh.staging_infra_tolerance_enabled is False
|
|
|
|
|
|
def test_staging_infra_tolerance_env_override_true(monkeypatch):
    """ORCH_STAGING_INFRA_TOLERANCE_ENABLED=true is read from env and yields True."""
    monkeypatch.setenv("ORCH_STAGING_INFRA_TOLERANCE_ENABLED", "true")
    fresh = Settings()
    assert fresh.staging_infra_tolerance_enabled is True
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# ORCH-065 / TC-20: reaper_* + lease_reclaim_* settings defaults + env override.
|
|
# ---------------------------------------------------------------------------
|
|
_REAPER_ENV = (
|
|
"ORCH_REAPER_ENABLED",
|
|
"ORCH_REAPER_INTERVAL_S",
|
|
"ORCH_REAPER_DEAD_TICKS",
|
|
"ORCH_REAPER_MAX_RUNNING_S",
|
|
"ORCH_LEASE_RECLAIM_ENABLED",
|
|
)
|
|
|
|
|
|
def test_reaper_settings_defaults(monkeypatch):
    """TC-20 / §5: reaper_* and lease_reclaim_* hold their documented defaults without env."""
    for env_name in _REAPER_ENV:
        monkeypatch.delenv(env_name, raising=False)

    cfg = Settings()

    assert cfg.reaper_enabled is True
    numeric_defaults = (
        cfg.reaper_interval_s,
        cfg.reaper_dead_ticks,
        cfg.reaper_max_running_s,
    )
    assert numeric_defaults == (60, 2, 3600)
    assert cfg.lease_reclaim_enabled is True
|
|
|
|
|
|
def test_reaper_settings_env_override(monkeypatch):
    """TC-20 / §5 / AC-14: every reaper / lease-reclaim field follows its ORCH_* env var."""
    overrides = {
        "ORCH_REAPER_ENABLED": "false",
        "ORCH_REAPER_INTERVAL_S": "30",
        "ORCH_REAPER_DEAD_TICKS": "5",
        "ORCH_REAPER_MAX_RUNNING_S": "1200",
        "ORCH_LEASE_RECLAIM_ENABLED": "false",
    }
    for env_name, raw_value in overrides.items():
        monkeypatch.setenv(env_name, raw_value)

    cfg = Settings()

    assert cfg.reaper_enabled is False
    assert cfg.reaper_interval_s == 30
    assert cfg.reaper_dead_ticks == 5
    assert cfg.reaper_max_running_s == 1200
    assert cfg.lease_reclaim_enabled is False
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# ORCH-065 / TC-19: contracts unchanged — no new stages / QG checks; the
|
|
# check_branch_mergeable signature is intact (AC-13).
|
|
# ---------------------------------------------------------------------------
|
|
def test_tc19_stage_transitions_unchanged():
    """ORCH-065 introduced no new pipeline EDGE.

    `cancelled` was added by ORCH-090 (adr-0026) as a terminal SINK parallel to
    `done`; that is not a new edge, and no edge's exit-gate changed.
    """
    from src.stages import STAGE_TRANSITIONS

    expected_stages = {
        "created",
        "analysis",
        "architecture",
        "development",
        "review",
        "testing",
        "deploy-staging",
        "deploy",
        "done",
        "cancelled",
    }
    assert set(STAGE_TRANSITIONS) == expected_stages
|
|
|
|
|
|
def test_tc19_qg_checks_registry_unchanged():
    """ORCH-065 added no quality-gate check: the registry keys are exactly this set."""
    from src.qg.checks import QG_CHECKS

    expected_checks = {
        "check_analysis_approved",
        "check_analysis_complete",
        "check_architecture_done",
        "check_ci_green",
        "check_review_approved",
        "check_tests_passed",
        "check_reviewer_verdict",
        "check_tests_local",
        "check_deploy_status",
        "check_staging_status",
        "check_branch_mergeable",
        "check_staging_image_fresh",
        "check_security_gate",
        "check_coverage_gate",
    }
    assert set(QG_CHECKS) == expected_checks
|
|
|
|
|
|
def test_tc19_check_branch_mergeable_signature_intact():
    """check_branch_mergeable accepts exactly (repo, work_item_id, branch), in that order."""
    import inspect

    from src.qg.checks import check_branch_mergeable

    signature = inspect.signature(check_branch_mergeable)
    # Iterating the parameters mapping yields names in declaration order.
    assert list(signature.parameters) == ["repo", "work_item_id", "branch"]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# ORCH-093 / TC-13: merge_retry_* settings defaults + env override (AC-5).
|
|
# ---------------------------------------------------------------------------
|
|
_MERGE_RETRY_ENV = (
|
|
"ORCH_MERGE_RETRY_ENABLED",
|
|
"ORCH_MERGE_RETRY_MAX_ATTEMPTS",
|
|
"ORCH_MERGE_RETRY_BACKOFF_BASE_S",
|
|
"ORCH_MERGE_RETRY_BACKOFF_MAX_S",
|
|
)
|
|
|
|
|
|
def test_merge_retry_settings_defaults(monkeypatch):
    """With no ORCH_MERGE_RETRY_* env set, the fields hold their documented defaults."""
    for env_name in _MERGE_RETRY_ENV:
        monkeypatch.delenv(env_name, raising=False)

    cfg = Settings()

    assert cfg.merge_retry_enabled is True
    numeric_defaults = (
        cfg.merge_retry_max_attempts,
        cfg.merge_retry_backoff_base_s,
        cfg.merge_retry_backoff_max_s,
    )
    assert numeric_defaults == (3, 2, 5)
|
|
|
|
|
|
def test_merge_retry_settings_env_override(monkeypatch):
    """Every merge_retry_* field follows its ORCH_MERGE_RETRY_* env var."""
    overrides = {
        "ORCH_MERGE_RETRY_ENABLED": "false",
        "ORCH_MERGE_RETRY_MAX_ATTEMPTS": "5",
        "ORCH_MERGE_RETRY_BACKOFF_BASE_S": "1",
        "ORCH_MERGE_RETRY_BACKOFF_MAX_S": "8",
    }
    for env_name, raw_value in overrides.items():
        monkeypatch.setenv(env_name, raw_value)

    cfg = Settings()

    assert cfg.merge_retry_enabled is False
    assert cfg.merge_retry_max_attempts == 5
    assert cfg.merge_retry_backoff_base_s == 1
    assert cfg.merge_retry_backoff_max_s == 8
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# ORCH-094: deploy_status_guard_* settings defaults + env override.
|
|
# ---------------------------------------------------------------------------
|
|
_DEPLOY_GUARD_ENV = (
|
|
"ORCH_DEPLOY_STATUS_GUARD_ENABLED",
|
|
"ORCH_DEPLOY_STATUS_GUARD_REPOS",
|
|
)
|
|
|
|
|
|
def test_deploy_status_guard_settings_defaults(monkeypatch):
    """Documented defaults: the guard is enabled and the repo list is empty
    (self-hosting only)."""
    for env_name in _DEPLOY_GUARD_ENV:
        monkeypatch.delenv(env_name, raising=False)

    cfg = Settings()

    assert cfg.deploy_status_guard_enabled is True
    assert cfg.deploy_status_guard_repos == ""
|
|
|
|
|
|
def test_deploy_status_guard_settings_env_override(monkeypatch):
    """Both deploy_status_guard_* fields follow their ORCH_DEPLOY_STATUS_GUARD_* env vars."""
    overrides = {
        "ORCH_DEPLOY_STATUS_GUARD_ENABLED": "false",
        "ORCH_DEPLOY_STATUS_GUARD_REPOS": "orchestrator,enduro-trails",
    }
    for env_name, raw_value in overrides.items():
        monkeypatch.setenv(env_name, raw_value)

    cfg = Settings()

    assert cfg.deploy_status_guard_enabled is False
    assert cfg.deploy_status_guard_repos == "orchestrator,enduro-trails"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# ORCH-099 (D8): metrics_endpoint_enabled default + env alias ORCH_METRICS_ENABLED.
|
|
# The field carries an explicit validation_alias so the DOCUMENTED env var
|
|
# (README / ADR-001 D8) actually controls the flag, overriding the default
|
|
# ORCH_ + field-name mapping (which would otherwise be ORCH_METRICS_ENDPOINT_*).
|
|
# ---------------------------------------------------------------------------
|
|
def test_metrics_endpoint_enabled_default_true(monkeypatch):
    """ORCH-099 (D8): metrics_endpoint_enabled is True when neither env name is set."""
    # Clear both the documented alias and the ORCH_ + field-name spelling.
    for env_name in ("ORCH_METRICS_ENABLED", "ORCH_METRICS_ENDPOINT_ENABLED"):
        monkeypatch.delenv(env_name, raising=False)
    fresh = Settings()
    assert fresh.metrics_endpoint_enabled is True
|
|
|
|
|
|
def test_metrics_endpoint_enabled_reads_documented_env_alias(monkeypatch):
    """ORCH-099 (D8): the documented ORCH_METRICS_ENABLED env var controls the flag."""
    monkeypatch.setenv("ORCH_METRICS_ENABLED", "false")
    fresh = Settings()
    assert fresh.metrics_endpoint_enabled is False
|