"""ORCH-021 unit tests — post-deploy monitor pure logic (TC-01..TC-15). The deterministic, network-free core (classification + reaction decision + exit-code mapping + artefact frontmatter + never-raise) of ``src/post_deploy.py``. Network probes and the rollback hook are exercised via mocks; the classifier is the main subject (mirrors compute_staging_verdict in ORCH-061). """ import os import tempfile import pytest import yaml # Isolate the settings singleton onto a tmp repos_dir BEFORE importing the module. os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token") os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token") from src import post_deploy # noqa: E402 # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- def _probe(health_ok=True, total=2, fivexx=0): return {"health_ok": health_ok, "total": total, "fivexx": fivexx} @pytest.fixture(autouse=True) def _tmp_state(monkeypatch, tmp_path): monkeypatch.setattr(post_deploy.settings, "repos_dir", str(tmp_path)) monkeypatch.setattr(post_deploy.settings, "host_repos_dir", str(tmp_path)) yield # --------------------------------------------------------------------------- # TC-01..TC-05 — classification (the core) # --------------------------------------------------------------------------- def test_tc01_healthy_no_failures(): series = [_probe() for _ in range(5)] assert post_deploy.classify(series, fail_threshold=3, fivexx_threshold=0.5) == "HEALTHY" def test_tc02_degraded_consecutive_health_failures(): # Exactly fail_threshold consecutive failures -> DEGRADED (>= contract). series = [_probe(health_ok=False) for _ in range(3)] assert post_deploy.classify(series, fail_threshold=3, fivexx_threshold=0.5) == "DEGRADED" def test_tc03_degraded_by_5xx_ratio_even_when_health_200(): # /health stays 200 (health_ok True) but the 5xx ratio is above threshold. series = [_probe(health_ok=True, total=2, fivexx=2) for _ in range(3)] assert post_deploy.classify(series, fail_threshold=10, fivexx_threshold=0.5) == "DEGRADED" def test_tc04_no_false_trip_single_glitch_then_recovery(): # One isolated failure (1 < threshold) surrounded by healthy probes -> HEALTHY. series = [_probe(), _probe(health_ok=False), _probe(), _probe()] assert post_deploy.classify(series, fail_threshold=3, fivexx_threshold=0.5) == "HEALTHY" def test_tc05_thresholds_change_verdict_on_same_data(): # Same data, different threshold flips the verdict (AC-11): two consecutive fails. series = [_probe(health_ok=False), _probe(health_ok=False)] assert post_deploy.classify(series, fail_threshold=3, fivexx_threshold=0.5) == "HEALTHY" assert post_deploy.classify(series, fail_threshold=2, fivexx_threshold=0.5) == "DEGRADED" def test_classify_uses_settings_thresholds(monkeypatch): # The tick reads thresholds from Settings (env ORCH_*) — verify the wiring point. monkeypatch.setattr(post_deploy.settings, "post_deploy_fail_threshold", 2) series = [_probe(health_ok=False), _probe(health_ok=False)] assert post_deploy.classify( series, post_deploy.settings.post_deploy_fail_threshold, post_deploy.settings.post_deploy_5xx_threshold, ) == "DEGRADED" # --------------------------------------------------------------------------- # TC-06..TC-08 — reaction decision (self-hosting safety) # --------------------------------------------------------------------------- def test_tc06_nonself_auto_rollback_degraded_rolls_back(monkeypatch): monkeypatch.setattr(post_deploy.settings, "post_deploy_auto_rollback", True) assert post_deploy.decide_action("enduro-trails", "DEGRADED") == "ROLLBACK" def test_tc07_self_hosting_degraded_never_rolls_back(monkeypatch): # orchestrator (self-hosting) is ALWAYS ALERT_ONLY, even with auto_rollback on. monkeypatch.setattr(post_deploy.settings, "post_deploy_auto_rollback", True) assert post_deploy.decide_action("orchestrator", "DEGRADED") == "ALERT_ONLY" def test_tc08_healthy_means_none_for_any_repo(): assert post_deploy.decide_action("orchestrator", "HEALTHY") == "NONE" assert post_deploy.decide_action("enduro-trails", "HEALTHY") == "NONE" def test_nonself_default_policy_alert_only(monkeypatch): monkeypatch.setattr(post_deploy.settings, "post_deploy_auto_rollback", False) assert post_deploy.decide_action("enduro-trails", "DEGRADED") == "ALERT_ONLY" # --------------------------------------------------------------------------- # TC-09..TC-10 — conditionality / kill-switch # --------------------------------------------------------------------------- def test_tc09_applies_empty_repos_only_self_hosting(monkeypatch): monkeypatch.setattr(post_deploy.settings, "post_deploy_monitor_enabled", True) monkeypatch.setattr(post_deploy.settings, "post_deploy_repos", "") assert post_deploy.post_deploy_applies("orchestrator") is True assert post_deploy.post_deploy_applies("enduro-trails") is False def test_tc09_applies_explicit_repos_csv(monkeypatch): monkeypatch.setattr(post_deploy.settings, "post_deploy_monitor_enabled", True) monkeypatch.setattr(post_deploy.settings, "post_deploy_repos", "enduro-trails") assert post_deploy.post_deploy_applies("enduro-trails") is True assert post_deploy.post_deploy_applies("orchestrator") is False def test_tc10_kill_switch_disables_for_everyone(monkeypatch): monkeypatch.setattr(post_deploy.settings, "post_deploy_monitor_enabled", False) assert post_deploy.post_deploy_applies("orchestrator") is False assert post_deploy.post_deploy_applies("enduro-trails") is False # --------------------------------------------------------------------------- # TC-11..TC-12 — rollback exit-code mapping # --------------------------------------------------------------------------- def test_tc11_rollback_exit0_is_ok(): assert post_deploy.map_rollback_exit_code(0) == "ROLLBACK_OK" def test_tc12_rollback_exit_nonzero_is_failed(): assert post_deploy.map_rollback_exit_code(1) == "ROLLBACK_FAILED" assert post_deploy.map_rollback_exit_code(2) == "ROLLBACK_FAILED" assert post_deploy.map_rollback_exit_code(None) == "ROLLBACK_FAILED" assert post_deploy.map_rollback_exit_code("garbage") == "ROLLBACK_FAILED" # --------------------------------------------------------------------------- # TC-13 — artefact frontmatter # --------------------------------------------------------------------------- def test_tc13_log_frontmatter_parses(): body = post_deploy.build_post_deploy_log( "ORCH-021", "DEGRADED", "ALERT_ONLY", 900, 12, 4 ) assert body.startswith("---\n") fm = body.split("---", 2)[1] data = yaml.safe_load(fm) assert data["post_deploy_status"] == "DEGRADED" assert data["action_taken"] == "ALERT_ONLY" assert data["work_item"] == "ORCH-021" assert data["window_s"] == 900 assert data["checks_total"] == 12 assert data["checks_failed"] == 4 # --------------------------------------------------------------------------- # TC-14..TC-15 — never-raise # --------------------------------------------------------------------------- def test_tc14_probe_network_error_is_conservative_not_raise(monkeypatch): # urlopen raises on every call -> health bad + monitored endpoints counted as # 5xx, but NO exception propagates (the helper swallows and reports code 0). def boom(*a, **k): raise OSError("network down") monkeypatch.setattr(post_deploy.urllib.request, "urlopen", boom) res = post_deploy.probe_signals("http://localhost:8500") assert res.health_ok is False assert res.total == 2 assert res.fivexx == 2 # unreachable endpoints counted as failures def test_tc14_classify_junk_input_swallowed(): # If classify gets junk it must not raise (fail-safe to HEALTHY). assert post_deploy.classify("not-a-list", 3, 0.5) == "HEALTHY" assert post_deploy.classify([{"bad": "row"}], 3, 0.5) == "HEALTHY" assert post_deploy.classify(None, 3, 0.5) == "HEALTHY" def test_tc15_write_log_no_worktree_returns_false(monkeypatch): # get_worktree_path raises -> write returns False, no exception (best-effort). def boom(repo, branch): raise FileNotFoundError("no worktree") monkeypatch.setattr("src.git_worktree.get_worktree_path", boom) ok = post_deploy.write_post_deploy_log( "nope-repo", "ORCH-021", "feature/x", "HEALTHY", "NONE", 900, 3, 0 ) assert ok is False # --------------------------------------------------------------------------- # Sentinel state restart-safe counters # --------------------------------------------------------------------------- def test_series_append_and_read_roundtrip(): post_deploy.write_marker("orchestrator", "ORCH-021", post_deploy.ARMED, "armed") post_deploy.append_probe("orchestrator", "ORCH-021", post_deploy.ProbeResult(False, 2, 1, "x")) post_deploy.append_probe("orchestrator", "ORCH-021", post_deploy.ProbeResult(True, 2, 0, "y")) series = post_deploy.read_series("orchestrator", "ORCH-021") assert len(series) == 2 assert series[0]["health_ok"] is False assert series[1]["health_ok"] is True def test_mark_done_idempotency_marker(): assert post_deploy.has_marker("orchestrator", "ORCH-021", post_deploy.DONE) is False post_deploy.mark_done("orchestrator", "ORCH-021") assert post_deploy.has_marker("orchestrator", "ORCH-021", post_deploy.DONE) is True