Add the `watchdog/` package (thin Python-3.12 stdlib-only daemon) and the `orchestrator-watchdog` compose service — the brain half of the domain-0 observability pair. F1a (ORCH-099) exposes GET /metrics raw signal; F1b reads it, augments with host / container / dependency probes, runs each signal through a generalised pure decision function (decide(signal_active, prev, now, cooldown), a strict superset of disk_watchdog.decide_action) with per-signal in-memory dedup/throttle/recovery, and alerts over its OWN independent Telegram channel. Key properties (ADR-001): - Observer separated from observed: separate container; /metrics not answering is itself the master `orch_down` alarm (debounced K ticks — no flap on a hiccup). - Strictly read-only: docker.sock GET-only + mounted :ro (double guard), host paths :ro, no DB/disk writes, no process control — self-hosting-safe. - never-raise on three levels (per-source/per-tick/per-send) + WATCHDOG_ENABLED kill-switch (disabled -> inert idle-loop, not exit). - Disk anti-duplicate (D6): disk_watchdog (ORCH-063) stays sole owner of the 85% alert; sidecar carries orch_down + an opt-in 97% ceiling (default off). - NO import from src/** (C-1); src/**, STAGE_TRANSITIONS, QG_CHECKS, check_*, DB schema — untouched. env_file optional so a missing .env.watchdog never breaks `docker compose up` for the prod orchestrator. Tests: tests/watchdog/ (TC-01…TC-13) + full tests/ regression green (TC-14). Docs: CHANGELOG, .env.example canon (WATCHDOG_*); architecture README + adr-0033 authored at the architecture stage. Refs: ORCH-100 Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
80 lines
2.6 KiB
Python
80 lines
2.6 KiB
Python
"""TC-09: self-hosting safety — the Docker client is read-only by construction.

The client exposes ONLY read methods (list/inspect), its single request
primitive hard-codes the ``GET`` HTTP method, and the source carries no
mutating Docker verb (start/stop/restart/kill/exec/POST). ``classify_container``
is a pure status mapper.
"""
|
|
import inspect as _inspect
|
|
|
|
from watchdog.collectors import containers as cmod
|
|
|
|
|
|
def test_request_primitive_is_get_only(monkeypatch):
    """Every request issued by ``DockerSockReader`` must use the ``GET`` method.

    ``cmod._UnixHTTPConnection`` is replaced with an in-memory fake that records
    the ``(method, path)`` of every ``request`` call and answers with a status
    200 response whose body is ``b"[]"``, so no real socket is opened.

    The recorded calls are reset between ``list_containers()`` and
    ``inspect()``: otherwise the second check would pass on the value left
    behind by the first call even if ``inspect()`` sent nothing at all.
    All calls are checked (not just the last one) so a non-GET request followed
    by a GET cannot slip through.
    """
    calls = []

    class _FakeResponse:
        # Canned reply handed back by the fake connection.
        status = 200

        def read(self):
            return b"[]"

    class _FakeConn:
        def __init__(self, *a, **k):
            # Accept whatever constructor arguments the reader passes.
            pass

        def request(self, method, path):
            calls.append((method, path))

        def getresponse(self):
            return _FakeResponse()

        def close(self):
            pass

    monkeypatch.setattr(cmod, "_UnixHTTPConnection", _FakeConn)
    reader = cmod.DockerSockReader("/var/run/docker.sock")

    reader.list_containers()
    assert calls, "list_containers() issued no request"
    assert all(method == "GET" for method, _path in calls)

    # Start from a clean slate so the next assertions reflect inspect() only.
    calls.clear()
    reader.inspect("orchestrator")
    assert calls, "inspect() issued no request"
    assert all(method == "GET" for method, _path in calls)
|
|
|
|
|
|
def test_no_mutating_verbs_in_source():
    """The ``containers`` module source must not mention a mutating Docker verb.

    This is a case-insensitive substring scan of the module's source text for
    write/control endpoint suffixes and for a quoted ``post`` method literal.
    """
    forbidden = (
        "/start",
        "/stop",
        "/restart",
        "/kill",
        "/exec",
        "\"post\"",
        "'post'",
    )
    # Lower-case once so the scan also catches e.g. "POST".
    haystack = _inspect.getsource(cmod).lower()
    for verb in forbidden:
        assert verb not in haystack, f"mutating verb leaked into containers.py: {verb}"
|
|
|
|
|
|
def test_reader_exposes_only_read_methods():
    """``DockerSockReader``'s public surface is exactly the two read methods.

    Any attribute whose name does not start with an underscore counts as
    public, so adding a new public method or attribute fails this test.
    """
    expected_api = {"list_containers", "inspect"}
    public_api = {
        name
        for name in dir(cmod.DockerSockReader)
        if not name.startswith("_")
    }
    assert public_api == expected_api
|
|
|
|
|
|
def test_classify_container_pure_mapping():
    """``classify_container`` maps an inspect payload to a status string.

    Each case pairs an input payload with the expected classification. A
    health status, when present, wins over the plain state status, and a
    missing or empty payload is classified as ``"unknown"``.
    """
    cases = (
        ({"State": {"Status": "running"}}, "running"),
        ({"State": {"Status": "exited"}}, "exited"),
        (
            {"State": {"Status": "running", "Health": {"Status": "unhealthy"}}},
            "unhealthy",
        ),
        (
            {"State": {"Status": "running", "Health": {"Status": "healthy"}}},
            "healthy",
        ),
        (None, "unknown"),
        ({}, "unknown"),
    )
    for payload, expected in cases:
        assert cmod.classify_container(payload) == expected
|
|
|
|
|
|
def test_container_alarm_semantics():
    """``container_alarm`` returns ``False`` for good states, ``True`` otherwise.

    The results are compared by identity, so the function must return real
    booleans rather than merely truthy or falsy values.
    """
    for quiet_status in ("running", "healthy"):
        assert cmod.container_alarm(quiet_status) is False

    for alarming_status in ("exited", "restarting", "unhealthy", "unknown"):
        assert cmod.container_alarm(alarming_status) is True
|