"""Generalised pure alert-decision function + in-memory anti-spam state (D4). ``src/disk_watchdog.py::decide_action`` is hard-wired to ``used_pct >= threshold``. F1b has many heterogeneous signals (booleans — "orch down", "container unhealthy"; counters — "job-failed delta"; thresholds — "memory %", "agent hung N min"), so the *comparison is lifted out* and this function works on an already-computed boolean ``signal_active``. The set of outcomes, the cooldown / recovery semantics and the in-memory best-effort state are a strict generalisation of the disk variant (BRD §BR-9 names it the template). ``now`` and ``cooldown_s`` are injected so the cooldown / recovery logic is testable deterministically without a real timer (TC-01…TC-04). """ from __future__ import annotations from dataclasses import dataclass # Decision outcomes — same vocabulary as ``disk_watchdog`` (1:1 semantics). ACTION_NONE = "none" ACTION_ALERT = "alert" ACTION_REALERT = "realert" ACTION_RECOVERY = "recovery" @dataclass class AlertState: """In-memory anti-spam state for one signal key (1:1 with ``PathAlertState``). Best-effort: lives only in the daemon (no DB row, no migration). After a process restart ``alerting`` resets to ``False`` -> a still-standing problem re-alerts once, which is safe (an early signal, not an SLA; FR-7). """ alerting: bool = False last_alert_at: float | None = None def decide( signal_active: bool, prev: AlertState, now: float, cooldown_s: float, ) -> str: """Pure alert decision — testable without a thread or a real timer (D4). Returns one of ``ACTION_{NONE,ALERT,REALERT,RECOVERY}`` as a function of the current boolean signal, the previous per-key state and the injected clock: * not alerting & active -> ALERT (threshold crossed) * alerting & active & cooldown elapsed -> REALERT (re-alert) * alerting & active & in cooldown -> NONE (anti-spam) * alerting & not active -> RECOVERY (back to normal) * not alerting & not active -> NONE (normal) """ if not prev.alerting: return ACTION_ALERT if signal_active else ACTION_NONE # prev.alerting is True if not signal_active: return ACTION_RECOVERY last = prev.last_alert_at if last is None or (now - last) >= cooldown_s: return ACTION_REALERT return ACTION_NONE