"""ORCH-062: build-cache-pruner tests (TC-01..TC-12).

The pruner never runs a real ``docker builder prune``: ``subprocess.run`` is
monkeypatched, ``send_telegram`` is captured, and the anti-frequency clock is
injected through ``now_provider`` so time-dependent decisions are tested
without a real timer (same convention as ``test_disk_watchdog.py``). No test
touches the real docker daemon or frees real disk.
"""
import os
import tempfile

import pytest

# Override env before importing app modules (same convention as test_disk_watchdog.py).
os.environ.setdefault("ORCH_DB_PATH", os.path.join(tempfile.gettempdir(), "test_orch_bcp.db"))
os.environ.setdefault("ORCH_GITEA_TOKEN", "test-token")
os.environ.setdefault("ORCH_PLANE_API_TOKEN", "test-token")

import src.build_cache_pruner as bcp  # noqa: E402
from src.build_cache_pruner import (  # noqa: E402
    BuildCachePruner,
    build_prune_command,
    decide_prune,
    parse_reclaimed,
)


# --------------------------------------------------------------------------- #
# Helpers
# --------------------------------------------------------------------------- #
class _Completed:
    """Minimal stand-in for ``subprocess.CompletedProcess``."""

    def __init__(self, returncode=0, stdout="", stderr=""):
        self.returncode = returncode
        self.stdout = stdout
        self.stderr = stderr


@pytest.fixture
def ssh_configured(monkeypatch):
    """Configure an ssh target so ``_ssh_target()`` is not None."""
    monkeypatch.setattr(bcp.settings, "deploy_ssh_host", "mva154", raising=False)
    monkeypatch.setattr(bcp.settings, "deploy_ssh_user", "slin", raising=False)


@pytest.fixture
def prune_defaults(monkeypatch):
    """Default prune policy (until=24h, all=False, timeout=120, silent)."""
    monkeypatch.setattr(bcp.settings, "build_cache_prune_enabled", True, raising=False)
    monkeypatch.setattr(bcp.settings, "build_cache_prune_until", "24h", raising=False)
    monkeypatch.setattr(bcp.settings, "build_cache_prune_all", False, raising=False)
    monkeypatch.setattr(bcp.settings, "build_cache_prune_timeout_s", 120, raising=False)
    monkeypatch.setattr(bcp.settings, "build_cache_prune_notify_min_gb", 0.0, raising=False)


# --------------------------------------------------------------------------- #
# TC-01 / TC-02: pure anti-frequency decision
# --------------------------------------------------------------------------- #
def test_tc01_decide_prune_when_interval_elapsed():
    """TC-01: never pruned yet -> PRUNE; interval elapsed since last -> PRUNE."""
    assert decide_prune(None, now=1000.0, interval_s=21600) is True
    assert decide_prune(1000.0, now=1000.0 + 21600, interval_s=21600) is True
    assert decide_prune(1000.0, now=1000.0 + 30000, interval_s=21600) is True


def test_tc02_decide_skip_within_interval():
    """TC-02: interval not yet elapsed -> SKIP (anti-frequency, NFR-4)."""
    assert decide_prune(1000.0, now=1000.0 + 10, interval_s=21600) is False
    assert decide_prune(1000.0, now=1000.0 + 21599, interval_s=21600) is False


# --------------------------------------------------------------------------- #
# TC-03: safe command construction (retention filter, no image/system prune)
# --------------------------------------------------------------------------- #
def test_tc03_command_carries_until_and_is_builder_only():
    """TC-03: command is `docker builder prune` with until=, never
    image/system prune (FR-2/FR-3/AC-2/AC-3)."""
    cmd = build_prune_command("slin@mva154", "24h", prune_all=False)
    assert cmd[0] == "ssh"
    assert "slin@mva154" in cmd
    remote = cmd[-1]
    assert "docker builder prune" in remote
    assert "--filter until=24h" in remote
    # Strictly build cache — never images/system/containers.
    assert "image prune" not in remote
    assert "system prune" not in remote
    assert "-a" not in remote.split()  # all-flag not set by default


def test_tc03_all_flag_only_paired_with_until():
    """TC-03: -a is added ONLY together with the age filter (D2/AC-2)."""
    cmd = build_prune_command("slin@mva154", "24h", prune_all=True)
    remote = cmd[-1]
    assert "docker builder prune" in remote
    assert "-a" in remote.split()
    assert "--filter until=24h" in remote  # never a bare nuke


# --------------------------------------------------------------------------- #
# TC-04: never-raise on subprocess exception / non-zero rc
# --------------------------------------------------------------------------- #
def test_tc04_subprocess_exception_does_not_raise(monkeypatch, ssh_configured, prune_defaults):
    """TC-04: a raising subprocess is swallowed; the tick survives, error logged."""

    def _boom(*a, **k):
        raise OSError("ssh exploded")

    monkeypatch.setattr(bcp.subprocess, "run", _boom)
    pruner = BuildCachePruner(now_provider=lambda: 1000.0)
    pruner.tick()  # must not raise
    assert pruner._last_error is not None
    assert pruner.status()["last_error"] is not None


def test_tc04_nonzero_rc_recorded(monkeypatch, ssh_configured, prune_defaults):
    """TC-04: a non-zero rc is recorded as an error, never raised."""
    monkeypatch.setattr(
        bcp.subprocess,
        "run",
        lambda *a, **k: _Completed(returncode=1, stderr="permission denied"),
    )
    pruner = BuildCachePruner(now_provider=lambda: 1000.0)
    pruner.tick()
    assert "rc=1" in pruner._last_error


# --------------------------------------------------------------------------- #
# TC-05: never-raise on docker.sock / ssh unavailability
# --------------------------------------------------------------------------- #
@pytest.mark.parametrize("exc_type", [FileNotFoundError, PermissionError])
def test_tc05_socket_unavailable_skips_tick(
    monkeypatch, ssh_configured, prune_defaults, exc_type
):
    """TC-05: FileNotFoundError / PermissionError -> tick skipped, loop alive.

    Parametrized so that both exception types named above are exercised, not
    only the missing-file case.
    """

    def _unavailable(*a, **k):
        raise exc_type("docker.sock unavailable")

    monkeypatch.setattr(bcp.subprocess, "run", _unavailable)
    pruner = BuildCachePruner(now_provider=lambda: 1000.0)
    pruner.tick()  # must not raise
    assert pruner._last_error is not None


def test_tc05_no_ssh_target_is_noop(monkeypatch, prune_defaults):
    """TC-05: no ssh host configured -> tick is a no-op (no subprocess call)."""
    monkeypatch.setattr(bcp.settings, "deploy_ssh_host", "", raising=False)
    called = {"n": 0}
    monkeypatch.setattr(
        bcp.subprocess,
        "run",
        lambda *a, **k: called.__setitem__("n", called["n"] + 1),
    )
    pruner = BuildCachePruner(now_provider=lambda: 1000.0)
    pruner.tick()
    assert called["n"] == 0
    assert "no ssh host" in pruner._last_error


# --------------------------------------------------------------------------- #
# TC-06: never-raise on timeout
# --------------------------------------------------------------------------- #
def test_tc06_timeout_swallowed(monkeypatch, ssh_configured, prune_defaults):
    """TC-06: TimeoutExpired is swallowed; the background loop continues (FR-6/AC-4)."""

    def _timeout(*a, **k):
        raise bcp.subprocess.TimeoutExpired(cmd="ssh ... docker builder prune", timeout=120)

    monkeypatch.setattr(bcp.subprocess, "run", _timeout)
    pruner = BuildCachePruner(now_provider=lambda: 1000.0)
    pruner.tick()  # must not raise
    assert "timeout" in pruner._last_error


# --------------------------------------------------------------------------- #
# TC-07: kill-switch
# --------------------------------------------------------------------------- #
def test_tc07_killswitch_does_not_start(monkeypatch):
    """TC-07: build_cache_prune_enabled=False -> start() is a no-op (no thread)."""
    monkeypatch.setattr(bcp.settings, "build_cache_prune_enabled", False, raising=False)
    pruner = BuildCachePruner()
    pruner.start()
    assert pruner._thread is None


def test_tc07_killswitch_status_block(monkeypatch):
    """TC-07: status() reports enabled=False under the kill-switch."""
    monkeypatch.setattr(bcp.settings, "build_cache_prune_enabled", False, raising=False)
    pruner = BuildCachePruner()
    assert pruner.status()["enabled"] is False


# --------------------------------------------------------------------------- #
# TC-08: config validation -> safe defaults
# --------------------------------------------------------------------------- #
def test_tc08_invalid_interval_falls_back_to_default():
    """TC-08: a non-positive / non-numeric interval -> the safe default (no crash)."""
    from src.config import Settings

    s = Settings(build_cache_prune_interval_s=0, build_cache_prune_timeout_s=-5)
    assert s.build_cache_prune_interval_s == 21600
    assert s.build_cache_prune_timeout_s == 120
    s2 = Settings(build_cache_prune_interval_s="not-a-number")
    assert s2.build_cache_prune_interval_s == 21600


def test_tc08_invalid_until_falls_back_to_24h():
    """TC-08: an `until` not matching ^\\d+[smhdw]?$ -> the safe default 24h."""
    from src.config import Settings

    assert Settings(build_cache_prune_until="garbage").build_cache_prune_until == "24h"
    assert Settings(build_cache_prune_until="").build_cache_prune_until == "24h"
    # Valid values are preserved.
    assert Settings(build_cache_prune_until="48h").build_cache_prune_until == "48h"
    assert Settings(build_cache_prune_until="30m").build_cache_prune_until == "30m"
    assert Settings(build_cache_prune_until="7d").build_cache_prune_until == "7d"


def test_tc08_negative_notify_min_gb_falls_back_to_zero():
    """TC-08: a negative notify threshold -> 0 (silent), never a crash."""
    from src.config import Settings

    assert Settings(build_cache_prune_notify_min_gb=-3).build_cache_prune_notify_min_gb == 0.0
    assert Settings(build_cache_prune_notify_min_gb=2.5).build_cache_prune_notify_min_gb == 2.5


# --------------------------------------------------------------------------- #
# TC-09: status() never-raise + best-effort last result
# --------------------------------------------------------------------------- #
def test_tc09_status_shape(monkeypatch, prune_defaults):
    """TC-09: status() carries enabled/interval_s/until/last_run_ts + reclaimed."""
    monkeypatch.setattr(bcp.settings, "build_cache_prune_enabled", True, raising=False)
    pruner = BuildCachePruner()
    st = pruner.status()
    for key in (
        "enabled",
        "interval_s",
        "until",
        "all",
        "last_run_ts",
        "last_reclaimed",
        "last_reclaimed_bytes",
        "last_error",
    ):
        assert key in st
    assert st["last_run_ts"] is None  # no tick yet


def test_tc09_status_reflects_last_prune(monkeypatch, ssh_configured, prune_defaults):
    """TC-09: after a successful tick status() carries last_run_ts + reclaimed."""
    monkeypatch.setattr(
        bcp.subprocess,
        "run",
        lambda *a, **k: _Completed(returncode=0, stdout="Total reclaimed space: 11.05GB"),
    )
    pruner = BuildCachePruner(now_provider=lambda: 1234.0)
    pruner.tick()
    st = pruner.status()
    assert st["last_run_ts"] == 1234.0
    assert st["last_error"] is None
    assert st["last_reclaimed_bytes"] == int(11.05 * (1000 ** 3))
    assert "GB" in st["last_reclaimed"]


def test_parse_reclaimed_variants():
    """parse_reclaimed: decimal/binary units + absent line (best-effort, never raises)."""
    assert parse_reclaimed("Total reclaimed space: 0B") == 0
    assert parse_reclaimed("Total reclaimed space: 500MB") == 500 * 1000 ** 2
    assert parse_reclaimed("Total reclaimed space: 1.5GiB") == int(1.5 * 1024 ** 3)
    assert parse_reclaimed("no such line here") is None
    assert parse_reclaimed("") is None


def test_notify_on_significant_reclaim(monkeypatch, ssh_configured, prune_defaults):
    """Optional Telegram when reclaimed >= notify_min_gb; below threshold stays silent."""
    sends = []
    monkeypatch.setattr(bcp, "send_telegram", lambda text, **k: sends.append(text))
    monkeypatch.setattr(bcp.settings, "build_cache_prune_notify_min_gb", 1.0, raising=False)
    monkeypatch.setattr(
        bcp.subprocess,
        "run",
        lambda *a, **k: _Completed(returncode=0, stdout="Total reclaimed space: 5.0GB"),
    )
    pruner = BuildCachePruner(now_provider=lambda: 1.0)
    pruner.tick()
    assert len(sends) == 1 and "build-cache-pruner" in sends[0]

    # A small reclaim below the threshold stays silent.
    sends.clear()
    monkeypatch.setattr(
        bcp.subprocess,
        "run",
        lambda *a, **k: _Completed(returncode=0, stdout="Total reclaimed space: 100MB"),
    )
    pruner2 = BuildCachePruner(now_provider=lambda: 1.0)
    pruner2.tick()
    assert sends == []


# --------------------------------------------------------------------------- #
# TC-10: leaf isolation from the Quality Gate / stage machine
# --------------------------------------------------------------------------- #
def test_tc10_module_is_leaf_no_pipeline_imports():
    """TC-10: the pruner is a leaf — it does not import stage_engine/stages/qg.

    Inspects the actual import statements (via AST), not the docstring text —
    the module legitimately *mentions* those names in prose explaining what it
    does NOT touch.
    """
    import ast
    import inspect

    tree = ast.parse(inspect.getsource(bcp))
    imported = set()
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            imported.update(a.name for a in node.names)
        elif isinstance(node, ast.ImportFrom):
            base = ("." * (node.level or 0)) + (node.module or "")
            imported.add(base)
            imported.update(f"{base}.{a.name}" for a in node.names)

    forbidden = {"stage_engine", "stages", "qg"}
    for imp in imported:
        # Compare every dotted segment, not just the first/last one: an import
        # such as ``src.qg.checks`` carries the forbidden package in the
        # middle of the path and must be caught as well.
        segments = set(imp.lstrip(".").split("."))
        assert not (segments & forbidden), (
            f"pruner must not import a pipeline module, found: {imp}"
        )


def test_tc10_stage_transitions_and_qg_unchanged():
    """TC-10: STAGE_TRANSITIONS / QG_CHECKS carry no build-cache-prune element (AC-8)."""
    from src.stages import STAGE_TRANSITIONS
    from src.qg.checks import QG_CHECKS

    blob = repr(STAGE_TRANSITIONS) + repr(list(QG_CHECKS.keys()))
    assert "build_cache" not in blob
    assert "builder prune" not in blob


# --------------------------------------------------------------------------- #
# TC-11: lifespan integration
# --------------------------------------------------------------------------- #
def test_tc11_lifespan_starts_and_stops(monkeypatch):
    """TC-11: with the flag on the daemon starts in lifespan and stops cleanly,
    docker mocked (FR-1/AC-1)."""
    monkeypatch.setattr(bcp.settings, "build_cache_prune_enabled", True, raising=False)
    # A very long interval so the loop sleeps immediately after the first tick;
    # the ssh host is blanked so that tick is a no-op and no real docker call
    # happens.
    monkeypatch.setattr(bcp.settings, "build_cache_prune_interval_s", 3600, raising=False)
    monkeypatch.setattr(bcp.settings, "deploy_ssh_host", "", raising=False)  # no-op tick
    pruner = BuildCachePruner(interval_s=3600)
    pruner.start()
    assert pruner._thread is not None and pruner._thread.is_alive()
    pruner.stop(timeout=5.0)
    assert not pruner._thread.is_alive()


# --------------------------------------------------------------------------- #
# TC-12: GET /queue integration
# --------------------------------------------------------------------------- #
def test_tc12_queue_has_build_cache_block(monkeypatch, tmp_path):
    """TC-12: GET /queue carries an additive build_cache_prune block; existing keys kept."""
    import asyncio

    import src.db as db
    from src.db import init_db
    from src import main

    # Per-test database under pytest's tmp_path: no file is left behind in the
    # shared system temp dir and no state carries over between runs.
    dbfile = str(tmp_path / "test_bcp_queue.db")
    monkeypatch.setattr(db.settings, "db_path", dbfile, raising=False)
    init_db()
    payload = asyncio.run(main.queue())
    for key in (
        "counts",
        "max_concurrency",
        "poll_interval",
        "resilience",
        "reconcile",
        "reaper",
        "post_deploy",
        "merge_verify",
        "task_deps",
        "serial_gate",
        "auto_labels",
        "disk_monitor",
        "recent",
    ):
        assert key in payload, f"existing /queue key '{key}' must be preserved"
    assert "build_cache_prune" in payload
    block = payload["build_cache_prune"]
    assert "enabled" in block and "interval_s" in block and "until" in block
    assert "last_run_ts" in block


def test_tc12_queue_disabled_block(monkeypatch, tmp_path):
    """TC-12: with the kill-switch off, /queue reports build_cache_prune.enabled=false."""
    import asyncio

    import src.db as db
    from src.db import init_db
    from src import main

    # Per-test database under pytest's tmp_path (see the sibling TC-12 test).
    dbfile = str(tmp_path / "test_bcp_queue2.db")
    monkeypatch.setattr(db.settings, "db_path", dbfile, raising=False)
    monkeypatch.setattr(bcp.settings, "build_cache_prune_enabled", False, raising=False)
    init_db()
    payload = asyncio.run(main.queue())
    assert payload["build_cache_prune"]["enabled"] is False