"""ORCH-101 (FR-6 / AC-1, AC-7): structural anti-regression scanner — no host hardcodes in executable platform code. Scans ``src/**/*.py`` + ``watchdog/**/*.py`` for forbidden host-specific literals (current host IP / home dir / hostname). Judges CODE only: comments and docstrings are excluded via :mod:`tokenize` (NFR-5 — token types, not line regexes, so the verdict is deterministic). Structural exclusion (ADR-001 ORCH-101 D10): ``src/config.py`` and ``watchdog/config.py`` are skipped ENTIRELY — they are the canonical (and only legitimate, BR-1) home of host-value defaults, and those defaults are REQUIRED to equal the current production values (BR-5: /home/slin, mva154.local). Scanning them would mean an eternally non-empty allowlist; the exclusion is a rule of this test, not an allowlist entry. The per-(file, literal) ALLOWLIST exists as a mechanism and MUST be empty at ORCH-101 acceptance (AC-1): every code blocker A1–A4 is closed by Settings keys. A future entry requires a justification string. Negative self-check (TC-02): the scanner is exercised against synthetic sources with a planted literal and must catch it — the test can never go evergreen by accident. """ import io import tokenize from pathlib import Path REPO_ROOT = Path(__file__).resolve().parents[1] # Single point of truth for the forbidden literals (AC-7: centralised list). FORBIDDEN: tuple[str, ...] = ( "82.22.50.71", "/home/slin", "mva154", "duckdns", ) # Scan zone: executable platform code only. tests/**, docs/**, scripts/** # (the deploy hook carries a legitimate shell-default, ADR D7) and .env* are # OUT of scope by construction. SCAN_DIRS: tuple[str, ...] = ("src", "watchdog") # Structural rule (ADR-001 D10), NOT an allowlist entry — see module docstring. EXCLUDED_FILES: frozenset[str] = frozenset({"src/config.py", "watchdog/config.py"}) # {(relative_path, literal): "justification"} — MUST stay empty (AC-1/AC-7). 
ALLOWLIST: dict[tuple[str, str], str] = {}

# Token types that are never judged: comments and non-logical newlines.
_TRIVIA = frozenset({tokenize.COMMENT, tokenize.NL, tokenize.ENCODING})

# A STRING token opening a logical line (after NEWLINE/INDENT/DEDENT or at
# file start) is a *candidate* docstring / bare string statement. It is only
# exempt from judgement when the statement also ends right after it; a string
# that merely starts a larger expression is executable data.
_DOCSTRING_PREV = frozenset({None, tokenize.NEWLINE, tokenize.INDENT, tokenize.DEDENT})


def _ends_statement(tok: tokenize.TokenInfo) -> bool:
    """Return True when *tok* terminates the current simple statement."""
    if tok.type in (tokenize.NEWLINE, tokenize.ENDMARKER):
        return True
    return tok.type == tokenize.OP and tok.string == ";"


def find_violations(source: str, forbidden: tuple[str, ...] = FORBIDDEN) -> list[tuple[int, str, str]]:
    """Return ``[(lineno, literal, token_text)]`` for forbidden literals in CODE.

    Comments are skipped (COMMENT tokens). A docstring / bare string statement
    is skipped: a STRING token in statement position that is immediately
    followed by the end of the statement. Everything else — including string
    *values* assigned or passed in code — is judged: a hardcoded host value in
    an executable string is exactly the regression this test exists to block.

    A STRING that opens a logical line but continues into a larger expression
    (``"/home/slin".split("/")``, ``"%s.mva154.local" % host``) is NOT a bare
    string statement and is judged like any other token. The same applies to a
    docstring written as several adjacent literals.

    Args:
        source: Python source text to scan.
        forbidden: Substrings to look for; defaults to the module-wide list.

    Returns:
        One ``(lineno, literal, token_text)`` tuple per (token, literal) hit,
        in token order and, within a token, in ``forbidden`` order. ``lineno``
        is the line on which the offending token starts.

    Tokenizer errors raised for malformed source propagate to the caller.
    """
    # Materialise the significant tokens so each one can look one step ahead.
    tokens = [
        tok
        for tok in tokenize.generate_tokens(io.StringIO(source).readline)
        if tok.type not in _TRIVIA  # comments / blank-line NLs carry no statement position
    ]

    violations: list[tuple[int, str, str]] = []
    prev_significant: int | None = None
    for index, tok in enumerate(tokens):
        following = tokens[index + 1] if index + 1 < len(tokens) else None
        is_bare_string = (
            tok.type == tokenize.STRING
            and prev_significant in _DOCSTRING_PREV
            and (following is None or _ends_statement(following))
        )
        if not is_bare_string:
            violations.extend(
                (tok.start[0], literal, tok.string)
                for literal in forbidden
                if literal in tok.string
            )
        prev_significant = tok.type
    return violations


def _scan_files() -> list[Path]:
    """Deterministic (sorted) list of python files in the scan zone.

    Directories listed in ``SCAN_DIRS`` that do not exist under ``REPO_ROOT``
    are ignored; files named in ``EXCLUDED_FILES`` are dropped from the result.
    """
    files: list[Path] = []
    for d in SCAN_DIRS:
        root = REPO_ROOT / d
        if root.is_dir():
            files.extend(sorted(root.glob("**/*.py")))
    return [
        f for f in files
        if f.relative_to(REPO_ROOT).as_posix() not in EXCLUDED_FILES
    ]


# ---------------------------------------------------------------------------
# TC-01: the platform code carries no forbidden host literals (AC-1).
# ---------------------------------------------------------------------------


def test_no_host_hardcodes_in_executable_code():
    """Every scanned file is free of forbidden literals not covered by ALLOWLIST."""
    offenders: list[str] = []
    for path in _scan_files():
        rel = path.relative_to(REPO_ROOT).as_posix()
        hits = find_violations(path.read_text(encoding="utf-8"))
        # Keep only hits that no (file, literal) allowlist entry excuses.
        offenders.extend(
            f"{rel}:{lineno}: forbidden literal {literal!r} in {token_text!r}"
            for lineno, literal, token_text in hits
            if (rel, literal) not in ALLOWLIST
        )
    assert not offenders, (
        "Host-specific hardcodes found in executable code (read the value from "
        "src/config.py Settings instead — see ORCH-101 ADR-001 D1/D2):\n"
        + "\n".join(offenders)
    )


def test_scan_zone_is_nonempty():
    """Guard against the scanner silently scanning nothing (path drift)."""
    scanned = _scan_files()
    total = len(scanned)
    assert total > 10, f"scan zone unexpectedly small: {total} files"

    rel_paths = {p.relative_to(REPO_ROOT).as_posix() for p in scanned}
    assert "src/config.py" not in rel_paths  # structural exclusion intact
    assert "src/plane_sync.py" in rel_paths  # the A1 blocker file IS scanned


def test_allowlist_is_empty_at_acceptance():
    """AC-1/AC-7: the allowlist mechanism exists but carries no entries."""
    assert not ALLOWLIST, (
        "ORCH-101 ships with an EMPTY allowlist; a new entry needs an explicit "
        "justification and reviewer sign-off"
    )


# ---------------------------------------------------------------------------
# TC-02: negative self-check — the scanner actually catches a planted literal
# (the test is not evergreen) and actually skips comments/docstrings (NFR-5).
# ---------------------------------------------------------------------------


def test_scanner_catches_planted_literal_in_code():
    """A forbidden literal in an assigned string value is reported."""
    source = 'BASE = "http://git.mva154.duckdns.org"\n'
    found = find_violations(source)
    assert found, "scanner failed to catch a forbidden literal planted in code"
    caught = {hit[1] for hit in found}
    assert caught == {"mva154", "duckdns"}


def test_scanner_catches_planted_literal_in_env_dict():
    """A forbidden literal used as a dict value is reported with its line."""
    source = 'env = {**os.environ, "HOME": "/home/slin"}\n'
    found = find_violations(source)
    # Compare only (lineno, literal); the token text is not under test here.
    assert [hit[:2] for hit in found] == [(1, "/home/slin")]


def test_scanner_catches_planted_literal_in_fstring():
    """A forbidden literal inside an f-string is reported."""
    source = 'url = f"http://{host}.mva154.local/x"\n'
    found = find_violations(source)
    assert "mva154" in [hit[1] for hit in found]


def test_scanner_ignores_comments_and_docstrings():
    """Literals that appear only in comments or docstrings yield no hits."""
    source_lines = (
        '"""Module docstring mentioning mva154 and /home/slin and duckdns."""\n',
        "\n",
        "# a comment about 82.22.50.71 and /home/slin\n",
        "def f():\n",
        ' """Docstring: mva154.local lives here historically."""\n',
        " return 1 # trailing comment: duckdns\n",
    )
    assert find_violations("".join(source_lines)) == []


def test_scanner_judges_string_values_not_in_statement_position():
    """A returned string is judged even when a docstring above it is exempt."""
    # A string VALUE (right-hand side) with a literal must be caught even when
    # a docstring with the same literal is present above it.
    source_lines = (
        "def f():\n",
        ' """mva154 in a docstring is fine."""\n',
        ' return "/home/slin"\n',
    )
    found = find_violations("".join(source_lines))
    assert [hit[:2] for hit in found] == [(3, "/home/slin")]