Files
orchestrator/tests/test_no_host_hardcodes.py
claude-bot f1635ddb39
All checks were successful
CI / test (push) Successful in 57s
CI / test (pull_request) Successful in 55s
feat(replication): расхардкод хоста + секреты нового хоста + smoke-runbook
Фундамент тиража 10-common (эпик ORCH-10): платформа разворачивается на
новой инфре без правки кода — только env/конфиг. Каждый дефолт = боевому
значению (пустой .env => поведение 1:1, kill-switch-природа, NFR-2);
STAGE_TRANSITIONS/QG_CHECKS/check_*/machine-verdict/схема БД не тронуты.

- config: agent_home_dir / agent_git_name / git_email_domain / staging_port
  (ADR-001 D2/D4); код-блокеры A1-A4 закрыты: plane_sync ссылки из
  gitea_public_url+gitea_owner, launcher - единый agent_git_env() (x2 места),
  self_deploy/post_deploy - HOME+домен из Settings (имена системных акторов -
  платформенные литералы)
- image_freshness: staging_port из конфига + fail-closed guard
  staging_port == прод-порт -> отказ ДО ssh/build (инвариант ORCH-058 AC-9
  стал исполняемым); REPO= передаётся хуку явно обоими инвокерами (D7)
- SELF_HOSTING_REPO - нормативная платформенная константа (D3, пин-тест)
- compose: полная ${VAR:-default}-интерполяция (реестр B, карта D6); группа
  ORCH-040 uid/gid/HOME/маунты двигается согласованно (build.args APP_*);
  group_add "МИНА 1" сохранён x3; оба app-сервиса с явным command:
- Dockerfile: ARG APP_UID/APP_GID/APP_USER/APP_HOME (CMD exec-form 8500
  сознательно не тронут - D5); deploy-hook: REPO="${REPO:-...}" (D1 реестра)
- секреты: stdlib scripts/gen_secrets.py (token_hex(32); печать по умолчанию;
  --write никогда не перезаписывает существующий .env молча, exit=2;
  перезапись только --force); .env.example дополнен до полноты ключей старта
- доки: новый docs/operations/REPLICATION.md (карта env, чек-лист секретов,
  smoke-процедура с PASS/FAIL, границы 10-common/Lite/Bundled), INFRA.md,
  README, CLAUDE.md, CHANGELOG
- анти-регресс: tests/test_no_host_hardcodes.py (tokenize-сканер запрещённых
  литералов, config-модули - структурное исключение, allowlist пуст,
  негативная самопроверка) + test_host_config_keys / test_infra_parametrization
  / test_secrets_gen / test_replication_smoke; согласованные структурные
  правки test_orch040_compose (судит резолв дефолтов) и
  test_deploy_hook_rollback_sim (REPO через env-override = контракт D7)

Полный регресс: 1764 passed.

Refs: ORCH-101

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-10 20:50:43 +03:00

174 lines
7.3 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""ORCH-101 (FR-6 / AC-1, AC-7): structural anti-regression scanner — no host
hardcodes in executable platform code.
Scans ``src/**/*.py`` + ``watchdog/**/*.py`` for forbidden host-specific
literals (current host IP / home dir / hostname). Judges CODE only: comments
and docstrings are excluded via :mod:`tokenize` (NFR-5 — token types, not
line regexes, so the verdict is deterministic).
Structural exclusion (ADR-001 ORCH-101 D10): ``src/config.py`` and
``watchdog/config.py`` are skipped ENTIRELY — they are the canonical (and only
legitimate, BR-1) home of host-value defaults, and those defaults are REQUIRED
to equal the current production values (BR-5: /home/slin, mva154.local).
Scanning them would mean an eternally non-empty allowlist; the exclusion is a
rule of this test, not an allowlist entry.
The per-(file, literal) ALLOWLIST exists as a mechanism and MUST be empty at
ORCH-101 acceptance (AC-1): every code blocker A1-A4 is closed by Settings
keys. A future entry requires a justification string.
Negative self-check (TC-02): the scanner is exercised against synthetic
sources with a planted literal and must catch it — the test can never go
evergreen by accident.
"""
import io
import tokenize
from pathlib import Path
REPO_ROOT = Path(__file__).resolve().parents[1]
# Single point of truth for the forbidden literals (AC-7: centralised list).
FORBIDDEN: tuple[str, ...] = (
"82.22.50.71",
"/home/slin",
"mva154",
"duckdns",
)
# Scan zone: executable platform code only. tests/**, docs/**, scripts/**
# (the deploy hook carries a legitimate shell-default, ADR D7) and .env* are
# OUT of scope by construction.
SCAN_DIRS: tuple[str, ...] = ("src", "watchdog")
# Structural rule (ADR-001 D10), NOT an allowlist entry — see module docstring.
EXCLUDED_FILES: frozenset[str] = frozenset({"src/config.py", "watchdog/config.py"})
# {(relative_path, literal): "justification"} — MUST stay empty (AC-1/AC-7).
ALLOWLIST: dict[tuple[str, str], str] = {}
# Token types that are never judged: comments and non-logical newlines.
_TRIVIA = frozenset({tokenize.COMMENT, tokenize.NL, tokenize.ENCODING})
# A STRING token opening a logical line (after NEWLINE/INDENT/DEDENT or at
# file start) is a docstring / bare string statement -> not executable data.
_DOCSTRING_PREV = frozenset({None, tokenize.NEWLINE, tokenize.INDENT, tokenize.DEDENT})
def find_violations(source: str, forbidden: tuple[str, ...] = FORBIDDEN) -> list[tuple[int, str, str]]:
    """Return ``[(lineno, literal, token_text)]`` for forbidden literals in CODE.

    Comments are skipped (COMMENT tokens). A docstring / bare string statement
    is skipped as well, but only when the WHOLE statement is made of STRING
    tokens: it must open a logical line and run (through any implicitly
    concatenated pieces) straight to NEWLINE, ``;`` or end of input. A string
    that merely opens an executable expression -- ``"/home/slin".join(parts)``
    -- is judged like every other token, because a hardcoded host value in an
    executable string is exactly the regression this test exists to block.

    Args:
        source: Python source text to scan.
        forbidden: Substrings searched for in every judged token.

    Returns:
        Violations in token order, one entry per (token, literal) match;
        ``lineno`` is the 1-based line on which the token starts.

    Errors raised by :mod:`tokenize` for source it cannot tokenize are not
    caught here.
    """
    # Materialise the significant tokens: deciding whether a STRING is a bare
    # string statement requires looking at what FOLLOWS it, not only at what
    # precedes it.
    tokens = [
        tok
        for tok in tokenize.generate_tokens(io.StringIO(source).readline)
        if tok.type not in _TRIVIA
    ]

    violations: list[tuple[int, str, str]] = []
    prev_significant: int | None = None
    position = 0
    while position < len(tokens):
        tok = tokens[position]
        if tok.type == tokenize.STRING and prev_significant in _DOCSTRING_PREV:
            run_end = _string_only_statement_end(tokens, position)
            if run_end > position:
                # Docstring / bare string statement: skip every piece of it.
                prev_significant = tokenize.STRING
                position = run_end
                continue
        violations.extend(
            (tok.start[0], literal, tok.string)
            for literal in forbidden
            if literal in tok.string
        )
        prev_significant = tok.type
        position += 1
    return violations


def _string_only_statement_end(tokens: list[tokenize.TokenInfo], start: int) -> int:
    """Return the index just past a string-only statement beginning at ``start``.

    ``tokens`` must already be free of trivia tokens. The run of consecutive
    STRING tokens starting at ``start`` counts as a string-only statement when
    it is followed by NEWLINE, ENDMARKER, a ``;`` operator, or nothing at all.
    If anything else follows (an operator, a call, an attribute access, ...)
    the string is part of an executable expression and ``start`` is returned
    unchanged.
    """
    end = start
    while end < len(tokens) and tokens[end].type == tokenize.STRING:
        end += 1
    if end < len(tokens):
        follower = tokens[end]
        closes_statement = follower.type in (tokenize.NEWLINE, tokenize.ENDMARKER) or (
            follower.type == tokenize.OP and follower.string == ";"
        )
        if not closes_statement:
            return start
    return end
def _scan_files() -> list[Path]:
    """Collect the python files of the scan zone in a deterministic order.

    Each directory of ``SCAN_DIRS`` that exists under ``REPO_ROOT`` contributes
    its ``**/*.py`` files in sorted order; directories are visited in
    ``SCAN_DIRS`` order. Files whose repo-relative POSIX path is listed in
    ``EXCLUDED_FILES`` are left out.
    """
    collected: list[Path] = []
    for dirname in SCAN_DIRS:
        base = REPO_ROOT / dirname
        if not base.is_dir():
            continue  # a missing scan directory contributes nothing
        for candidate in sorted(base.glob("**/*.py")):
            if candidate.relative_to(REPO_ROOT).as_posix() in EXCLUDED_FILES:
                continue
            collected.append(candidate)
    return collected
# ---------------------------------------------------------------------------
# TC-01: the platform code carries no forbidden host literals (AC-1).
# ---------------------------------------------------------------------------
def test_no_host_hardcodes_in_executable_code():
    """TC-01: no scanned file carries a forbidden literal outside ALLOWLIST.

    Every violation that is not exempted by a ``(relative_path, literal)``
    allowlist key is rendered as ``path:line: ...`` and reported in a single
    assertion message.
    """
    offenders: list[str] = []
    for scanned in _scan_files():
        rel = scanned.relative_to(REPO_ROOT).as_posix()
        text = scanned.read_text(encoding="utf-8")
        offenders.extend(
            f"{rel}:{lineno}: forbidden literal {literal!r} in {token_text!r}"
            for lineno, literal, token_text in find_violations(text)
            if (rel, literal) not in ALLOWLIST
        )
    report = "\n".join(offenders)
    assert not offenders, (
        "Host-specific hardcodes found in executable code (read the value from "
        "src/config.py Settings instead — see ORCH-101 ADR-001 D1/D2):\n" + report
    )
def test_scan_zone_is_nonempty():
    """Fail if path drift leaves the scanner with (almost) nothing to scan.

    Also pins the scan-zone membership of two files: ``src/config.py`` must be
    absent (structural exclusion) and ``src/plane_sync.py`` must be present.
    """
    scanned = _scan_files()
    count = len(scanned)
    assert count > 10, f"scan zone unexpectedly small: {count} files"
    relative_paths = {entry.relative_to(REPO_ROOT).as_posix() for entry in scanned}
    # Structural exclusion is intact.
    assert "src/config.py" not in relative_paths
    # The A1 blocker file IS scanned.
    assert "src/plane_sync.py" in relative_paths
def test_allowlist_is_empty_at_acceptance():
    """AC-1/AC-7: the allowlist exists as a mechanism but has no entries."""
    failure_message = (
        "ORCH-101 ships with an EMPTY allowlist; a new entry needs an explicit "
        "justification and reviewer sign-off"
    )
    assert ALLOWLIST == {}, failure_message
# ---------------------------------------------------------------------------
# TC-02: negative self-check — the scanner actually catches a planted literal
# (the test is not evergreen) and actually skips comments/docstrings (NFR-5).
# ---------------------------------------------------------------------------
def test_scanner_catches_planted_literal_in_code():
    """TC-02: a URL assigned in code trips both literals it contains."""
    source = 'BASE = "http://git.mva154.duckdns.org"\n'
    found = find_violations(source)
    assert found, "scanner failed to catch a forbidden literal planted in code"
    caught = {literal for _lineno, literal, _text in found}
    assert caught == {"mva154", "duckdns"}
def test_scanner_catches_planted_literal_in_env_dict():
    """TC-02: a literal used as a dict value is reported exactly once, on line 1."""
    source = 'env = {**os.environ, "HOME": "/home/slin"}\n'
    located = [(hit[0], hit[1]) for hit in find_violations(source)]
    assert located == [(1, "/home/slin")]
def test_scanner_catches_planted_literal_in_fstring():
    """TC-02: a literal in the static part of an f-string is still reported."""
    source = 'url = f"http://{host}.mva154.local/x"\n'
    reported = [literal for _lineno, literal, _text in find_violations(source)]
    assert "mva154" in reported
def test_scanner_ignores_comments_and_docstrings():
    """NFR-5: literals that appear only in comments or docstrings are not reported."""
    lines = [
        '"""Module docstring mentioning mva154 and /home/slin and duckdns."""',
        "",
        "# a comment about 82.22.50.71 and /home/slin",
        "def f():",
        ' """Docstring: mva154.local lives here historically."""',
        " return 1 # trailing comment: duckdns",
    ]
    # Every line, including the last, is newline-terminated.
    source = "\n".join(lines) + "\n"
    assert find_violations(source) == []
def test_scanner_judges_string_values_not_in_statement_position():
    """A returned string value is reported even right below a skipped docstring.

    The docstring on line 2 carries a forbidden literal and must be ignored;
    the string returned on line 3 is the only expected hit.
    """
    lines = [
        "def f():",
        ' """mva154 in a docstring is fine."""',
        ' return "/home/slin"',
    ]
    source = "\n".join(lines) + "\n"
    located = [(hit[0], hit[1]) for hit in find_violations(source)]
    assert located == [(3, "/home/slin")]