Конфиг-only активация двух новых источников GPS-треков поверх pipeline ET-008. Не вводит новых компонентов, БД-таблиц, endpoint'ов. Config: - config/gps_sources.yaml: enduro_russia enabled=true, base_url исправлен на endurorussia.ru (без дефиса); добавлена запись wikiloc с max_tracks_per_run=50, activity_filter=[motorcycle, enduro]. - config/gps_regions.yaml: wikiloc добавлен в tsfo_plus_chuvashia.sources. Parser: - wikiloc.py: добавлен soft-cap max_tracks_per_run в collect(), извлечение created_at из GPX metadata/первого trkpt — для корректной межисточниковой дедупликации с EnduroRussia. UI (src/web/gps_tracks.js): - GPS_SOURCE_COLORS: добавлен цвет wikiloc (#4363d8). - Дефолтный фильтр sources включает wikiloc. - GPS_SOURCE_ATTRIBUTIONS: маппинг source_id → строка атрибуции; _updateGpsAttribution() подтягивает /api/gps-tracks/health и выставляет attribution с теми источниками, у которых tracks > 0. - _buildGpsFiltersUI: чекбокс «Wikiloc» в #gps-source-grid. Tests: - Fixtures: 7 файлов в tests/fixtures/gps-tracks/. - Unit: 10 UT-ER + 10 UT-WL — парсеры, MAPPING, bbox-фильтр, pagination, 429/403 graceful-stop, rate-limit, max_tracks_per_run. - Integration: IT-ER-01, IT-WL-01, IT-WL-02, IT-DEDUP-01, IT-LIC-01 через scripts.gps_collect.main + httpx.MockTransport. - Contract: 2 CT-ER с маркером @pytest.mark.network (nightly only). - JS: 2 новых теста на наличие wikiloc в SOURCE_COLORS и в фильтрах. Linters/Tests: ruff clean (новые файлы), 166 pytest passed, 24 JS-tests passed. Refs: ET-009 Acceptance: AC-01..AC-08, AC-14..AC-17 (для AC-09..AC-13 — продакшн-прогон) Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
361 lines
14 KiB
Python
361 lines
14 KiB
Python
"""Integration tests for GPS pipeline with new sources (ET-009).
|
||
|
||
Coverage:
|
||
- IT-ER-01: EnduroRussia pipeline with 3 fixture GPX (track 1 in-bbox, track 2 empty, track 3 out-of-bbox)
|
||
- IT-WL-01: Wikiloc pipeline with 1 fixture track
|
||
- IT-WL-02: Wikiloc graceful-stop on 403 → status ∈ {ok, partial, rate_limited}, exit_code=0
|
||
- IT-DEDUP-01: EnduroRussia + Wikiloc same track → 1 row, merged sources
|
||
- IT-LIC-01: License guard blocks source when ADR status=proposed
|
||
"""
|
||
import asyncio
|
||
import json
|
||
import os
|
||
import sys
|
||
from typing import Callable
|
||
|
||
import httpx
|
||
import yaml
|
||
|
||
# Add project root to path
|
||
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
|
||
sys.path.insert(0, PROJECT_ROOT)
|
||
|
||
from src.api.gps_tracks.sources import enduro_russia as er_module # noqa: E402
|
||
from src.api.gps_tracks.sources import wikiloc as wl_module # noqa: E402
|
||
|
||
# Directory with the fixture files read by _read_fixture():
# "<directory of this file>/../fixtures/gps-tracks".
FIXTURES_DIR = os.path.join(os.path.dirname(__file__), "..", "fixtures", "gps-tracks")
|
||
|
||
|
||
def _read_fixture(name: str) -> bytes:
    """Return the raw bytes of the fixture file ``name`` under ``FIXTURES_DIR``."""
    fixture_path = os.path.join(FIXTURES_DIR, name)
    with open(fixture_path, "rb") as fixture_file:
        payload = fixture_file.read()
    return payload
|
||
|
||
|
||
def _read_fixture_text(name: str) -> str:
    """Return the fixture file ``name`` decoded as UTF-8 text."""
    raw = _read_fixture(name)
    return str(raw, "utf-8")
|
||
|
||
|
||
def _make_handler_combined(handlers: dict) -> Callable[[httpx.Request], httpx.Response]:
    """Build one mock-transport handler that dispatches requests by URL host.

    ``handlers`` maps a host substring to a handler callable. The first key
    (in dict iteration order) that is a substring of the request host decides
    which callable handles the request; when no key matches, the combined
    handler answers with a 404 response.
    """

    def dispatch(req: httpx.Request) -> httpx.Response:
        request_host = req.url.host
        for pattern in handlers:
            # Substring match, not equality: "wikiloc.com" also matches
            # "www.wikiloc.com".
            if pattern in request_host:
                return handlers[pattern](req)
        return httpx.Response(404)

    return dispatch
|
||
|
||
|
||
def _patch_httpx(monkeypatch, handler):
    """Replace ``httpx.AsyncClient`` as seen by both parser modules.

    The replacement is a factory that still builds a real ``AsyncClient`` but
    always forces its ``transport`` to an ``httpx.MockTransport`` wrapping
    ``handler``, overriding any transport the caller passed.
    """
    mock_transport = httpx.MockTransport(handler)
    # Captured before patching so the factory keeps calling the real class.
    real_async_client = httpx.AsyncClient

    def factory(*args, **kwargs):
        forced_kwargs = {**kwargs, "transport": mock_transport}
        return real_async_client(*args, **forced_kwargs)

    for parser_module in (er_module, wl_module):
        monkeypatch.setattr(parser_module.httpx, "AsyncClient", factory)
|
||
|
||
|
||
def _write_config(tmp_dir: str, sources: list, regions: list) -> tuple[str, str]:
    """Write temporary GPS config files and return their paths.

    Two YAML files are created inside ``tmp_dir``:

    * ``gps_sources.yaml`` containing ``{"sources": sources}``
    * ``gps_regions.yaml`` containing ``{"regions": regions}``

    Args:
        tmp_dir: Existing directory to write into.
        sources: Source definitions to dump under the ``sources`` key.
        regions: Region definitions to dump under the ``regions`` key.

    Returns:
        ``(sources_path, regions_path)``.
    """
    src_path = os.path.join(tmp_dir, "gps_sources.yaml")
    reg_path = os.path.join(tmp_dir, "gps_regions.yaml")
    documents = (
        (src_path, {"sources": sources}),
        (reg_path, {"regions": regions}),
    )
    for path, document in documents:
        # Explicit encoding: do not depend on the platform/locale default.
        with open(path, "w", encoding="utf-8") as f:
            yaml.safe_dump(document, f)
    return src_path, reg_path
|
||
|
||
|
||
def _setup_env(monkeypatch, tmp_dir, sources, regions):
    """Write temp configs and set the three GPS_* environment variables.

    Sets ``GPS_SOURCES_CONFIG`` and ``GPS_REGIONS_CONFIG`` to the files
    written by ``_write_config`` and ``GPS_TRACKS_DB_PATH`` to
    ``<tmp_dir>/test_gps.sqlite``. Returns that database path.
    """
    sources_cfg, regions_cfg = _write_config(tmp_dir, sources, regions)
    db_path = os.path.join(tmp_dir, "test_gps.sqlite")
    env_overrides = {
        "GPS_SOURCES_CONFIG": sources_cfg,
        "GPS_REGIONS_CONFIG": regions_cfg,
        "GPS_TRACKS_DB_PATH": db_path,
    }
    for var_name, value in env_overrides.items():
        monkeypatch.setenv(var_name, value)
    return db_path
|
||
|
||
|
||
def _run_pipeline(args=None):
    """Run ``scripts.gps_collect.main()`` under ``asyncio.run``.

    ``sys.argv`` is temporarily replaced with ``["gps_collect.py", *args]``
    and restored afterwards, even when the pipeline raises. Returns whatever
    ``main()`` returns; the tests in this module compare it to an exit code.
    """
    from scripts.gps_collect import main as pipeline_main

    cli_args = args or []
    argv_backup = list(sys.argv)
    try:
        sys.argv = ["gps_collect.py"] + cli_args
        result = asyncio.run(pipeline_main())
    finally:
        sys.argv = argv_backup
    return result
|
||
|
||
|
||
def _last_pipeline_run(db_path: str) -> "dict | None":
    """Return the newest ``pipeline_runs`` row as a dict.

    Args:
        db_path: Path to the SQLite database file.

    Returns:
        The row with the highest ``id`` as ``{column: value}``, or ``None``
        when the table has no rows.

    Raises:
        sqlite3.Error: If the query fails (e.g. the table does not exist).
    """
    import sqlite3
    from contextlib import closing

    # closing() guarantees the connection is released even if the query
    # raises; a bare sqlite3 connection used as a context manager would only
    # manage the transaction, not close the handle.
    with closing(sqlite3.connect(db_path)) as conn:
        conn.row_factory = sqlite3.Row
        row = conn.execute(
            "SELECT * FROM pipeline_runs ORDER BY id DESC LIMIT 1"
        ).fetchone()
    return dict(row) if row is not None else None
|
||
|
||
|
||
def _count_tracks(db_path: str) -> int:
    """Return the number of rows in the ``tracks`` table.

    Args:
        db_path: Path to the SQLite database file.

    Raises:
        sqlite3.Error: If the query fails (e.g. the table does not exist).
    """
    import sqlite3
    from contextlib import closing

    # closing() releases the connection even when the query raises.
    with closing(sqlite3.connect(db_path)) as conn:
        (count,) = conn.execute("SELECT COUNT(*) FROM tracks").fetchone()
    return count
|
||
|
||
|
||
def _all_tracks(db_path: str) -> list[dict]:
    """Return every row of the ``tracks`` table as a list of dicts.

    Args:
        db_path: Path to the SQLite database file.

    Returns:
        One ``{column: value}`` dict per row; an empty list when the table
        has no rows.

    Raises:
        sqlite3.Error: If the query fails (e.g. the table does not exist).
    """
    import sqlite3
    from contextlib import closing

    # closing() releases the connection even when the query raises.
    with closing(sqlite3.connect(db_path)) as conn:
        conn.row_factory = sqlite3.Row
        rows = conn.execute("SELECT * FROM tracks").fetchall()
    return [dict(r) for r in rows]
|
||
|
||
|
||
# Source and region definitions shared by the tests below. They are dumped
# into the temporary gps_sources.yaml / gps_regions.yaml by _write_config().

# EnduroRussia source entry.
ER_SOURCE = {
    "id": "enduro_russia",
    "name": "EnduroRussia.ru",
    "enabled": True,
    # Path of the licensing ADR for this source (IT-LIC-01 overrides it).
    "license_adr": "docs/work-items/ET-008/06-adr/ADR-010-enduro-russia-licensing.md",
    "base_url": "https://endurorussia.ru",
    "rate_limit_sec": 0,
    "user_agent": "test/1.0",
    "attribution": "EnduroRussia.ru",
    "parser_module": "src.api.gps_tracks.sources.enduro_russia",
    "save_user_field": False,
    "source_priority": 80,
}

# Wikiloc source entry; same keys as ER_SOURCE plus "activity_filter".
WL_SOURCE = {
    "id": "wikiloc",
    "name": "Wikiloc",
    "enabled": True,
    "license_adr": "docs/work-items/ET-008/06-adr/ADR-012-wikiloc-licensing.md",
    "base_url": "https://www.wikiloc.com",
    "rate_limit_sec": 0,
    "user_agent": "test/1.0",
    "attribution": "© Wikiloc contributors",
    "parser_module": "src.api.gps_tracks.sources.wikiloc",
    "save_user_field": False,
    "source_priority": 70,
    "activity_filter": ["motorcycle"],
}

# Region entry that enables both sources.
REGION_TSFO = {
    "id": "tsfo_plus_chuvashia",
    "name": "ЦФО + Чувашия",
    # NOTE(review): presumably [min_lon, min_lat, max_lon, max_lat] — confirm
    # against the pipeline's bbox handling.
    "bbox": [29.0, 49.5, 47.5, 60.0],
    "enabled": True,
    "sources": ["enduro_russia", "wikiloc"],
}
|
||
|
||
|
||
# ─── IT-ER-01 ───────────────────────────────────────────────────────────────
|
||
|
||
def test_it_er_01_pipeline_enduro_russia_three_gpx(monkeypatch, tmp_path):
    """IT-ER-01: three fixture GPX files → tracks_new=1.

    Track 1 is accepted, track 2 is empty, track 3 lies outside the bbox.
    """
    api_data = {
        "items": [
            {"id": 1, "name": "Track1", "difficulty": "hard", "created_at": "2024-08-15 12:30:00"},
            {"id": 2, "name": "Track2", "difficulty": "soft", "created_at": "2024-09-02 09:15:00"},
            {"id": 3, "name": "Track3", "difficulty": "soft", "created_at": "2024-09-10 08:00:00"},
        ],
        "total": 3,
        "page": 0,
    }
    # GPX download path → fixture file name; fixtures are read lazily, only
    # when the matching URL is actually requested.
    gpx_fixture_by_path = {
        f"/api/tracks/{tid}/gpx": f"enduro-russia-track-{tid}.gpx" for tid in (1, 2, 3)
    }

    def handler(req: httpx.Request) -> httpx.Response:
        if req.url.host != "endurorussia.ru":
            return httpx.Response(404)
        path = req.url.path
        if path == "/api/tracks":
            return httpx.Response(200, json=api_data)
        fixture_name = gpx_fixture_by_path.get(path)
        if fixture_name is None:
            return httpx.Response(404)
        return httpx.Response(200, content=_read_fixture(fixture_name))

    _patch_httpx(monkeypatch, handler)
    db_path = _setup_env(monkeypatch, str(tmp_path), [ER_SOURCE], [REGION_TSFO])

    exit_code = _run_pipeline(["--region", "tsfo_plus_chuvashia", "--source", "enduro_russia"])

    assert exit_code == 0
    assert _count_tracks(db_path) == 1
    run = _last_pipeline_run(db_path)
    assert run is not None
    assert run["status"] == "ok"
    assert run["tracks_new"] == 1
    assert run["source_id"] == "enduro_russia"
|
||
|
||
|
||
# ─── IT-WL-01 ───────────────────────────────────────────────────────────────
|
||
|
||
def test_it_wl_01_pipeline_wikiloc_one_track(monkeypatch, tmp_path):
    """IT-WL-01: Wikiloc with one track → tracks_new=1, status ∈ {ok, partial}."""
    # Search page 0 lists exactly one trail; every other search page is empty
    # HTML (status 200), which presumably ends the parser's pagination.
    mini_search = '<html><a href="/trails/motorcycle-enduro/12345678">x</a></html>'

    def handler(req: httpx.Request) -> httpx.Response:
        if req.url.host == "www.wikiloc.com":
            if "find.do" in req.url.path:
                if "page=0" in str(req.url.query):
                    return httpx.Response(200, text=mini_search)
                return httpx.Response(200, text="<html></html>")
            if req.url.path.startswith("/trails/"):
                return httpx.Response(200, text=_read_fixture_text("wikiloc-trail-page.html"))
            if "downloadTrail.do" in req.url.path:
                return httpx.Response(200, content=_read_fixture("wikiloc-track.gpx"))
        return httpx.Response(404)

    _patch_httpx(monkeypatch, handler)
    region = dict(REGION_TSFO, sources=["wikiloc"])
    db_path = _setup_env(monkeypatch, str(tmp_path), [WL_SOURCE], [region])

    exit_code = _run_pipeline(["--region", "tsfo_plus_chuvashia", "--source", "wikiloc"])

    assert exit_code == 0
    assert _count_tracks(db_path) == 1
    run = _last_pipeline_run(db_path)
    # Fail with a clear assertion (not a TypeError on None) when the pipeline
    # recorded no run at all.
    assert run is not None
    assert run["status"] in ("ok", "partial")
    assert run["tracks_new"] == 1
|
||
|
||
|
||
# ─── IT-WL-02 ───────────────────────────────────────────────────────────────
|
||
|
||
def test_it_wl_02_pipeline_wikiloc_403_graceful(monkeypatch, tmp_path):
    """IT-WL-02: Wikiloc answers 403 on search → exit_code=0 and no tracks.

    The recorded run status must be one of ``ok``, ``partial`` or
    ``rate_limited``.
    """

    def handler(req: httpx.Request) -> httpx.Response:
        if req.url.host == "www.wikiloc.com":
            if "find.do" in req.url.path:
                return httpx.Response(403, text="Forbidden")
        return httpx.Response(404)

    _patch_httpx(monkeypatch, handler)
    region = dict(REGION_TSFO, sources=["wikiloc"])
    db_path = _setup_env(monkeypatch, str(tmp_path), [WL_SOURCE], [region])

    exit_code = _run_pipeline(["--region", "tsfo_plus_chuvashia", "--source", "wikiloc"])

    assert exit_code == 0, "graceful-stop should not produce error exit"
    assert _count_tracks(db_path) == 0
    run = _last_pipeline_run(db_path)
    # Fail with a clear assertion (not a TypeError on None) when the pipeline
    # recorded no run at all.
    assert run is not None
    # Graceful stop → status 'ok' (the parser simply finished without an
    # exception); the spec relaxes this to ∈ {ok, partial, rate_limited}.
    assert run["status"] in ("ok", "partial", "rate_limited")
    assert run["tracks_new"] == 0
|
||
|
||
|
||
# ─── IT-DEDUP-01 ────────────────────────────────────────────────────────────
|
||
|
||
def test_it_dedup_01_merge_enduro_russia_and_wikiloc(monkeypatch, tmp_path):
    """IT-DEDUP-01: the same track from two sources → one row with merged sources."""
    er_api = {
        "items": [
            {"id": 1, "name": "Дмитровский ER", "difficulty": "hard", "created_at": "2024-08-15 12:30:00"},
        ],
        "total": 1,
        "page": 0,
    }
    mini_search = '<html><a href="/trails/motorcycle-enduro/12345678">x</a></html>'

    def enduro_russia_routes(req: httpx.Request):
        """Answer EnduroRussia requests; ``None`` means "no route matched"."""
        path = req.url.path
        if path == "/api/tracks":
            return httpx.Response(200, json=er_api)
        if path == "/api/tracks/1/gpx":
            return httpx.Response(200, content=_read_fixture("enduro-russia-track-1.gpx"))
        return None

    def wikiloc_routes(req: httpx.Request):
        """Answer Wikiloc requests; ``None`` means "no route matched"."""
        path = req.url.path
        if "find.do" in path:
            # Only search page 0 lists the trail; other pages are empty HTML.
            if "page=0" in str(req.url.query):
                return httpx.Response(200, text=mini_search)
            return httpx.Response(200, text="<html></html>")
        if path.startswith("/trails/"):
            return httpx.Response(200, text=_read_fixture_text("wikiloc-trail-page.html"))
        if "downloadTrail.do" in path:
            return httpx.Response(200, content=_read_fixture("wikiloc-track.gpx"))
        return None

    routes_by_host = {
        "endurorussia.ru": enduro_russia_routes,
        "www.wikiloc.com": wikiloc_routes,
    }

    def handler(req: httpx.Request) -> httpx.Response:
        route = routes_by_host.get(req.url.host)
        response = route(req) if route is not None else None
        if response is None:
            return httpx.Response(404)
        return response

    _patch_httpx(monkeypatch, handler)
    region = dict(REGION_TSFO, sources=["enduro_russia", "wikiloc"])
    db_path = _setup_env(monkeypatch, str(tmp_path), [ER_SOURCE, WL_SOURCE], [region])

    # 1) EnduroRussia runs first and creates the track.
    code1 = _run_pipeline(["--region", "tsfo_plus_chuvashia", "--source", "enduro_russia"])
    assert code1 == 0
    assert _count_tracks(db_path) == 1

    # 2) Wikiloc runs second against the same database.
    code2 = _run_pipeline(["--region", "tsfo_plus_chuvashia", "--source", "wikiloc"])
    assert code2 == 0

    # There must still be a single row, now carrying both sources.
    tracks = _all_tracks(db_path)
    assert len(tracks) == 1, f"expected 1 merged record, got {len(tracks)}"
    merged = tracks[0]
    sources = json.loads(merged["sources_json"])
    assert "enduro_russia" in sources
    assert "wikiloc" in sources
    ext_urls = json.loads(merged["external_urls_json"])
    assert any("endurorussia.ru" in u for u in ext_urls)
    assert any("wikiloc.com" in u for u in ext_urls)
|
||
|
||
|
||
# ─── IT-LIC-01 ──────────────────────────────────────────────────────────────
|
||
|
||
def test_it_lic_01_license_guard_blocks_proposed(monkeypatch, tmp_path):
    """IT-LIC-01: ADR with ``status: proposed`` → source skipped as 'skipped_license'."""
    # Create a temporary ADR whose front matter says "status: proposed".
    adr_rel_path = "docs/work-items/ET-008/06-adr/ADR-FAKE-licensing.md"
    fake_adr = tmp_path / "docs" / "work-items" / "ET-008" / "06-adr" / "ADR-FAKE-licensing.md"
    fake_adr.parent.mkdir(parents=True)
    adr_lines = [
        "---",
        "type: adr",
        "adr_id: ADR-FAKE",
        "status: proposed",
        "---",
        "",
        "# Fake ADR for test",
        "",  # yields the trailing newline
    ]
    fake_adr.write_text("\n".join(adr_lines))

    er_source_proposed = {**ER_SOURCE, "license_adr": adr_rel_path}

    def handler(req: httpx.Request) -> httpx.Response:
        return httpx.Response(500)  # must never be reached

    _patch_httpx(monkeypatch, handler)

    # Per the original author's note, scripts.gps_collect derives the project
    # root from its own __file__, so the fake ADR under tmp_path would not be
    # found. Instead of symlinks or cwd tricks, wrap _check_license_adr so it
    # resolves the ADR path against tmp_path.
    from scripts import gps_collect as collect_mod

    original_check = collect_mod._check_license_adr

    def patched_check(adr_path, project_root):
        # The caller's project_root is deliberately ignored.
        return original_check(adr_path, str(tmp_path))

    monkeypatch.setattr(collect_mod, "_check_license_adr", patched_check)

    db_path = _setup_env(monkeypatch, str(tmp_path), [er_source_proposed], [REGION_TSFO])

    exit_code = _run_pipeline(["--region", "tsfo_plus_chuvashia", "--source", "enduro_russia"])

    # ET-009: the license guard sets has_error=True → exit_code=1.
    assert exit_code == 1
    run = _last_pipeline_run(db_path)
    assert run is not None
    assert run["status"] == "skipped_license"
    assert run["tracks_new"] == 0
|