Конфиг-only активация двух новых источников GPS-треков поверх pipeline ET-008. Не вводит новых компонентов, БД-таблиц, endpoint'ов. Config: - config/gps_sources.yaml: enduro_russia enabled=true, base_url исправлен на endurorussia.ru (без дефиса); добавлена запись wikiloc с max_tracks_per_run=50, activity_filter=[motorcycle, enduro]. - config/gps_regions.yaml: wikiloc добавлен в tsfo_plus_chuvashia.sources. Parser: - wikiloc.py: добавлен soft-cap max_tracks_per_run в collect(), извлечение created_at из GPX metadata/первого trkpt — для корректной межисточниковой дедупликации с EnduroRussia. UI (src/web/gps_tracks.js): - GPS_SOURCE_COLORS: добавлен цвет wikiloc (#4363d8). - Дефолтный фильтр sources включает wikiloc. - GPS_SOURCE_ATTRIBUTIONS: маппинг source_id → строка атрибуции; _updateGpsAttribution() подтягивает /api/gps-tracks/health и выставляет attribution с теми источниками, у которых tracks > 0. - _buildGpsFiltersUI: чекбокс «Wikiloc» в #gps-source-grid. Tests: - Fixtures: 7 файлов в tests/fixtures/gps-tracks/. - Unit: 10 UT-ER + 10 UT-WL — парсеры, MAPPING, bbox-фильтр, pagination, 429/403 graceful-stop, rate-limit, max_tracks_per_run. - Integration: IT-ER-01, IT-WL-01, IT-WL-02, IT-DEDUP-01, IT-LIC-01 через scripts.gps_collect.main + httpx.MockTransport. - Contract: 2 CT-ER с маркером @pytest.mark.network (nightly only). - JS: 2 новых теста на наличие wikiloc в SOURCE_COLORS и в фильтрах. Linters/Tests: ruff clean (новые файлы), 166 pytest passed, 24 JS-tests passed. Refs: ET-009 Acceptance: AC-01..AC-08, AC-14..AC-17 (для AC-09..AC-13 — продакшн-прогон) Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
263 lines
11 KiB
Python
263 lines
11 KiB
Python
"""Unit tests for WikilocParser (ET-009).
|
||
|
||
Coverage:
|
||
- UT-WL-01: _extract_track_paths returns ≥ 5 unique paths
|
||
- UT-WL-02: _extract_gpx_url with downloadTrail.do
|
||
- UT-WL-03: _extract_gpx_url fallback by track_id
|
||
- UT-WL-04: _extract_track_name from <h1>
|
||
- UT-WL-05: _parse_gpx success — activity_type='moto', source_id='wikiloc'
|
||
- UT-WL-06: MAPPING translates categories
|
||
- UT-WL-07: HTTP 403 on search → graceful stop
|
||
- UT-WL-08: HTTP 429 on track page → graceful stop, earlier preserved
|
||
- UT-WL-09: rate_limit_sec respected
|
||
- UT-WL-10: max_tracks_per_run cap stops yield exactly
|
||
"""
|
||
import asyncio
|
||
import os
|
||
from typing import Callable
|
||
|
||
import httpx
|
||
|
||
from src.api.gps_tracks.sources import wikiloc as wl_module
|
||
from src.api.gps_tracks.sources.wikiloc import (
|
||
WikilocParser,
|
||
_extract_gpx_url,
|
||
_extract_track_name,
|
||
_extract_track_paths,
|
||
_parse_gpx,
|
||
)
|
||
|
||
# Directory with the recorded GPS-track fixtures, resolved relative to this
# test module (one level up, then fixtures/gps-tracks).
FIXTURES_DIR = os.path.join(
    os.path.dirname(__file__),
    "..",
    "fixtures",
    "gps-tracks",
)

# Bounding box passed to WikilocParser.collect() by the async tests.
# NOTE(review): axis order is not visible in this file — presumably
# (min_lon, min_lat, max_lon, max_lat); confirm against the parser.
BBOX_TSFO = (29.0, 49.5, 47.5, 60.0)
|
||
|
||
|
||
def _read_fixture(name: str) -> bytes:
    """Return the raw bytes of the fixture file *name* located in ``FIXTURES_DIR``."""
    fixture_path = os.path.join(FIXTURES_DIR, name)
    with open(fixture_path, "rb") as handle:
        payload = handle.read()
    return payload
|
||
|
||
|
||
def _read_fixture_text(name: str) -> str:
    """Return the fixture file *name* decoded as UTF-8 text."""
    raw = _read_fixture(name)
    return raw.decode("utf-8")
|
||
|
||
|
||
def _make_config(**overrides) -> dict:
|
||
cfg = {
|
||
"id": "wikiloc",
|
||
"base_url": "https://www.wikiloc.com",
|
||
"rate_limit_sec": 0,
|
||
"user_agent": "test-agent",
|
||
"source_priority": 70,
|
||
"activity_filter": ["motorcycle"],
|
||
}
|
||
cfg.update(overrides)
|
||
return cfg
|
||
|
||
|
||
def _patch_client(monkeypatch, handler: Callable[[httpx.Request], httpx.Response]) -> None:
    """Swap ``httpx.AsyncClient`` in the wikiloc module for a MockTransport-backed client.

    Args:
        monkeypatch: pytest's ``monkeypatch`` fixture; it performs the patch.
        handler: callable that receives each outgoing ``httpx.Request`` and
            returns the ``httpx.Response`` to serve for it.
    """
    # Capture the real class first so the replacement can still build real clients.
    real_client_cls = httpx.AsyncClient
    mock_transport = httpx.MockTransport(handler)

    def _build_client(*args, **kwargs):
        # Whatever the caller asked for, the transport is forced to the mock one.
        return real_client_cls(*args, **{**kwargs, "transport": mock_transport})

    monkeypatch.setattr(wl_module.httpx, "AsyncClient", _build_client)
|
||
|
||
|
||
async def _collect_all(parser, bbox):
|
||
tracks = []
|
||
async for t in parser.collect(bbox, {}):
|
||
tracks.append(t)
|
||
return tracks
|
||
|
||
|
||
# ─── UT-WL-01 ───────────────────────────────────────────────────────────────
|
||
|
||
def test_ut_wl_01_extract_track_paths():
    """UT-WL-01: _extract_track_paths returns at least 5 unique paths."""
    search_page = _read_fixture_text("wikiloc-search-page1.html")
    found = _extract_track_paths(search_page)

    assert len(found) >= 5
    assert len(found) == len(set(found))  # no duplicates
    assert all(path.startswith("/trails/") for path in found)
|
||
|
||
|
||
# ─── UT-WL-02 ───────────────────────────────────────────────────────────────
|
||
|
||
def test_ut_wl_02_extract_gpx_url_downloadtrail():
    """UT-WL-02: _extract_gpx_url returns an absolute URL for downloadTrail.do?id=X."""
    page = '<html><body><a href="/wikiloc/downloadTrail.do?id=12345">GPX</a></body></html>'
    expected = "https://www.wikiloc.com/wikiloc/downloadTrail.do?id=12345"

    assert _extract_gpx_url(page, "https://www.wikiloc.com", "12345") == expected
|
||
|
||
|
||
# ─── UT-WL-03 ───────────────────────────────────────────────────────────────
|
||
|
||
def test_ut_wl_03_extract_gpx_url_fallback():
    """UT-WL-03: _extract_gpx_url falls back to track_id when the page has no explicit link."""
    page_without_link = "<html><body><p>No GPX link here at all.</p></body></html>"
    expected = "https://www.wikiloc.com/wikiloc/downloadTrail.do?id=99999"

    assert _extract_gpx_url(page_without_link, "https://www.wikiloc.com", "99999") == expected
|
||
|
||
|
||
# ─── UT-WL-04 ───────────────────────────────────────────────────────────────
|
||
|
||
def test_ut_wl_04_extract_track_name():
    """UT-WL-04: _extract_track_name extracts the text of the <h1> element."""
    inline_page = "<html><body><h1>Test Trail</h1></body></html>"
    assert _extract_track_name(inline_page) == "Test Trail"

    # Same check against the recorded trail-page fixture.
    fixture_page = _read_fixture_text("wikiloc-trail-page.html")
    assert _extract_track_name(fixture_page) == "Дмитровский лес"
|
||
|
||
|
||
# ─── UT-WL-05 ───────────────────────────────────────────────────────────────
|
||
|
||
def test_ut_wl_05_parse_gpx_success():
    """UT-WL-05: _parse_gpx on wikiloc-track.gpx yields a track with activity_type='moto'."""
    gpx_payload = _read_fixture("wikiloc-track.gpx")
    # Metadata handed to the parser alongside the raw GPX bytes.
    meta = {
        "track_id": "12345678",
        "name": "Дмитровский лес",
        "activity_type": "moto",
        "source_id": "wikiloc",
        "track_url": "https://www.wikiloc.com/trails/motorcycle-enduro/dmitrovsky-loop-12345678",
        "source_priority": 70,
    }

    parsed = _parse_gpx(gpx_payload, **meta)

    assert parsed is not None
    assert parsed.activity_type == "moto"
    assert parsed.source_id == "wikiloc"
    assert "wikiloc.com" in parsed.external_url
    assert parsed.points_count >= 10
    assert parsed.length_m > 0
|
||
|
||
|
||
# ─── UT-WL-06 ───────────────────────────────────────────────────────────────
|
||
|
||
def test_ut_wl_06_mapping_categories():
    """UT-WL-06: MAPPING translates the motorcycle / hiking / mtb categories."""
    mapping = WikilocParser.MAPPING
    expected_pairs = (
        ("motorcycle", "moto"),
        ("hiking", "hike"),
        ("mtb", "bicycle"),
    )
    for category, activity in expected_pairs:
        assert mapping[category] == activity
|
||
|
||
|
||
# ─── UT-WL-07 ───────────────────────────────────────────────────────────────
|
||
|
||
async def test_ut_wl_07_http_403_search_graceful_stop(monkeypatch):
    """UT-WL-07: a 403 on the search page stops collection gracefully with zero yields."""

    def handler(req: httpx.Request) -> httpx.Response:
        # Everything except the search endpoint is unknown to this fake server.
        if "find.do" not in req.url.path:
            return httpx.Response(404)
        return httpx.Response(403, text="Forbidden")

    _patch_client(monkeypatch, handler)

    collected = await _collect_all(WikilocParser(_make_config()), BBOX_TSFO)

    assert collected == []
|
||
|
||
|
||
# ─── UT-WL-08 ───────────────────────────────────────────────────────────────
|
||
|
||
async def test_ut_wl_08_http_429_track_graceful_stop(monkeypatch):
    """UT-WL-08: a 429 on the 2nd track page — the 1st track is yielded, then a graceful stop."""
    search_page = _read_fixture_text("wikiloc-search-page1.html")
    trail_page = _read_fixture_text("wikiloc-trail-page.html")
    gpx_payload = _read_fixture("wikiloc-track.gpx")

    trail_hits = 0  # number of track-page requests served so far

    def handler(req: httpx.Request) -> httpx.Response:
        nonlocal trail_hits
        url_path = req.url.path

        if "find.do" in url_path:
            return httpx.Response(200, text=search_page)

        if url_path.startswith("/trails/"):
            trail_hits += 1
            # Only the very first track page succeeds; every later one is throttled.
            if trail_hits > 1:
                return httpx.Response(429, text="Too Many Requests")
            return httpx.Response(200, text=trail_page)

        if "downloadTrail.do" in url_path:
            return httpx.Response(200, content=gpx_payload)

        return httpx.Response(404)

    _patch_client(monkeypatch, handler)

    collected = await _collect_all(WikilocParser(_make_config()), BBOX_TSFO)

    assert len(collected) == 1
    assert "wikiloc.com" in collected[0].external_url
|
||
|
||
|
||
# ─── UT-WL-09 ───────────────────────────────────────────────────────────────
|
||
|
||
async def test_ut_wl_09_rate_limit_respected(monkeypatch):
    """UT-WL-09: asyncio.sleep is called between requests with rate_limit_sec.

    The fake server lists a single track on search page 0 and nothing on any
    other page. ``asyncio.sleep`` as seen by the wikiloc module is replaced
    with a recorder, and the test asserts that at least two sleeps happened
    and that each one was requested for >= 10 seconds (the configured
    ``rate_limit_sec``).
    """
    trail_html = _read_fixture_text("wikiloc-trail-page.html")
    gpx_bytes = _read_fixture("wikiloc-track.gpx")
    # A single trail link, so that exactly one track gets processed.
    mini_html = '<html><a href="/trails/motorcycle-enduro/12345">x</a></html>'

    def handler(req: httpx.Request) -> httpx.Response:
        path = req.url.path
        if "find.do" in path:
            # page=0 -> one track, any other page -> empty result.
            # ``req.url.query`` is raw bytes; test it as bytes instead of
            # going through str(), which would match against the b'...' repr
            # and raise BytesWarning under ``python -bb``.
            if b"page=0" in req.url.query:
                return httpx.Response(200, text=mini_html)
            return httpx.Response(200, text="<html></html>")
        if path.startswith("/trails/"):
            return httpx.Response(200, text=trail_html)
        if "downloadTrail.do" in path:
            return httpx.Response(200, content=gpx_bytes)
        return httpx.Response(404)

    _patch_client(monkeypatch, handler)

    sleep_calls = []
    # Keep a handle on the real coroutine before it is patched out.
    real_sleep = asyncio.sleep

    async def mock_sleep(sec):
        sleep_calls.append(sec)
        # Yield to the event loop via a zero-length real sleep so the test stays fast.
        await real_sleep(0)

    monkeypatch.setattr(wl_module.asyncio, "sleep", mock_sleep)

    parser = WikilocParser(_make_config(rate_limit_sec=10))
    await _collect_all(parser, BBOX_TSFO)

    # There must be several sleeps between requests, each with an argument >= 10.
    assert len(sleep_calls) >= 2, f"expected ≥ 2 sleep calls, got {sleep_calls}"
    assert all(s >= 10 for s in sleep_calls), f"all sleep args must be ≥ 10, got {sleep_calls}"
|
||
|
||
|
||
# ─── UT-WL-10 ───────────────────────────────────────────────────────────────
|
||
|
||
async def test_ut_wl_10_max_tracks_per_run_cap(monkeypatch):
    """UT-WL-10: with max_tracks_per_run=2 and a search returning >= 5 tracks, exactly 2 are yielded.

    The fake server serves the recorded search fixture for page 0 and an
    empty page otherwise; every track page and GPX download succeeds, so the
    only thing that can limit the result is the configured cap.
    """
    search_html = _read_fixture_text("wikiloc-search-page1.html")
    trail_html = _read_fixture_text("wikiloc-trail-page.html")
    gpx_bytes = _read_fixture("wikiloc-track.gpx")

    def handler(req: httpx.Request) -> httpx.Response:
        path = req.url.path
        if "find.do" in path:
            # ``req.url.query`` is raw bytes; test it as bytes instead of
            # going through str(), which would match against the b'...' repr
            # and raise BytesWarning under ``python -bb``.
            if b"page=0" in req.url.query:
                return httpx.Response(200, text=search_html)
            return httpx.Response(200, text="<html></html>")
        if path.startswith("/trails/"):
            return httpx.Response(200, text=trail_html)
        if "downloadTrail.do" in path:
            return httpx.Response(200, content=gpx_bytes)
        return httpx.Response(404)

    _patch_client(monkeypatch, handler)

    parser = WikilocParser(_make_config(max_tracks_per_run=2))
    tracks = await _collect_all(parser, BBOX_TSFO)

    assert len(tracks) == 2
|