"""Unit tests for WikilocParser (ET-009).
Coverage:
- UT-WL-01: _extract_track_paths returns ≥ 5 unique paths
- UT-WL-02: _extract_gpx_url with downloadTrail.do
- UT-WL-03: _extract_gpx_url fallback by track_id
- UT-WL-04: _extract_track_name from
- UT-WL-05: _parse_gpx success — activity_type='moto', source_id='wikiloc'
- UT-WL-06: MAPPING translates categories
- UT-WL-07: HTTP 403 on search → graceful stop
- UT-WL-08: HTTP 429 on track page → graceful stop, earlier preserved
- UT-WL-09: rate_limit_sec respected
- UT-WL-10: max_tracks_per_run cap stops yield exactly
"""
import asyncio
import os
from typing import Callable
import httpx
from src.api.gps_tracks.sources import wikiloc as wl_module
from src.api.gps_tracks.sources.wikiloc import (
WikilocParser,
_extract_gpx_url,
_extract_track_name,
_extract_track_paths,
_parse_gpx,
)
FIXTURES_DIR = os.path.join(os.path.dirname(__file__), "..", "fixtures", "gps-tracks")
BBOX_TSFO = (29.0, 49.5, 47.5, 60.0)
def _read_fixture(name: str) -> bytes:
with open(os.path.join(FIXTURES_DIR, name), "rb") as f:
return f.read()
def _read_fixture_text(name: str) -> str:
return _read_fixture(name).decode("utf-8")
def _make_config(**overrides) -> dict:
cfg = {
"id": "wikiloc",
"base_url": "https://www.wikiloc.com",
"rate_limit_sec": 0,
"user_agent": "test-agent",
"source_priority": 70,
"activity_filter": ["motorcycle"],
}
cfg.update(overrides)
return cfg
def _patch_client(monkeypatch, handler: Callable[[httpx.Request], httpx.Response]) -> None:
"""Подменяет httpx.AsyncClient в модуле wikiloc на клиент с MockTransport."""
transport = httpx.MockTransport(handler)
original = httpx.AsyncClient
def factory(*args, **kwargs):
kwargs["transport"] = transport
return original(*args, **kwargs)
monkeypatch.setattr(wl_module.httpx, "AsyncClient", factory)
async def _collect_all(parser, bbox):
tracks = []
async for t in parser.collect(bbox, {}):
tracks.append(t)
return tracks
# ─── UT-WL-01 ───────────────────────────────────────────────────────────────
def test_ut_wl_01_extract_track_paths():
"""UT-WL-01: _extract_track_paths возвращает ≥ 5 уникальных путей."""
html = _read_fixture_text("wikiloc-search-page1.html")
paths = _extract_track_paths(html)
assert len(paths) >= 5
assert len(set(paths)) == len(paths) # все уникальны
for p in paths:
assert p.startswith("/trails/")
# ─── UT-WL-02 ───────────────────────────────────────────────────────────────
def test_ut_wl_02_extract_gpx_url_downloadtrail():
"""UT-WL-02: _extract_gpx_url возвращает абсолютный URL для downloadTrail.do?id=X."""
html = 'GPX'
url = _extract_gpx_url(html, "https://www.wikiloc.com", "12345")
assert url == "https://www.wikiloc.com/wikiloc/downloadTrail.do?id=12345"
# ─── UT-WL-03 ───────────────────────────────────────────────────────────────
def test_ut_wl_03_extract_gpx_url_fallback():
"""UT-WL-03: _extract_gpx_url fallback по track_id если нет явных ссылок."""
html = "
No GPX link here at all.
"
url = _extract_gpx_url(html, "https://www.wikiloc.com", "99999")
assert url == "https://www.wikiloc.com/wikiloc/downloadTrail.do?id=99999"
# ─── UT-WL-04 ───────────────────────────────────────────────────────────────
def test_ut_wl_04_extract_track_name():
"""UT-WL-04: _extract_track_name извлекает текст ."""
html = "Test Trail
"
assert _extract_track_name(html) == "Test Trail"
# Из фикстуры
html2 = _read_fixture_text("wikiloc-trail-page.html")
assert _extract_track_name(html2) == "Дмитровский лес"
# ─── UT-WL-05 ───────────────────────────────────────────────────────────────
def test_ut_wl_05_parse_gpx_success():
"""UT-WL-05: _parse_gpx на wikiloc-track.gpx → activity_type='moto'."""
content = _read_fixture("wikiloc-track.gpx")
track = _parse_gpx(
content,
track_id="12345678",
name="Дмитровский лес",
activity_type="moto",
source_id="wikiloc",
track_url="https://www.wikiloc.com/trails/motorcycle-enduro/dmitrovsky-loop-12345678",
source_priority=70,
)
assert track is not None
assert track.activity_type == "moto"
assert track.source_id == "wikiloc"
assert "wikiloc.com" in track.external_url
assert track.points_count >= 10
assert track.length_m > 0
# ─── UT-WL-06 ───────────────────────────────────────────────────────────────
def test_ut_wl_06_mapping_categories():
"""UT-WL-06: MAPPING маппит motorcycle/hiking/mtb."""
m = WikilocParser.MAPPING
assert m["motorcycle"] == "moto"
assert m["hiking"] == "hike"
assert m["mtb"] == "bicycle"
# ─── UT-WL-07 ───────────────────────────────────────────────────────────────
async def test_ut_wl_07_http_403_search_graceful_stop(monkeypatch):
"""UT-WL-07: 403 на странице поиска → graceful stop, 0 yields."""
def handler(req: httpx.Request) -> httpx.Response:
if "find.do" in req.url.path:
return httpx.Response(403, text="Forbidden")
return httpx.Response(404)
_patch_client(monkeypatch, handler)
parser = WikilocParser(_make_config())
tracks = await _collect_all(parser, BBOX_TSFO)
assert tracks == []
# ─── UT-WL-08 ───────────────────────────────────────────────────────────────
async def test_ut_wl_08_http_429_track_graceful_stop(monkeypatch):
"""UT-WL-08: 429 на 2-м треке → 1-й трек yield-нут, потом graceful stop."""
search_html = _read_fixture_text("wikiloc-search-page1.html")
trail_html = _read_fixture_text("wikiloc-trail-page.html")
gpx_bytes = _read_fixture("wikiloc-track.gpx")
call_count = {"track_page": 0}
def handler(req: httpx.Request) -> httpx.Response:
path = req.url.path
if "find.do" in path:
return httpx.Response(200, text=search_html)
if path.startswith("/trails/"):
call_count["track_page"] += 1
if call_count["track_page"] == 1:
return httpx.Response(200, text=trail_html)
# 2-й трек → 429
return httpx.Response(429, text="Too Many Requests")
if "downloadTrail.do" in path:
return httpx.Response(200, content=gpx_bytes)
return httpx.Response(404)
_patch_client(monkeypatch, handler)
parser = WikilocParser(_make_config())
tracks = await _collect_all(parser, BBOX_TSFO)
assert len(tracks) == 1
assert "wikiloc.com" in tracks[0].external_url
# ─── UT-WL-09 ───────────────────────────────────────────────────────────────
async def test_ut_wl_09_rate_limit_respected(monkeypatch):
"""UT-WL-09: asyncio.sleep вызывается между запросами с rate_limit_sec."""
trail_html = _read_fixture_text("wikiloc-trail-page.html")
gpx_bytes = _read_fixture("wikiloc-track.gpx")
def handler(req: httpx.Request) -> httpx.Response:
path = req.url.path
if "find.do" in path:
# вернём только одну ссылку, чтобы один трек обработался
mini_html = 'x'
# Если page=0 → даём 1 трек, иначе пусто
if "page=0" in str(req.url.query):
return httpx.Response(200, text=mini_html)
return httpx.Response(200, text="")
if path.startswith("/trails/"):
return httpx.Response(200, text=trail_html)
if "downloadTrail.do" in path:
return httpx.Response(200, content=gpx_bytes)
return httpx.Response(404)
_patch_client(monkeypatch, handler)
sleep_calls = []
real_sleep = asyncio.sleep
async def mock_sleep(sec):
sleep_calls.append(sec)
# вызываем реальный sleep с 0, чтобы быстро
await real_sleep(0)
monkeypatch.setattr(wl_module.asyncio, "sleep", mock_sleep)
parser = WikilocParser(_make_config(rate_limit_sec=10))
await _collect_all(parser, BBOX_TSFO)
# Между запросами должно быть несколько sleep'ов с аргументом ≥ 10
assert len(sleep_calls) >= 2, f"expected ≥ 2 sleep calls, got {sleep_calls}"
assert all(s >= 10 for s in sleep_calls), f"all sleep args must be ≥ 10, got {sleep_calls}"
# ─── UT-WL-10 ───────────────────────────────────────────────────────────────
async def test_ut_wl_10_max_tracks_per_run_cap(monkeypatch):
"""UT-WL-10: max_tracks_per_run=2, поиск выдаёт ≥ 5 треков → yield ровно 2."""
search_html = _read_fixture_text("wikiloc-search-page1.html")
trail_html = _read_fixture_text("wikiloc-trail-page.html")
gpx_bytes = _read_fixture("wikiloc-track.gpx")
def handler(req: httpx.Request) -> httpx.Response:
path = req.url.path
if "find.do" in path:
if "page=0" in str(req.url.query):
return httpx.Response(200, text=search_html)
return httpx.Response(200, text="")
if path.startswith("/trails/"):
return httpx.Response(200, text=trail_html)
if "downloadTrail.do" in path:
return httpx.Response(200, content=gpx_bytes)
return httpx.Response(404)
_patch_client(monkeypatch, handler)
parser = WikilocParser(_make_config(max_tracks_per_run=2))
tracks = await _collect_all(parser, BBOX_TSFO)
assert len(tracks) == 2