"""Unit tests for WikilocParser (ET-009). Coverage: - UT-WL-01: _extract_track_paths returns ≥ 5 unique paths - UT-WL-02: _extract_gpx_url with downloadTrail.do - UT-WL-03: _extract_gpx_url fallback by track_id - UT-WL-04: _extract_track_name from

- UT-WL-05: _parse_gpx success — activity_type='moto', source_id='wikiloc' - UT-WL-06: MAPPING translates categories - UT-WL-07: HTTP 403 on search → graceful stop - UT-WL-08: HTTP 429 on track page → graceful stop, earlier preserved - UT-WL-09: rate_limit_sec respected - UT-WL-10: max_tracks_per_run cap stops yield exactly """ import asyncio import os from typing import Callable import httpx from src.api.gps_tracks.sources import wikiloc as wl_module from src.api.gps_tracks.sources.wikiloc import ( WikilocParser, _extract_gpx_url, _extract_track_name, _extract_track_paths, _parse_gpx, ) FIXTURES_DIR = os.path.join(os.path.dirname(__file__), "..", "fixtures", "gps-tracks") BBOX_TSFO = (29.0, 49.5, 47.5, 60.0) def _read_fixture(name: str) -> bytes: with open(os.path.join(FIXTURES_DIR, name), "rb") as f: return f.read() def _read_fixture_text(name: str) -> str: return _read_fixture(name).decode("utf-8") def _make_config(**overrides) -> dict: cfg = { "id": "wikiloc", "base_url": "https://www.wikiloc.com", "rate_limit_sec": 0, "user_agent": "test-agent", "source_priority": 70, "activity_filter": ["motorcycle"], } cfg.update(overrides) return cfg def _patch_client(monkeypatch, handler: Callable[[httpx.Request], httpx.Response]) -> None: """Подменяет httpx.AsyncClient в модуле wikiloc на клиент с MockTransport.""" transport = httpx.MockTransport(handler) original = httpx.AsyncClient def factory(*args, **kwargs): kwargs["transport"] = transport return original(*args, **kwargs) monkeypatch.setattr(wl_module.httpx, "AsyncClient", factory) async def _collect_all(parser, bbox): tracks = [] async for t in parser.collect(bbox, {}): tracks.append(t) return tracks # ─── UT-WL-01 ─────────────────────────────────────────────────────────────── def test_ut_wl_01_extract_track_paths(): """UT-WL-01: _extract_track_paths возвращает ≥ 5 уникальных путей.""" html = _read_fixture_text("wikiloc-search-page1.html") paths = _extract_track_paths(html) assert len(paths) >= 5 assert len(set(paths)) == len(paths) # все уникальны for p in paths: assert p.startswith("/trails/") # ─── UT-WL-02 ─────────────────────────────────────────────────────────────── def test_ut_wl_02_extract_gpx_url_downloadtrail(): """UT-WL-02: _extract_gpx_url возвращает абсолютный URL для downloadTrail.do?id=X.""" html = 'GPX' url = _extract_gpx_url(html, "https://www.wikiloc.com", "12345") assert url == "https://www.wikiloc.com/wikiloc/downloadTrail.do?id=12345" # ─── UT-WL-03 ─────────────────────────────────────────────────────────────── def test_ut_wl_03_extract_gpx_url_fallback(): """UT-WL-03: _extract_gpx_url fallback по track_id если нет явных ссылок.""" html = "

No GPX link here at all.

" url = _extract_gpx_url(html, "https://www.wikiloc.com", "99999") assert url == "https://www.wikiloc.com/wikiloc/downloadTrail.do?id=99999" # ─── UT-WL-04 ─────────────────────────────────────────────────────────────── def test_ut_wl_04_extract_track_name(): """UT-WL-04: _extract_track_name извлекает текст

.""" html = "

Test Trail

" assert _extract_track_name(html) == "Test Trail" # Из фикстуры html2 = _read_fixture_text("wikiloc-trail-page.html") assert _extract_track_name(html2) == "Дмитровский лес" # ─── UT-WL-05 ─────────────────────────────────────────────────────────────── def test_ut_wl_05_parse_gpx_success(): """UT-WL-05: _parse_gpx на wikiloc-track.gpx → activity_type='moto'.""" content = _read_fixture("wikiloc-track.gpx") track = _parse_gpx( content, track_id="12345678", name="Дмитровский лес", activity_type="moto", source_id="wikiloc", track_url="https://www.wikiloc.com/trails/motorcycle-enduro/dmitrovsky-loop-12345678", source_priority=70, ) assert track is not None assert track.activity_type == "moto" assert track.source_id == "wikiloc" assert "wikiloc.com" in track.external_url assert track.points_count >= 10 assert track.length_m > 0 # ─── UT-WL-06 ─────────────────────────────────────────────────────────────── def test_ut_wl_06_mapping_categories(): """UT-WL-06: MAPPING маппит motorcycle/hiking/mtb.""" m = WikilocParser.MAPPING assert m["motorcycle"] == "moto" assert m["hiking"] == "hike" assert m["mtb"] == "bicycle" # ─── UT-WL-07 ─────────────────────────────────────────────────────────────── async def test_ut_wl_07_http_403_search_graceful_stop(monkeypatch): """UT-WL-07: 403 на странице поиска → graceful stop, 0 yields.""" def handler(req: httpx.Request) -> httpx.Response: if "find.do" in req.url.path: return httpx.Response(403, text="Forbidden") return httpx.Response(404) _patch_client(monkeypatch, handler) parser = WikilocParser(_make_config()) tracks = await _collect_all(parser, BBOX_TSFO) assert tracks == [] # ─── UT-WL-08 ─────────────────────────────────────────────────────────────── async def test_ut_wl_08_http_429_track_graceful_stop(monkeypatch): """UT-WL-08: 429 на 2-м треке → 1-й трек yield-нут, потом graceful stop.""" search_html = _read_fixture_text("wikiloc-search-page1.html") trail_html = _read_fixture_text("wikiloc-trail-page.html") gpx_bytes = _read_fixture("wikiloc-track.gpx") call_count = {"track_page": 0} def handler(req: httpx.Request) -> httpx.Response: path = req.url.path if "find.do" in path: return httpx.Response(200, text=search_html) if path.startswith("/trails/"): call_count["track_page"] += 1 if call_count["track_page"] == 1: return httpx.Response(200, text=trail_html) # 2-й трек → 429 return httpx.Response(429, text="Too Many Requests") if "downloadTrail.do" in path: return httpx.Response(200, content=gpx_bytes) return httpx.Response(404) _patch_client(monkeypatch, handler) parser = WikilocParser(_make_config()) tracks = await _collect_all(parser, BBOX_TSFO) assert len(tracks) == 1 assert "wikiloc.com" in tracks[0].external_url # ─── UT-WL-09 ─────────────────────────────────────────────────────────────── async def test_ut_wl_09_rate_limit_respected(monkeypatch): """UT-WL-09: asyncio.sleep вызывается между запросами с rate_limit_sec.""" trail_html = _read_fixture_text("wikiloc-trail-page.html") gpx_bytes = _read_fixture("wikiloc-track.gpx") def handler(req: httpx.Request) -> httpx.Response: path = req.url.path if "find.do" in path: # вернём только одну ссылку, чтобы один трек обработался mini_html = 'x' # Если page=0 → даём 1 трек, иначе пусто if "page=0" in str(req.url.query): return httpx.Response(200, text=mini_html) return httpx.Response(200, text="") if path.startswith("/trails/"): return httpx.Response(200, text=trail_html) if "downloadTrail.do" in path: return httpx.Response(200, content=gpx_bytes) return httpx.Response(404) _patch_client(monkeypatch, handler) sleep_calls = [] real_sleep = asyncio.sleep async def mock_sleep(sec): sleep_calls.append(sec) # вызываем реальный sleep с 0, чтобы быстро await real_sleep(0) monkeypatch.setattr(wl_module.asyncio, "sleep", mock_sleep) parser = WikilocParser(_make_config(rate_limit_sec=10)) await _collect_all(parser, BBOX_TSFO) # Между запросами должно быть несколько sleep'ов с аргументом ≥ 10 assert len(sleep_calls) >= 2, f"expected ≥ 2 sleep calls, got {sleep_calls}" assert all(s >= 10 for s in sleep_calls), f"all sleep args must be ≥ 10, got {sleep_calls}" # ─── UT-WL-10 ─────────────────────────────────────────────────────────────── async def test_ut_wl_10_max_tracks_per_run_cap(monkeypatch): """UT-WL-10: max_tracks_per_run=2, поиск выдаёт ≥ 5 треков → yield ровно 2.""" search_html = _read_fixture_text("wikiloc-search-page1.html") trail_html = _read_fixture_text("wikiloc-trail-page.html") gpx_bytes = _read_fixture("wikiloc-track.gpx") def handler(req: httpx.Request) -> httpx.Response: path = req.url.path if "find.do" in path: if "page=0" in str(req.url.query): return httpx.Response(200, text=search_html) return httpx.Response(200, text="") if path.startswith("/trails/"): return httpx.Response(200, text=trail_html) if "downloadTrail.do" in path: return httpx.Response(200, content=gpx_bytes) return httpx.Response(404) _patch_client(monkeypatch, handler) parser = WikilocParser(_make_config(max_tracks_per_run=2)) tracks = await _collect_all(parser, BBOX_TSFO) assert len(tracks) == 2