"""Unit тесты для дедупликации GPS-треков (ET-008). U-10: два трека с одинаковым bbox+length+date → один ключ U-11: разные даты → разные ключи U-12: bbox-округление до 0.01° U-13: merge sources при upsert U-14: merge external_urls """ import json import pytest from src.api.gps_tracks.dedup import compute_dedup_key from src.api.gps_tracks.db import open_db, init_db, upsert_track from src.api.gps_tracks.models import TrackInsert def _make_track( external_id="T1", source_id="osm", length_m=5000.0, created_at="2024-05-12T10:00:00Z", min_lon=37.61, min_lat=55.75, max_lon=37.62, max_lat=55.76, external_url=None, name=None, source_priority=50, ) -> TrackInsert: """Хелпер для создания TrackInsert с тестовой WKB геометрией.""" from shapely.geometry import LineString from shapely import wkb coords = [(min_lon, min_lat), (max_lon, max_lat)] geom_wkb = wkb.dumps(LineString(coords)) return TrackInsert( external_id=external_id, source_id=source_id, external_url=external_url, name=name, description=None, activity_type="other", user=None, created_at=created_at, length_m=length_m, points_count=2, geom_wkb=geom_wkb, min_lon=min_lon, min_lat=min_lat, max_lon=max_lon, max_lat=max_lat, tags=[], source_priority=source_priority, ) @pytest.fixture def db(tmp_path): """Создаёт изолированную БД в tmp_path.""" db_path = str(tmp_path / "test.sqlite") conn = open_db(db_path) init_db(conn) yield conn conn.close() # ─── U-10: одинаковый bbox+length+date → один ключ ─────────────────────────── def test_u10_same_key_for_same_track(): """U-10: два трека с одинаковым bbox+length+date дают одинаковый ключ.""" bounds = (37.61, 55.75, 37.62, 55.76) meta = {"length_m": 5000.0, "created_at": "2024-05-12T10:00:00Z"} key1 = compute_dedup_key(bounds, meta) key2 = compute_dedup_key(bounds, meta) assert key1 == key2 # ─── U-11: разные даты → разные ключи ──────────────────────────────────────── def test_u11_different_dates_give_different_keys(): """U-11: треки с разными датами дают разные ключи.""" bounds = (37.61, 55.75, 37.62, 55.76) key1 = compute_dedup_key(bounds, {"length_m": 5000.0, "created_at": "2024-05-12"}) key2 = compute_dedup_key(bounds, {"length_m": 5000.0, "created_at": "2024-05-13"}) assert key1 != key2 # ─── U-12: bbox-округление до 0.01° ───────────────────────────────────────── def test_u12_bbox_rounding_to_2_decimals(): """U-12: bbox округляется до 0.01°, незначительные отличия игнорируются.""" # Оба варианта округляются к (37.61, 55.75, 37.62, 55.76) # Используем значения в середине диапазона, гарантированно округляемые одинаково bounds1 = (37.6111, 55.7512, 37.6192, 55.7563) bounds2 = (37.6144, 55.7533, 37.6188, 55.7571) meta = {"length_m": 5000.0, "created_at": "2024-05-12"} key1 = compute_dedup_key(bounds1, meta) key2 = compute_dedup_key(bounds2, meta) # Оба bbox округляются к (37.61, 55.75, 37.62, 55.76) — ключи одинаковы assert key1 == key2 def test_u12_significantly_different_bbox_gives_different_key(): """U-12: существенно разные bbox дают разные ключи.""" bounds1 = (37.61, 55.75, 37.62, 55.76) bounds2 = (38.00, 56.00, 38.10, 56.10) meta = {"length_m": 5000.0, "created_at": "2024-05-12"} key1 = compute_dedup_key(bounds1, meta) key2 = compute_dedup_key(bounds2, meta) assert key1 != key2 # ─── U-13: merge sources при upsert ────────────────────────────────────────── def test_u13_merge_sources_on_upsert(db): """U-13: при upsert с тем же dedup_key sources мержатся (union без дублей).""" bounds = (37.61, 55.75, 37.62, 55.76) meta = {"length_m": 5000.0, "created_at": "2024-05-12T10:00:00Z"} dedup_key = compute_dedup_key(bounds, meta) # Первая вставка — от osm track1 = _make_track(external_id="T1", source_id="osm", source_priority=50) result1 = upsert_track(db, track1, dedup_key, source_priority=50) assert result1 == "inserted" # Вторая вставка — от другого источника с тем же dedup_key track2 = _make_track(external_id="T2", source_id="enduro_russia", source_priority=10) result2 = upsert_track(db, track2, dedup_key, source_priority=10) assert result2 == "updated" # Проверяем merged sources cur = db.cursor() cur.execute("SELECT sources_json FROM tracks WHERE dedup_key = ?", (dedup_key,)) row = cur.fetchone() sources = json.loads(row["sources_json"]) assert "osm" in sources assert "enduro_russia" in sources assert len(sources) == 2 # без дублей def test_u13_no_duplicate_sources_on_repeated_upsert(db): """U-13: повторный upsert от того же источника не создаёт дублей в sources.""" bounds = (37.61, 55.75, 37.62, 55.76) meta = {"length_m": 5000.0, "created_at": "2024-05-12T10:00:00Z"} dedup_key = compute_dedup_key(bounds, meta) track = _make_track(external_id="T1", source_id="osm") upsert_track(db, track, dedup_key, source_priority=50) upsert_track(db, track, dedup_key, source_priority=50) upsert_track(db, track, dedup_key, source_priority=50) cur = db.cursor() cur.execute("SELECT sources_json FROM tracks WHERE dedup_key = ?", (dedup_key,)) row = cur.fetchone() sources = json.loads(row["sources_json"]) assert sources.count("osm") == 1 # ─── U-14: merge external_urls ─────────────────────────────────────────────── def test_u14_merge_external_urls_on_upsert(db): """U-14: external_urls мержатся без дублей при upsert.""" bounds = (37.61, 55.75, 37.62, 55.76) meta = {"length_m": 5000.0, "created_at": "2024-05-12T10:00:00Z"} dedup_key = compute_dedup_key(bounds, meta) url1 = "https://www.openstreetmap.org/user/alice/traces/12345" url2 = "https://enduro-russia.ru/track/99" track1 = _make_track(external_id="T1", source_id="osm", external_url=url1) upsert_track(db, track1, dedup_key, source_priority=50) track2 = _make_track(external_id="T2", source_id="enduro_russia", external_url=url2) upsert_track(db, track2, dedup_key, source_priority=10) cur = db.cursor() cur.execute("SELECT external_urls_json FROM tracks WHERE dedup_key = ?", (dedup_key,)) row = cur.fetchone() urls = json.loads(row["external_urls_json"]) assert url1 in urls assert url2 in urls assert len(urls) == 2 def test_u14_no_duplicate_urls_on_repeated_upsert(db): """U-14: повторный upsert с тем же URL не дублирует его.""" bounds = (37.61, 55.75, 37.62, 55.76) meta = {"length_m": 5000.0, "created_at": "2024-05-12T10:00:00Z"} dedup_key = compute_dedup_key(bounds, meta) url = "https://www.openstreetmap.org/user/alice/traces/12345" track = _make_track(external_id="T1", source_id="osm", external_url=url) upsert_track(db, track, dedup_key, source_priority=50) upsert_track(db, track, dedup_key, source_priority=50) cur = db.cursor() cur.execute("SELECT external_urls_json FROM tracks WHERE dedup_key = ?", (dedup_key,)) row = cur.fetchone() urls = json.loads(row["external_urls_json"]) assert urls.count(url) == 1