#!/usr/bin/env python3
"""
parse.py — parse an OSM PBF extract into SQLite/Spatialite for Enduro Trails.

Reads region.osm.pbf and stores trails and POI in centralfederal.sqlite.
"""

import os
import sys
import json
import math
import sqlite3
import argparse

try:
    import osmium
except ImportError:
    print("ERROR: python-osmium не установлен. pip install python-osmium")
    sys.exit(1)

try:
    # pysqlite3-binary provides an sqlite3 module with extension support
    import pysqlite3 as sqlite3_ext
    HAS_PYSQLITE3 = True
except ImportError:
    HAS_PYSQLITE3 = False
    sqlite3_ext = sqlite3

from shapely.geometry import LineString, Point
from shapely import wkb as shapely_wkb

# ─── Constants ────────────────────────────────────────────────────────────────

HIGHWAY_TYPES = {"track", "path", "bridleway", "cycleway", "footway"}

POI_FILTERS = {
    "natural": {"water", "peak", "cave_entrance"},
    "tourism": {"viewpoint"},
    "historic": {"ruins"},
    "ford": {"yes"},
}

EARTH_RADIUS_M = 6_371_000.0

# SRID used for every geometry written by this script (WGS 84 lon/lat).
SRID_WGS84 = 4326

# Number of rows written per transaction by the insert_* functions.
INSERT_BATCH_SIZE = 1000

# Candidate locations of the mod_spatialite loadable extension, tried in order.
SPATIALITE_PATHS = (
    "mod_spatialite",
    "/usr/lib/x86_64-linux-gnu/mod_spatialite.so",
    "/usr/lib/mod_spatialite.so",
    "/usr/local/lib/mod_spatialite.so",
)

# Tables that carry a `geom` column, mapped to their Spatialite geometry type.
GEOM_TABLES = {"trails": "LINESTRING", "poi": "POINT"}

# Non-geometry columns of each table, in INSERT order. The names double as the
# keys of the record dicts produced by the OSM handlers.
TRAIL_FIELDS = (
    "osm_id", "highway_type", "track_type", "surface", "name", "length_m",
    "mtb_scale", "visibility", "smoothness", "access", "tags",
)
POI_FIELDS = ("osm_id", "poi_type", "name")

# `{geom_column}` is empty in Spatialite mode (AddGeometryColumn creates the
# column) and a plain `geom GEOMETRY` column otherwise.
_SCHEMA_TEMPLATE = """
DROP TABLE IF EXISTS trails;
CREATE TABLE trails (
    id           INTEGER PRIMARY KEY AUTOINCREMENT,
    osm_id       INTEGER NOT NULL,
    highway_type TEXT,
    track_type   TEXT,
    surface      TEXT,
    name         TEXT,
    length_m     REAL,
    mtb_scale    TEXT,
    visibility   TEXT,
    smoothness   TEXT,
    access       TEXT,
    tags         TEXT{geom_column}
);
DROP TABLE IF EXISTS poi;
CREATE TABLE poi (
    id       INTEGER PRIMARY KEY AUTOINCREMENT,
    osm_id   INTEGER NOT NULL,
    poi_type TEXT,
    name     TEXT{geom_column}
);
"""


# ─── Utilities ────────────────────────────────────────────────────────────────

def haversine_length(coords):
    """Return the length in metres of a polyline given as (lon, lat) pairs.

    Args:
        coords: Sequence of points; element 0 of each point is the longitude
            and element 1 the latitude, both in degrees.

    Returns:
        Sum of the great-circle (haversine) lengths of consecutive segments,
        as a float. Fewer than two points yield 0.0.
    """
    total = 0.0
    for start, end in zip(coords, coords[1:]):
        lon1, lat1 = math.radians(start[0]), math.radians(start[1])
        lon2, lat2 = math.radians(end[0]), math.radians(end[1])
        dlat = lat2 - lat1
        dlon = lon2 - lon1
        a = (
            math.sin(dlat / 2) ** 2
            + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2) ** 2
        )
        # Floating-point rounding can push `a` marginally outside [0, 1]
        # (e.g. near-antipodal points), which would make asin() raise.
        a = min(1.0, max(0.0, a))
        total += 2 * EARTH_RADIUS_M * math.asin(math.sqrt(a))
    return total


def geom_to_wkb_hex(geom):
    """Serialize a Shapely geometry to a WKB hex string."""
    return shapely_wkb.dumps(geom, hex=True)


def _way_coords(w):
    """Return the (lon, lat) pairs of a way's located nodes, or None.

    None is returned when the coordinates cannot be read or when fewer than
    two nodes have a valid location, i.e. when no line can be built.
    """
    try:
        coords = [(n.lon, n.lat) for n in w.nodes if n.location.valid()]
    except Exception:
        # Best effort: a way whose node locations cannot be read is skipped
        # rather than aborting the whole import.
        return None
    if len(coords) < 2:
        return None
    return coords


# ─── OSM Handlers ─────────────────────────────────────────────────────────────

class TrailHandler(osmium.SimpleHandler):
    """Collects highway=track/path/... ways from OSM.

    After the handler has been applied, `trails` holds one dict per accepted
    way with the TRAIL_FIELDS keys plus `geom_wkb` (WKB hex of the line).
    """

    def __init__(self):
        super().__init__()
        self.trails = []

    def way(self, w):
        """Record the way if its `highway` tag is one of HIGHWAY_TYPES."""
        tags = w.tags
        hw = tags.get("highway", "")
        if hw not in HIGHWAY_TYPES:
            return

        coords = _way_coords(w)
        if coords is None:
            return

        # Keep the complete tag set as JSON next to the extracted columns.
        all_tags = {tag.k: tag.v for tag in tags}

        self.trails.append({
            "osm_id": w.id,
            "highway_type": hw,
            "track_type": tags.get("tracktype", None),
            "surface": tags.get("surface", None),
            "name": tags.get("name", None),
            "length_m": haversine_length(coords),
            "mtb_scale": tags.get("mtb:scale", None),
            "visibility": tags.get("trail_visibility", None),
            "smoothness": tags.get("smoothness", None),
            "access": tags.get("access", None),
            "tags": json.dumps(all_tags, ensure_ascii=False),
            "geom_wkb": geom_to_wkb_hex(LineString(coords)),
        })


class POIHandler(osmium.SimpleHandler):
    """Collects POI (peaks, water, viewpoints, ...) matching POI_FILTERS.

    After the handler has been applied, `pois` holds one dict per accepted
    object with the POI_FIELDS keys plus `geom_wkb` (WKB hex of a point).
    """

    def __init__(self):
        super().__init__()
        self.pois = []

    def _check_tags(self, tags):
        """Return "key=value" for the first POI_FILTERS match, else None."""
        for key, values in POI_FILTERS.items():
            val = tags.get(key, "")
            if val in values:
                return f"{key}={val}"
        return None

    def node(self, n):
        """Record a matching node at its own location."""
        poi_type = self._check_tags(n.tags)
        if not poi_type:
            return
        if not n.location.valid():
            return

        geom = Point(n.location.lon, n.location.lat)
        self.pois.append({
            "osm_id": n.id,
            "poi_type": poi_type,
            "name": n.tags.get("name", None),
            "geom_wkb": geom_to_wkb_hex(geom),
        })

    def way(self, w):
        """Record a matching way (e.g. a water body outline) as one point.

        The point is the centroid of the way's node line, not of the area
        the way may enclose.
        """
        poi_type = self._check_tags(w.tags)
        if not poi_type:
            return

        coords = _way_coords(w)
        if coords is None:
            return

        geom = LineString(coords).centroid
        self.pois.append({
            "osm_id": w.id,
            "poi_type": poi_type,
            "name": w.tags.get("name", None),
            "geom_wkb": geom_to_wkb_hex(geom),
        })


# ─── Spatialite ───────────────────────────────────────────────────────────────

def open_spatialite(db_path):
    """Open the database and try to load the Spatialite extension.

    Args:
        db_path: Path of the SQLite file to open (created if missing).

    Returns:
        Tuple `(conn, loaded)`: the open connection and a bool telling
        whether mod_spatialite was loaded. When it was not, the connection
        is still usable as plain SQLite.
    """
    conn = sqlite3_ext.connect(db_path)

    try:
        conn.enable_load_extension(True)
    except (AttributeError, sqlite3_ext.Error):
        # The sqlite3 module in use was built without loadable-extension
        # support; fall back to plain SQLite instead of crashing.
        print("WARNING: sqlite3 собран без поддержки расширений — геометрия будет храниться как WKB blob без пространственных индексов")
        return conn, False

    loaded = False
    # Try the known locations of mod_spatialite one by one.
    for path in SPATIALITE_PATHS:
        try:
            conn.load_extension(path)
        except sqlite3_ext.Error:
            continue
        loaded = True
        print(f" Spatialite загружен: {path}")
        break

    # Nothing else needs to load extensions; switch the capability back off.
    conn.enable_load_extension(False)

    if not loaded:
        print("WARNING: mod_spatialite не найден — геометрия будет храниться как WKB blob без пространственных индексов")

    return conn, loaded


def _discard_old_geometry(cur, table):
    """Best-effort removal of Spatialite metadata left by a previous run.

    A plain DROP TABLE leaves the table's geometry_columns registration and
    its spatial-index table behind, which would make AddGeometryColumn fail
    for the re-created table.
    """
    try:
        cur.execute("SELECT DiscardGeometryColumn(?, 'geom')", (table,)).fetchone()
        cur.execute(f"DROP TABLE IF EXISTS idx_{table}_geom")
    except sqlite3_ext.Error as e:
        print(f" WARNING: очистка метаданных {table}: {e}")


def _add_geometry_column(cur, table, geom_type):
    """Add and register `table.geom`; fall back to a plain column on failure.

    Returns:
        True when AddGeometryColumn reported success, False otherwise.
    """
    try:
        row = cur.execute(
            "SELECT AddGeometryColumn(?, 'geom', ?, ?, 'XY')",
            (table, SRID_WGS84, geom_type),
        ).fetchone()
        registered = bool(row) and row[0] == 1
    except sqlite3_ext.Error:
        registered = False
    if registered:
        return True

    print(f" WARNING: AddGeometryColumn не удался для {table} — колонка geom не зарегистрирована")
    # The inserts still need a `geom` column to write into.
    existing = {info[1] for info in cur.execute(f"PRAGMA table_info({table})")}
    if "geom" not in existing:
        cur.execute(f"ALTER TABLE {table} ADD COLUMN geom GEOMETRY")
    return False


def init_db(conn, has_spatialite):
    """(Re)create the `trails` and `poi` tables and their attribute indexes.

    Existing tables are dropped. In Spatialite mode the `geom` columns are
    created through AddGeometryColumn so that they are registered in the
    spatial metadata; otherwise they are plain columns holding WKB blobs.

    Args:
        conn: Open database connection.
        has_spatialite: Whether the Spatialite extension is loaded on `conn`.
    """
    cur = conn.cursor()

    if has_spatialite:
        # Argument 1: create the metadata tables inside a single transaction.
        cur.execute("SELECT InitSpatialMetaData(1)").fetchone()
        for table in GEOM_TABLES:
            _discard_old_geometry(cur, table)

    geom_column = "" if has_spatialite else ",\n    geom GEOMETRY"
    cur.executescript(_SCHEMA_TEMPLATE.format(geom_column=geom_column))

    if has_spatialite:
        for table, geom_type in GEOM_TABLES.items():
            _add_geometry_column(cur, table, geom_type)

    cur.executescript("""
        CREATE INDEX IF NOT EXISTS idx_trails_highway ON trails(highway_type);
        CREATE INDEX IF NOT EXISTS idx_trails_surface ON trails(surface);
        CREATE INDEX IF NOT EXISTS idx_poi_type ON poi(poi_type);
    """)

    conn.commit()


def _insert_in_batches(conn, table, fields, records, has_spatialite):
    """Insert record dicts into `table`, committing every INSERT_BATCH_SIZE rows.

    Each record supplies the `fields` keys plus `geom_wkb` (WKB hex). In
    Spatialite mode the WKB is converted with GeomFromWKB so the stored value
    is a Spatialite geometry; otherwise the raw WKB bytes are stored.
    """
    geom_placeholder = f"GeomFromWKB(?, {SRID_WGS84})" if has_spatialite else "?"
    columns = ", ".join(fields + ("geom",))
    placeholders = ",".join(["?"] * len(fields) + [geom_placeholder])
    sql = f"INSERT INTO {table} ({columns}) VALUES ({placeholders})"

    cur = conn.cursor()
    total = len(records)
    for start in range(0, total, INSERT_BATCH_SIZE):
        chunk = records[start:start + INSERT_BATCH_SIZE]
        cur.executemany(
            sql,
            (
                tuple(rec[name] for name in fields)
                + (bytes.fromhex(rec["geom_wkb"]),)
                for rec in chunk
            ),
        )
        conn.commit()
        print(f" {table}: вставлено {min(start + INSERT_BATCH_SIZE, total)}/{total}")


def insert_trails(conn, trails, has_spatialite):
    """Insert trail records (as produced by TrailHandler) into `trails`.

    Args:
        conn: Open database connection.
        trails: List of trail dicts.
        has_spatialite: Store geometries via GeomFromWKB when True, as raw
            WKB blobs when False.
    """
    _insert_in_batches(conn, "trails", TRAIL_FIELDS, trails, has_spatialite)


def insert_pois(conn, pois, has_spatialite=False):
    """Insert POI records (as produced by POIHandler) into `poi`.

    Args:
        conn: Open database connection.
        pois: List of POI dicts.
        has_spatialite: Store geometries via GeomFromWKB when True, as raw
            WKB blobs when False (the default, matching the old behaviour).
    """
    _insert_in_batches(conn, "poi", POI_FIELDS, pois, has_spatialite)


def create_spatial_indexes(conn, has_spatialite):
    """Build spatial indexes on `trails.geom` and `poi.geom`.

    Does nothing without Spatialite. Failures are reported as warnings and
    do not abort the import.
    """
    if not has_spatialite:
        return

    cur = conn.cursor()
    for table in GEOM_TABLES:
        try:
            row = cur.execute(
                "SELECT CreateSpatialIndex(?, 'geom')", (table,)
            ).fetchone()
            conn.commit()
        except sqlite3_ext.Error as e:
            print(f" WARNING: индекс {table}: {e}")
            continue

        # The function reports failure through its result, not an exception.
        result = row[0] if row else None
        if result == 1:
            print(f" Пространственный индекс {table} создан")
        else:
            print(f" WARNING: индекс {table}: CreateSpatialIndex вернул {result}")


# ─── Main ─────────────────────────────────────────────────────────────────────

def main():
    """Command-line entry point: parse the PBF file and write the database."""
    parser = argparse.ArgumentParser(description="Парсинг OSM PBF → Spatialite")
    parser.add_argument(
        "--pbf",
        default=os.path.join(os.path.dirname(__file__), "../data/region.osm.pbf"),
        help="Путь к PBF файлу",
    )
    parser.add_argument(
        "--db",
        default=os.path.join(os.path.dirname(__file__), "../data/centralfederal.sqlite"),
        help="Путь к выходному SQLite/Spatialite файлу",
    )
    args = parser.parse_args()

    pbf_path = os.path.abspath(args.pbf)
    db_path = os.path.abspath(args.db)

    if not os.path.exists(pbf_path):
        print(f"ERROR: PBF файл не найден: {pbf_path}")
        print("Сначала запустите scripts/download.sh")
        sys.exit(1)

    print(f"==> Читаем PBF: {pbf_path}")

    print(" Парсим дороги...")
    trail_handler = TrailHandler()
    # locations=True makes osmium resolve node coordinates for ways.
    trail_handler.apply_file(pbf_path, locations=True)
    print(f" Найдено дорог: {len(trail_handler.trails)}")

    print(" Парсим POI...")
    poi_handler = POIHandler()
    poi_handler.apply_file(pbf_path, locations=True)
    print(f" Найдено POI: {len(poi_handler.pois)}")

    print(f"==> Открываем БД: {db_path}")
    os.makedirs(os.path.dirname(db_path), exist_ok=True)
    conn, has_spatialite = open_spatialite(db_path)

    # Close the connection even when one of the steps below fails.
    try:
        print("==> Инициализируем схему...")
        init_db(conn, has_spatialite)

        print("==> Вставляем дороги...")
        insert_trails(conn, trail_handler.trails, has_spatialite)

        print("==> Вставляем POI...")
        insert_pois(conn, poi_handler.pois, has_spatialite)

        print("==> Создаём пространственные индексы...")
        create_spatial_indexes(conn, has_spatialite)
    finally:
        conn.close()

    print(f"\n✓ Готово! БД сохранена: {db_path}")


if __name__ == "__main__":
    main()