From e03d82a3f2645313d818d47cc988499b6ddff5b4 Mon Sep 17 00:00:00 2001 From: Stream Date: Sat, 2 May 2026 08:30:02 +0300 Subject: [PATCH] auto-sync: 2026-05-02 08:30:01 --- tasks/enduro-trails/scripts/parse.py | 464 ++++++++++++--------------- 1 file changed, 204 insertions(+), 260 deletions(-) diff --git a/tasks/enduro-trails/scripts/parse.py b/tasks/enduro-trails/scripts/parse.py index 8481c3d..ac759df 100644 --- a/tasks/enduro-trails/scripts/parse.py +++ b/tasks/enduro-trails/scripts/parse.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """ parse.py — парсинг OSM PBF → Spatialite для Enduro Trails -Читает region.osm.pbf, сохраняет trails и POI в centralfederal.sqlite +Использует osmium export → GeoJSONSeq → построчный парсинг (низкое потребление памяти) """ import os @@ -10,42 +10,18 @@ import json import math import sqlite3 import argparse - -try: - import osmium -except ImportError: - print("ERROR: python-osmium не установлен. pip install python-osmium") - sys.exit(1) - -try: - # pysqlite3-binary предоставляет sqlite3 с поддержкой расширений - import pysqlite3 as sqlite3_ext - HAS_PYSQLITE3 = True -except ImportError: - HAS_PYSQLITE3 = False - sqlite3_ext = sqlite3 - -from shapely.geometry import LineString, Point -from shapely import wkb as shapely_wkb +import subprocess +import tempfile # ─── Константы ──────────────────────────────────────────────────────────────── HIGHWAY_TYPES = {"track", "path", "bridleway", "cycleway", "footway"} -POI_FILTERS = { - "natural": {"water", "peak", "cave_entrance"}, - "tourism": {"viewpoint"}, - "historic": {"ruins"}, - "ford": {"yes"}, -} - EARTH_RADIUS_M = 6_371_000.0 -# ─── Утилиты ────────────────────────────────────────────────────────────────── - def haversine_length(coords): - """Длина ломаной в метрах по списку (lon, lat) пар.""" + """Длина ломаной в метрах по списку [lon, lat] пар.""" total = 0.0 for i in range(len(coords) - 1): lon1, lat1 = math.radians(coords[i][0]), math.radians(coords[i][1]) @@ -57,146 +33,35 @@ def haversine_length(coords): return total -def geom_to_wkb_hex(geom): - """Shapely geometry → WKB hex string для Spatialite.""" - return shapely_wkb.dumps(geom, hex=True) - - -# ─── OSM Handlers ───────────────────────────────────────────────────────────── - -class TrailHandler(osmium.SimpleHandler): - """Собирает highway=track/path/... из OSM.""" - - def __init__(self): - super().__init__() - self.trails = [] - - def way(self, w): - tags = w.tags - hw = tags.get("highway", "") - if hw not in HIGHWAY_TYPES: - return - - try: - coords = [(n.lon, n.lat) for n in w.nodes if n.location.valid()] - except Exception: - return - - if len(coords) < 2: - return - - length_m = haversine_length(coords) - geom = LineString(coords) - - extra_tags = {} - for tag in w.tags: - extra_tags[tag.k] = tag.v - - self.trails.append({ - "osm_id": w.id, - "highway_type": hw, - "track_type": tags.get("tracktype", None), - "surface": tags.get("surface", None), - "name": tags.get("name", None), - "length_m": length_m, - "mtb_scale": tags.get("mtb:scale", None), - "visibility": tags.get("trail_visibility", None), - "smoothness": tags.get("smoothness", None), - "access": tags.get("access", None), - "tags": json.dumps(extra_tags, ensure_ascii=False), - "geom_wkb": geom_to_wkb_hex(geom), - }) - - -class POIHandler(osmium.SimpleHandler): - """Собирает POI: вершины, родники, смотровые и т.д.""" - - def __init__(self): - super().__init__() - self.pois = [] - - def _check_tags(self, tags): - """Возвращает poi_type если тег совпадает с фильтром.""" - for key, values in POI_FILTERS.items(): - val = tags.get(key, "") - if val in values: - return f"{key}={val}" - return None - - def node(self, n): - poi_type = self._check_tags(n.tags) - if not poi_type: - return - if not n.location.valid(): - return - - geom = Point(n.location.lon, n.location.lat) - self.pois.append({ - "osm_id": n.id, - "poi_type": poi_type, - "name": n.tags.get("name", None), - "geom_wkb": geom_to_wkb_hex(geom), - }) - - def way(self, w): - """Для водоёмов-полигонов берём центроид.""" - poi_type = self._check_tags(w.tags) - if not poi_type: - return - - try: - coords = [(n.lon, n.lat) for n in w.nodes if n.location.valid()] - except Exception: - return - - if len(coords) < 2: - return - - geom = LineString(coords).centroid - self.pois.append({ - "osm_id": w.id, - "poi_type": poi_type, - "name": w.tags.get("name", None), - "geom_wkb": geom_to_wkb_hex(geom), - }) - - -# ─── Spatialite ─────────────────────────────────────────────────────────────── - -def open_spatialite(db_path): - """Открывает соединение с Spatialite, загружает расширение.""" - conn = sqlite3_ext.connect(db_path) +def open_db(db_path): + """Открывает SQLite с попыткой загрузить Spatialite.""" + conn = sqlite3.connect(db_path) conn.enable_load_extension(True) - - # Пробуем разные пути к mod_spatialite - spatialite_paths = [ - "mod_spatialite", - "/usr/lib/x86_64-linux-gnu/mod_spatialite.so", - "/usr/lib/mod_spatialite.so", - "/usr/local/lib/mod_spatialite.so", - ] - loaded = False - for path in spatialite_paths: + has_spatialite = False + for path in ["mod_spatialite", + "/usr/lib/x86_64-linux-gnu/mod_spatialite.so", + "/usr/lib/mod_spatialite.so", + "/usr/local/lib/mod_spatialite.so"]: try: conn.load_extension(path) - loaded = True - print(f" Spatialite загружен: {path}") + has_spatialite = True + print(f" Spatialite: {path}") break except Exception: continue - - if not loaded: - print("WARNING: mod_spatialite не найден — геометрия будет храниться как WKB blob без пространственных индексов") - - return conn, loaded + if not has_spatialite: + print(" WARNING: mod_spatialite не найден — без пространственных индексов") + conn.enable_load_extension(False) + return conn, has_spatialite def init_db(conn, has_spatialite): - """Создаёт таблицы и индексы.""" cur = conn.cursor() - if has_spatialite: - cur.execute("SELECT InitSpatialMetaData(1)") + try: + cur.execute("SELECT InitSpatialMetaData(1)") + except Exception: + pass cur.executescript(""" DROP TABLE IF EXISTS trails; @@ -213,8 +78,10 @@ def init_db(conn, has_spatialite): smoothness TEXT, access TEXT, tags TEXT, - geom GEOMETRY + geom BLOB ); + CREATE INDEX IF NOT EXISTS idx_trails_highway ON trails(highway_type); + CREATE INDEX IF NOT EXISTS idx_trails_surface ON trails(surface); DROP TABLE IF EXISTS poi; CREATE TABLE poi ( @@ -222,153 +89,230 @@ def init_db(conn, has_spatialite): osm_id INTEGER NOT NULL, poi_type TEXT, name TEXT, - geom GEOMETRY + geom BLOB ); - """) - - if has_spatialite: - try: - cur.execute("SELECT AddGeometryColumn('trails', 'geom', 4326, 'LINESTRING', 'XY')") - except Exception: - pass # колонка уже добавлена через CREATE TABLE - try: - cur.execute("SELECT AddGeometryColumn('poi', 'geom', 4326, 'POINT', 'XY')") - except Exception: - pass - - cur.executescript(""" - CREATE INDEX IF NOT EXISTS idx_trails_highway ON trails(highway_type); - CREATE INDEX IF NOT EXISTS idx_trails_surface ON trails(surface); CREATE INDEX IF NOT EXISTS idx_poi_type ON poi(poi_type); """) - conn.commit() -def insert_trails(conn, trails, has_spatialite): +def coords_to_wkb_linestring(coords): + """Конвертирует список [lon, lat] в WKB LineString (little-endian, SRID=4326).""" + import struct + n = len(coords) + # WKB: byte order (1) + type (2=LineString, with SRID flag 0x20000000) + SRID + num_points + points + buf = struct.pack('= BATCH_SIZE: + flush_trails() + + # ── POI (Point) ── + elif geom_type == "Point": + poi_type = None + if props.get("natural") in ("water", "peak", "cave_entrance"): + poi_type = f"natural={props['natural']}" + elif props.get("tourism") == "viewpoint": + poi_type = "tourism=viewpoint" + elif props.get("historic") == "ruins": + poi_type = "historic=ruins" + elif props.get("ford") == "yes": + poi_type = "ford=yes" + elif props.get("abandoned"): + poi_type = "abandoned" + + if not poi_type: + continue + + coords = geom.get("coordinates", []) + if len(coords) < 2: + continue + wkb = coords_to_wkb_point(coords[0], coords[1]) + batch_poi.append(( + osm_id, + poi_type, + props.get("name"), + wkb, + )) + if len(batch_poi) >= BATCH_SIZE: + flush_poi() + + flush_trails() + flush_poi() + print(f"\n Итого trails: {trails_count}, poi: {poi_count}") + return trails_count, poi_count def create_spatial_indexes(conn, has_spatialite): if not has_spatialite: return cur = conn.cursor() - try: - cur.execute("SELECT CreateSpatialIndex('trails', 'geom')") - conn.commit() - print(" Пространственный индекс trails создан") - except Exception as e: - print(f" WARNING: индекс trails: {e}") - try: - cur.execute("SELECT CreateSpatialIndex('poi', 'geom')") - conn.commit() - print(" Пространственный индекс poi создан") - except Exception as e: - print(f" WARNING: индекс poi: {e}") + for table, col in [("trails", "geom"), ("poi", "geom")]: + try: + cur.execute(f"SELECT CreateSpatialIndex('{table}', '{col}')") + conn.commit() + print(f" Пространственный индекс {table} создан") + except Exception as e: + print(f" WARNING индекс {table}: {e}") -# ─── Main ───────────────────────────────────────────────────────────────────── - def main(): - parser = argparse.ArgumentParser(description="Парсинг OSM PBF → Spatialite") - parser.add_argument( - "--pbf", - default=os.path.join(os.path.dirname(__file__), "../data/region.osm.pbf"), - help="Путь к PBF файлу", - ) - parser.add_argument( - "--db", - default=os.path.join(os.path.dirname(__file__), "../data/centralfederal.sqlite"), - help="Путь к выходному SQLite/Spatialite файлу", - ) + parser = argparse.ArgumentParser() + parser.add_argument("--pbf", default="/data/region.osm.pbf") + parser.add_argument("--db", default="/data/centralfederal.sqlite") args = parser.parse_args() pbf_path = os.path.abspath(args.pbf) db_path = os.path.abspath(args.db) if not os.path.exists(pbf_path): - print(f"ERROR: PBF файл не найден: {pbf_path}") - print("Сначала запустите scripts/download.sh") + print(f"ERROR: PBF не найден: {pbf_path}") sys.exit(1) - print(f"==> Читаем PBF: {pbf_path}") + print(f"==> PBF: {pbf_path} ({os.path.getsize(pbf_path) // 1024 // 1024} МБ)") - print(" Парсим дороги...") - trail_handler = TrailHandler() - trail_handler.apply_file(pbf_path, locations=True) - print(f" Найдено дорог: {len(trail_handler.trails)}") - - print(" Парсим POI...") - poi_handler = POIHandler() - poi_handler.apply_file(pbf_path, locations=True) - print(f" Найдено POI: {len(poi_handler.pois)}") + # Экспортируем в GeoJSONSeq + geojson_path = db_path.replace(".sqlite", ".geojsonseq") + print("==> Конвертируем PBF → GeoJSONSeq (osmium export)...") + export_to_geojsonseq(pbf_path, geojson_path) print(f"==> Открываем БД: {db_path}") os.makedirs(os.path.dirname(db_path), exist_ok=True) - conn, has_spatialite = open_spatialite(db_path) + conn, has_spatialite = open_db(db_path) print("==> Инициализируем схему...") init_db(conn, has_spatialite) - print("==> Вставляем дороги...") - insert_trails(conn, trail_handler.trails, has_spatialite) - - print("==> Вставляем POI...") - insert_pois(conn, poi_handler.pois) + print("==> Парсим GeoJSONSeq построчно...") + parse_geojsonseq(geojson_path, conn) print("==> Создаём пространственные индексы...") create_spatial_indexes(conn, has_spatialite) conn.close() - print(f"\n✓ Готово! БД сохранена: {db_path}") + + # Удаляем временный GeoJSONSeq + try: + os.remove(geojson_path) + except Exception: + pass + + print(f"\n✓ Готово! БД: {db_path} ({os.path.getsize(db_path) // 1024 // 1024} МБ)") if __name__ == "__main__":