auto-sync: 2026-05-02 08:30:01

This commit is contained in:
Stream
2026-05-02 08:30:02 +03:00
parent f910a79ea2
commit e03d82a3f2

View File

@@ -1,7 +1,7 @@
#!/usr/bin/env python3
"""
parse.py — парсинг OSM PBF → Spatialite для Enduro Trails
Читает region.osm.pbf, сохраняет trails и POI в centralfederal.sqlite
Использует osmium export → GeoJSONSeq → построчный парсинг (низкое потребление памяти)
"""
import os
@@ -10,42 +10,18 @@ import json
import math
import sqlite3
import argparse
try:
import osmium
except ImportError:
print("ERROR: python-osmium не установлен. pip install python-osmium")
sys.exit(1)
try:
# pysqlite3-binary предоставляет sqlite3 с поддержкой расширений
import pysqlite3 as sqlite3_ext
HAS_PYSQLITE3 = True
except ImportError:
HAS_PYSQLITE3 = False
sqlite3_ext = sqlite3
from shapely.geometry import LineString, Point
from shapely import wkb as shapely_wkb
import subprocess
import tempfile
# ─── Constants ────────────────────────────────────────────────────────────────
# OSM ``highway`` values that count as trails; features whose ``highway`` tag
# is not in this set are skipped by the trail-collecting code.
HIGHWAY_TYPES = {"track", "path", "bridleway", "cycleway", "footway"}
# POI selection rules: OSM tag key -> set of accepted values for that key.
# POIHandler._check_tags labels a matching feature as "key=value".
POI_FILTERS = {
"natural": {"water", "peak", "cave_entrance"},
"tourism": {"viewpoint"},
"historic": {"ruins"},
"ford": {"yes"},
}
# Sphere radius in metres. Presumably the radius used by haversine_length —
# its body is not fully visible in this view, TODO confirm.
EARTH_RADIUS_M = 6_371_000.0
# ─── Утилиты ──────────────────────────────────────────────────────────────────
def haversine_length(coords):
"""Длина ломаной в метрах по списку (lon, lat) пар."""
"""Длина ломаной в метрах по списку [lon, lat] пар."""
total = 0.0
for i in range(len(coords) - 1):
lon1, lat1 = math.radians(coords[i][0]), math.radians(coords[i][1])
@@ -57,146 +33,35 @@ def haversine_length(coords):
return total
def geom_to_wkb_hex(geom):
    """Serialize a Shapely geometry to a hex-encoded WKB string.

    Args:
        geom: Shapely geometry object to serialize.

    Returns:
        The value produced by ``shapely.wkb.dumps(geom, hex=True)``.
    """
    hex_wkb = shapely_wkb.dumps(geom, hex=True)
    return hex_wkb
# ─── OSM Handlers ─────────────────────────────────────────────────────────────
class TrailHandler(osmium.SimpleHandler):
    """Collect OSM ways whose ``highway`` tag is listed in HIGHWAY_TYPES.

    Every accepted way is appended to ``self.trails`` as a dict holding its
    id, selected tag values, haversine length, all tags as a JSON string and
    the geometry as hex-encoded WKB.
    """

    def __init__(self):
        super().__init__()
        # Accumulated trail records, one dict per accepted way.
        self.trails = []

    def way(self, w):
        """Record one way if it is a trail with at least two located nodes."""
        way_tags = w.tags
        highway = way_tags.get("highway", "")
        if highway not in HIGHWAY_TYPES:
            return

        # Keep only nodes with a valid location; any failure while reading
        # node coordinates drops the whole way.
        points = []
        try:
            for node in w.nodes:
                if node.location.valid():
                    points.append((node.lon, node.lat))
        except Exception:
            return
        if len(points) < 2:
            return

        def tag_or_none(key):
            return way_tags.get(key, None)

        length_m = haversine_length(points)
        line = LineString(points)
        all_tags = {tag.k: tag.v for tag in way_tags}

        record = {
            "osm_id": w.id,
            "highway_type": highway,
            "track_type": tag_or_none("tracktype"),
            "surface": tag_or_none("surface"),
            "name": tag_or_none("name"),
            "length_m": length_m,
            "mtb_scale": tag_or_none("mtb:scale"),
            "visibility": tag_or_none("trail_visibility"),
            "smoothness": tag_or_none("smoothness"),
            "access": tag_or_none("access"),
            "tags": json.dumps(all_tags, ensure_ascii=False),
            "geom_wkb": geom_to_wkb_hex(line),
        }
        self.trails.append(record)
class POIHandler(osmium.SimpleHandler):
    """Collect points of interest selected by POI_FILTERS.

    Matching nodes are stored at their own location; matching ways are
    stored at the centroid of the line through their valid nodes. Records
    accumulate in ``self.pois``.
    """

    def __init__(self):
        super().__init__()
        # Accumulated POI records, one dict per accepted node or way.
        self.pois = []

    def _check_tags(self, tags):
        """Return ``"key=value"`` for the first POI_FILTERS match, else None."""
        for key in POI_FILTERS:
            value = tags.get(key, "")
            if value in POI_FILTERS[key]:
                return f"{key}={value}"
        return None

    def _store(self, osm_id, poi_type, name, geom):
        """Append one POI record with the geometry encoded as hex WKB."""
        self.pois.append({
            "osm_id": osm_id,
            "poi_type": poi_type,
            "name": name,
            "geom_wkb": geom_to_wkb_hex(geom),
        })

    def node(self, n):
        """Record a node that matches a POI filter and has a valid location."""
        poi_type = self._check_tags(n.tags)
        if not poi_type or not n.location.valid():
            return
        location = n.location
        self._store(
            n.id,
            poi_type,
            n.tags.get("name", None),
            Point(location.lon, location.lat),
        )

    def way(self, w):
        """Record a matching way as the centroid of its node line."""
        poi_type = self._check_tags(w.tags)
        if not poi_type:
            return

        # Any failure while reading node coordinates drops the whole way.
        points = []
        try:
            for node in w.nodes:
                if node.location.valid():
                    points.append((node.lon, node.lat))
        except Exception:
            return
        if len(points) < 2:
            return

        center = LineString(points).centroid
        self._store(w.id, poi_type, w.tags.get("name", None), center)
# ─── Spatialite ───────────────────────────────────────────────────────────────
def open_spatialite(db_path):
"""Открывает соединение с Spatialite, загружает расширение."""
conn = sqlite3_ext.connect(db_path)
def open_db(db_path):
"""Открывает SQLite с попыткой загрузить Spatialite."""
conn = sqlite3.connect(db_path)
conn.enable_load_extension(True)
# Пробуем разные пути к mod_spatialite
spatialite_paths = [
"mod_spatialite",
"/usr/lib/x86_64-linux-gnu/mod_spatialite.so",
"/usr/lib/mod_spatialite.so",
"/usr/local/lib/mod_spatialite.so",
]
loaded = False
for path in spatialite_paths:
has_spatialite = False
for path in ["mod_spatialite",
"/usr/lib/x86_64-linux-gnu/mod_spatialite.so",
"/usr/lib/mod_spatialite.so",
"/usr/local/lib/mod_spatialite.so"]:
try:
conn.load_extension(path)
loaded = True
print(f" Spatialite загружен: {path}")
has_spatialite = True
print(f" Spatialite: {path}")
break
except Exception:
continue
if not loaded:
print("WARNING: mod_spatialite не найден — геометрия будет храниться как WKB blob без пространственных индексов")
return conn, loaded
if not has_spatialite:
print(" WARNING: mod_spatialite не найден — без пространственных индексов")
conn.enable_load_extension(False)
return conn, has_spatialite
def init_db(conn, has_spatialite):
"""Создаёт таблицы и индексы."""
cur = conn.cursor()
if has_spatialite:
cur.execute("SELECT InitSpatialMetaData(1)")
try:
cur.execute("SELECT InitSpatialMetaData(1)")
except Exception:
pass
cur.executescript("""
DROP TABLE IF EXISTS trails;
@@ -213,8 +78,10 @@ def init_db(conn, has_spatialite):
smoothness TEXT,
access TEXT,
tags TEXT,
geom GEOMETRY
geom BLOB
);
CREATE INDEX IF NOT EXISTS idx_trails_highway ON trails(highway_type);
CREATE INDEX IF NOT EXISTS idx_trails_surface ON trails(surface);
DROP TABLE IF EXISTS poi;
CREATE TABLE poi (
@@ -222,153 +89,230 @@ def init_db(conn, has_spatialite):
osm_id INTEGER NOT NULL,
poi_type TEXT,
name TEXT,
geom GEOMETRY
geom BLOB
);
""")
if has_spatialite:
try:
cur.execute("SELECT AddGeometryColumn('trails', 'geom', 4326, 'LINESTRING', 'XY')")
except Exception:
pass # колонка уже добавлена через CREATE TABLE
try:
cur.execute("SELECT AddGeometryColumn('poi', 'geom', 4326, 'POINT', 'XY')")
except Exception:
pass
cur.executescript("""
CREATE INDEX IF NOT EXISTS idx_trails_highway ON trails(highway_type);
CREATE INDEX IF NOT EXISTS idx_trails_surface ON trails(surface);
CREATE INDEX IF NOT EXISTS idx_poi_type ON poi(poi_type);
""")
conn.commit()
def insert_trails(conn, trails, has_spatialite):
def coords_to_wkb_linestring(coords):
    """Encode a sequence of positions as a little-endian EWKB LineString.

    Layout: byte-order marker ``1``, uint32 geometry type ``0x20000002``
    (LineString with the ``0x20000000`` SRID flag), uint32 SRID ``4326``,
    uint32 point count, then one ``(lon, lat)`` float64 pair per point.

    Args:
        coords: Iterable of positions ``[lon, lat]`` or ``[lon, lat, ...]``.
            Elements after the first two (e.g. altitude) are ignored.

    Returns:
        ``bytes`` of length ``13 + 16 * number_of_points``.

    Raises:
        ValueError: If a position has fewer than two elements.
    """
    import struct

    # NOTE(review): this is the PostGIS-style EWKB layout; confirm that the
    # consumers of the ``geom`` column expect it.
    ordinates = []
    for lon, lat, *_ in coords:
        ordinates.append(lon)
        ordinates.append(lat)

    # Pack header and payload once each instead of growing a bytes object
    # point by point.
    header = struct.pack('<BIII', 1, 0x20000002, 4326, len(ordinates) // 2)
    return header + struct.pack(f'<{len(ordinates)}d', *ordinates)
def coords_to_wkb_point(lon, lat):
    """Encode a lon/lat pair as a little-endian EWKB Point.

    Layout: byte-order marker ``1``, uint32 geometry type ``0x20000001``
    (Point with the ``0x20000000`` SRID flag), uint32 SRID ``4326``, then
    ``lon`` and ``lat`` as float64.

    Args:
        lon: Longitude value written first.
        lat: Latitude value written second.

    Returns:
        A 25-byte ``bytes`` object.
    """
    import struct

    # One pack call covers the whole fixed-size record; '<' disables padding.
    return struct.pack('<BIIdd', 1, 0x20000001, 4326, lon, lat)
def export_to_geojsonseq(pbf_path, output_path):
    """Convert a PBF file to GeoJSONSeq by running ``osmium export``.

    Only linestring and point geometries are requested, and an existing
    output file is replaced (``--overwrite``).

    Args:
        pbf_path: Path of the input PBF file.
        output_path: Path of the GeoJSONSeq file to write.

    Returns:
        None. If the command cannot be started or exits with a non-zero
        status, an error is printed and the process terminates through
        ``sys.exit(1)``.
    """
    # Separate the two paths so the log line stays readable.
    print(f" osmium export: {pbf_path} → {output_path}")
    cmd = [
        "osmium", "export",
        "--geometry-types=linestring,point",
        "--output-format=geojsonseq",
        "--overwrite",
        "-o", output_path,
        pbf_path,
    ]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True)
    except OSError as err:
        # Raised when the osmium executable is missing or cannot be run;
        # report it the same way as a failed export instead of a traceback.
        print(f" ERROR osmium export: {err}")
        sys.exit(1)
    if result.returncode != 0:
        print(f" ERROR osmium export: {result.stderr}")
        sys.exit(1)
    print(" osmium export завершён")
def parse_geojsonseq(geojson_path, conn):
"""Построчно читает GeoJSONSeq и вставляет в БД."""
cur = conn.cursor()
batch = []
for t in trails:
if has_spatialite:
geom_expr = f"GeomFromWKB(x'{t['geom_wkb']}', 4326)"
else:
geom_expr = f"x'{t['geom_wkb']}'"
trails_count = 0
poi_count = 0
batch_trails = []
batch_poi = []
BATCH_SIZE = 500
batch.append((
t["osm_id"], t["highway_type"], t["track_type"], t["surface"],
t["name"], t["length_m"], t["mtb_scale"], t["visibility"],
t["smoothness"], t["access"], t["tags"],
))
# Вставляем батчами по 1000
BATCH = 1000
for i in range(0, len(trails), BATCH):
chunk = trails[i:i+BATCH]
for t in chunk:
cur.execute("""
def flush_trails():
nonlocal trails_count
if batch_trails:
cur.executemany("""
INSERT INTO trails
(osm_id, highway_type, track_type, surface, name, length_m,
mtb_scale, visibility, smoothness, access, tags, geom)
VALUES (?,?,?,?,?,?,?,?,?,?,?,?)
""", (
t["osm_id"], t["highway_type"], t["track_type"], t["surface"],
t["name"], t["length_m"], t["mtb_scale"], t["visibility"],
t["smoothness"], t["access"], t["tags"],
bytes.fromhex(t["geom_wkb"]),
))
conn.commit()
print(f" trails: вставлено {min(i+BATCH, len(trails))}/{len(trails)}")
""", batch_trails)
conn.commit()
trails_count += len(batch_trails)
batch_trails.clear()
print(f" trails: {trails_count}", end="\r", flush=True)
def insert_pois(conn, pois):
cur = conn.cursor()
BATCH = 1000
for i in range(0, len(pois), BATCH):
chunk = pois[i:i+BATCH]
for p in chunk:
cur.execute("""
def flush_poi():
nonlocal poi_count
if batch_poi:
cur.executemany("""
INSERT INTO poi (osm_id, poi_type, name, geom)
VALUES (?,?,?,?)
""", (
p["osm_id"], p["poi_type"], p["name"],
bytes.fromhex(p["geom_wkb"]),
))
conn.commit()
print(f" poi: вставлено {min(i+BATCH, len(pois))}/{len(pois)}")
""", batch_poi)
conn.commit()
poi_count += len(batch_poi)
batch_poi.clear()
print(f" poi: {poi_count}", end="\r", flush=True)
with open(geojson_path, "r", encoding="utf-8") as f:
for line in f:
line = line.strip()
if not line:
continue
try:
feat = json.loads(line)
except Exception:
continue
geom = feat.get("geometry", {})
props = feat.get("properties", {}) or {}
osm_id = feat.get("id", 0)
# osmium export id format: "w123456789" or "n123456789"
if isinstance(osm_id, str):
osm_id = int(osm_id[1:]) if osm_id and osm_id[0] in "wnr" else 0
geom_type = geom.get("type", "")
# ── Trails (LineString) ──
if geom_type == "LineString":
hw = props.get("highway", "")
if hw not in HIGHWAY_TYPES:
continue
coords = geom.get("coordinates", [])
if len(coords) < 2:
continue
length_m = haversine_length(coords)
wkb = coords_to_wkb_linestring(coords)
extra = {k: v for k, v in props.items()
if k not in ("highway", "tracktype", "surface", "name",
"mtb:scale", "trail_visibility", "smoothness", "access")}
batch_trails.append((
osm_id,
hw,
props.get("tracktype"),
props.get("surface"),
props.get("name"),
length_m,
props.get("mtb:scale"),
props.get("trail_visibility"),
props.get("smoothness"),
props.get("access"),
json.dumps(extra, ensure_ascii=False),
wkb,
))
if len(batch_trails) >= BATCH_SIZE:
flush_trails()
# ── POI (Point) ──
elif geom_type == "Point":
poi_type = None
if props.get("natural") in ("water", "peak", "cave_entrance"):
poi_type = f"natural={props['natural']}"
elif props.get("tourism") == "viewpoint":
poi_type = "tourism=viewpoint"
elif props.get("historic") == "ruins":
poi_type = "historic=ruins"
elif props.get("ford") == "yes":
poi_type = "ford=yes"
elif props.get("abandoned"):
poi_type = "abandoned"
if not poi_type:
continue
coords = geom.get("coordinates", [])
if len(coords) < 2:
continue
wkb = coords_to_wkb_point(coords[0], coords[1])
batch_poi.append((
osm_id,
poi_type,
props.get("name"),
wkb,
))
if len(batch_poi) >= BATCH_SIZE:
flush_poi()
flush_trails()
flush_poi()
print(f"\n Итого trails: {trails_count}, poi: {poi_count}")
return trails_count, poi_count
def create_spatial_indexes(conn, has_spatialite):
if not has_spatialite:
return
cur = conn.cursor()
try:
cur.execute("SELECT CreateSpatialIndex('trails', 'geom')")
conn.commit()
print(" Пространственный индекс trails создан")
except Exception as e:
print(f" WARNING: индекс trails: {e}")
try:
cur.execute("SELECT CreateSpatialIndex('poi', 'geom')")
conn.commit()
print(" Пространственный индекс poi создан")
except Exception as e:
print(f" WARNING: индекс poi: {e}")
for table, col in [("trails", "geom"), ("poi", "geom")]:
try:
cur.execute(f"SELECT CreateSpatialIndex('{table}', '{col}')")
conn.commit()
print(f" Пространственный индекс {table} создан")
except Exception as e:
print(f" WARNING индекс {table}: {e}")
# ─── Main ─────────────────────────────────────────────────────────────────────
def main():
parser = argparse.ArgumentParser(description="Парсинг OSM PBF → Spatialite")
parser.add_argument(
"--pbf",
default=os.path.join(os.path.dirname(__file__), "../data/region.osm.pbf"),
help="Путь к PBF файлу",
)
parser.add_argument(
"--db",
default=os.path.join(os.path.dirname(__file__), "../data/centralfederal.sqlite"),
help="Путь к выходному SQLite/Spatialite файлу",
)
parser = argparse.ArgumentParser()
parser.add_argument("--pbf", default="/data/region.osm.pbf")
parser.add_argument("--db", default="/data/centralfederal.sqlite")
args = parser.parse_args()
pbf_path = os.path.abspath(args.pbf)
db_path = os.path.abspath(args.db)
if not os.path.exists(pbf_path):
print(f"ERROR: PBF файл не найден: {pbf_path}")
print("Сначала запустите scripts/download.sh")
print(f"ERROR: PBF не найден: {pbf_path}")
sys.exit(1)
print(f"==> Читаем PBF: {pbf_path}")
print(f"==> PBF: {pbf_path} ({os.path.getsize(pbf_path) // 1024 // 1024} МБ)")
print(" Парсим дороги...")
trail_handler = TrailHandler()
trail_handler.apply_file(pbf_path, locations=True)
print(f" Найдено дорог: {len(trail_handler.trails)}")
print(" Парсим POI...")
poi_handler = POIHandler()
poi_handler.apply_file(pbf_path, locations=True)
print(f" Найдено POI: {len(poi_handler.pois)}")
# Экспортируем в GeoJSONSeq
geojson_path = db_path.replace(".sqlite", ".geojsonseq")
print("==> Конвертируем PBF → GeoJSONSeq (osmium export)...")
export_to_geojsonseq(pbf_path, geojson_path)
print(f"==> Открываем БД: {db_path}")
os.makedirs(os.path.dirname(db_path), exist_ok=True)
conn, has_spatialite = open_spatialite(db_path)
conn, has_spatialite = open_db(db_path)
print("==> Инициализируем схему...")
init_db(conn, has_spatialite)
print("==> Вставляем дороги...")
insert_trails(conn, trail_handler.trails, has_spatialite)
print("==> Вставляем POI...")
insert_pois(conn, poi_handler.pois)
print("==> Парсим GeoJSONSeq построчно...")
parse_geojsonseq(geojson_path, conn)
print("==> Создаём пространственные индексы...")
create_spatial_indexes(conn, has_spatialite)
conn.close()
print(f"\n✓ Готово! БД сохранена: {db_path}")
# Удаляем временный GeoJSONSeq
try:
os.remove(geojson_path)
except Exception:
pass
print(f"\n✓ Готово! БД: {db_path} ({os.path.getsize(db_path) // 1024 // 1024} МБ)")
if __name__ == "__main__":