auto-sync: 2026-05-02 08:30:01
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
parse.py — парсинг OSM PBF → Spatialite для Enduro Trails
|
||||
Читает region.osm.pbf, сохраняет trails и POI в centralfederal.sqlite
|
||||
Использует osmium export → GeoJSONSeq → построчный парсинг (низкое потребление памяти)
|
||||
"""
|
||||
|
||||
import os
|
||||
@@ -10,42 +10,18 @@ import json
|
||||
import math
|
||||
import sqlite3
|
||||
import argparse
|
||||
|
||||
try:
|
||||
import osmium
|
||||
except ImportError:
|
||||
print("ERROR: python-osmium не установлен. pip install python-osmium")
|
||||
sys.exit(1)
|
||||
|
||||
try:
|
||||
# pysqlite3-binary предоставляет sqlite3 с поддержкой расширений
|
||||
import pysqlite3 as sqlite3_ext
|
||||
HAS_PYSQLITE3 = True
|
||||
except ImportError:
|
||||
HAS_PYSQLITE3 = False
|
||||
sqlite3_ext = sqlite3
|
||||
|
||||
from shapely.geometry import LineString, Point
|
||||
from shapely import wkb as shapely_wkb
|
||||
import subprocess
|
||||
import tempfile
|
||||
|
||||
# ─── Константы ────────────────────────────────────────────────────────────────
|
||||
|
||||
HIGHWAY_TYPES = {"track", "path", "bridleway", "cycleway", "footway"}
|
||||
|
||||
POI_FILTERS = {
|
||||
"natural": {"water", "peak", "cave_entrance"},
|
||||
"tourism": {"viewpoint"},
|
||||
"historic": {"ruins"},
|
||||
"ford": {"yes"},
|
||||
}
|
||||
|
||||
EARTH_RADIUS_M = 6_371_000.0
|
||||
|
||||
|
||||
# ─── Утилиты ──────────────────────────────────────────────────────────────────
|
||||
|
||||
def haversine_length(coords):
|
||||
"""Длина ломаной в метрах по списку (lon, lat) пар."""
|
||||
"""Длина ломаной в метрах по списку [lon, lat] пар."""
|
||||
total = 0.0
|
||||
for i in range(len(coords) - 1):
|
||||
lon1, lat1 = math.radians(coords[i][0]), math.radians(coords[i][1])
|
||||
@@ -57,146 +33,35 @@ def haversine_length(coords):
|
||||
return total
|
||||
|
||||
|
||||
def geom_to_wkb_hex(geom):
    """Serialize a Shapely geometry to a hex-encoded WKB string for Spatialite."""
    hex_encoded = shapely_wkb.dumps(geom, hex=True)
    return hex_encoded
|
||||
|
||||
|
||||
# ─── OSM Handlers ─────────────────────────────────────────────────────────────
|
||||
|
||||
class TrailHandler(osmium.SimpleHandler):
    """Collect ways whose ``highway`` tag value is in HIGHWAY_TYPES."""

    def __init__(self):
        super().__init__()
        # One dict per accepted way, appended by way().
        self.trails = []

    def way(self, w):
        """Append a trail record for ``w`` unless it is filtered out.

        A way is dropped when its ``highway`` value is not in HIGHWAY_TYPES,
        when reading its nodes raises, or when fewer than two of its nodes
        have a valid location.
        """
        tags = w.tags
        hw = tags.get("highway", "")
        if hw not in HIGHWAY_TYPES:
            return

        coords = []
        try:
            for node in w.nodes:
                if node.location.valid():
                    coords.append((node.lon, node.lat))
        except Exception:
            return
        if len(coords) < 2:
            return

        length_m = haversine_length(coords)
        line = LineString(coords)
        # The complete tag set is kept as JSON next to the dedicated columns.
        all_tags = {tag.k: tag.v for tag in w.tags}

        record = {
            "osm_id": w.id,
            "highway_type": hw,
            "track_type": tags.get("tracktype", None),
            "surface": tags.get("surface", None),
            "name": tags.get("name", None),
            "length_m": length_m,
            "mtb_scale": tags.get("mtb:scale", None),
            "visibility": tags.get("trail_visibility", None),
            "smoothness": tags.get("smoothness", None),
            "access": tags.get("access", None),
            "tags": json.dumps(all_tags, ensure_ascii=False),
            "geom_wkb": geom_to_wkb_hex(line),
        }
        self.trails.append(record)
|
||||
|
||||
|
||||
class POIHandler(osmium.SimpleHandler):
    """Collect points of interest selected by POI_FILTERS from nodes and ways."""

    def __init__(self):
        super().__init__()
        # One dict per accepted POI, appended by node() and way().
        self.pois = []

    def _check_tags(self, tags):
        """Return ``"key=value"`` for the first POI_FILTERS match, else None."""
        for key, allowed in POI_FILTERS.items():
            value = tags.get(key, "")
            if value in allowed:
                return f"{key}={value}"
        return None

    def _valid_coords(self, w):
        """Return (lon, lat) tuples for the nodes of ``w`` with a valid location."""
        coords = []
        for node in w.nodes:
            if node.location.valid():
                coords.append((node.lon, node.lat))
        return coords

    def _store(self, osm_id, poi_type, name, geom):
        """Append one POI record with its geometry encoded as WKB hex."""
        self.pois.append({
            "osm_id": osm_id,
            "poi_type": poi_type,
            "name": name,
            "geom_wkb": geom_to_wkb_hex(geom),
        })

    def node(self, n):
        """Record a matching node that has a valid location as a Point POI."""
        poi_type = self._check_tags(n.tags)
        # The tag filter is evaluated first; the location is only checked on a match.
        if not poi_type or not n.location.valid():
            return

        point = Point(n.location.lon, n.location.lat)
        self._store(n.id, poi_type, n.tags.get("name", None), point)

    def way(self, w):
        """Record a matching way as a POI located at the centroid of its node line."""
        poi_type = self._check_tags(w.tags)
        if not poi_type:
            return

        try:
            coords = self._valid_coords(w)
        except Exception:
            return
        if len(coords) < 2:
            return

        centroid = LineString(coords).centroid
        self._store(w.id, poi_type, w.tags.get("name", None), centroid)
|
||||
|
||||
|
||||
# ─── Spatialite ───────────────────────────────────────────────────────────────
|
||||
|
||||
def open_spatialite(db_path):
|
||||
"""Открывает соединение с Spatialite, загружает расширение."""
|
||||
conn = sqlite3_ext.connect(db_path)
|
||||
def open_db(db_path):
|
||||
"""Открывает SQLite с попыткой загрузить Spatialite."""
|
||||
conn = sqlite3.connect(db_path)
|
||||
conn.enable_load_extension(True)
|
||||
|
||||
# Пробуем разные пути к mod_spatialite
|
||||
spatialite_paths = [
|
||||
"mod_spatialite",
|
||||
"/usr/lib/x86_64-linux-gnu/mod_spatialite.so",
|
||||
"/usr/lib/mod_spatialite.so",
|
||||
"/usr/local/lib/mod_spatialite.so",
|
||||
]
|
||||
loaded = False
|
||||
for path in spatialite_paths:
|
||||
has_spatialite = False
|
||||
for path in ["mod_spatialite",
|
||||
"/usr/lib/x86_64-linux-gnu/mod_spatialite.so",
|
||||
"/usr/lib/mod_spatialite.so",
|
||||
"/usr/local/lib/mod_spatialite.so"]:
|
||||
try:
|
||||
conn.load_extension(path)
|
||||
loaded = True
|
||||
print(f" Spatialite загружен: {path}")
|
||||
has_spatialite = True
|
||||
print(f" Spatialite: {path}")
|
||||
break
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
if not loaded:
|
||||
print("WARNING: mod_spatialite не найден — геометрия будет храниться как WKB blob без пространственных индексов")
|
||||
|
||||
return conn, loaded
|
||||
if not has_spatialite:
|
||||
print(" WARNING: mod_spatialite не найден — без пространственных индексов")
|
||||
conn.enable_load_extension(False)
|
||||
return conn, has_spatialite
|
||||
|
||||
|
||||
def init_db(conn, has_spatialite):
|
||||
"""Создаёт таблицы и индексы."""
|
||||
cur = conn.cursor()
|
||||
|
||||
if has_spatialite:
|
||||
cur.execute("SELECT InitSpatialMetaData(1)")
|
||||
try:
|
||||
cur.execute("SELECT InitSpatialMetaData(1)")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
cur.executescript("""
|
||||
DROP TABLE IF EXISTS trails;
|
||||
@@ -213,8 +78,10 @@ def init_db(conn, has_spatialite):
|
||||
smoothness TEXT,
|
||||
access TEXT,
|
||||
tags TEXT,
|
||||
geom GEOMETRY
|
||||
geom BLOB
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_trails_highway ON trails(highway_type);
|
||||
CREATE INDEX IF NOT EXISTS idx_trails_surface ON trails(surface);
|
||||
|
||||
DROP TABLE IF EXISTS poi;
|
||||
CREATE TABLE poi (
|
||||
@@ -222,153 +89,230 @@ def init_db(conn, has_spatialite):
|
||||
osm_id INTEGER NOT NULL,
|
||||
poi_type TEXT,
|
||||
name TEXT,
|
||||
geom GEOMETRY
|
||||
geom BLOB
|
||||
);
|
||||
""")
|
||||
|
||||
if has_spatialite:
|
||||
try:
|
||||
cur.execute("SELECT AddGeometryColumn('trails', 'geom', 4326, 'LINESTRING', 'XY')")
|
||||
except Exception:
|
||||
pass # колонка уже добавлена через CREATE TABLE
|
||||
try:
|
||||
cur.execute("SELECT AddGeometryColumn('poi', 'geom', 4326, 'POINT', 'XY')")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
cur.executescript("""
|
||||
CREATE INDEX IF NOT EXISTS idx_trails_highway ON trails(highway_type);
|
||||
CREATE INDEX IF NOT EXISTS idx_trails_surface ON trails(surface);
|
||||
CREATE INDEX IF NOT EXISTS idx_poi_type ON poi(poi_type);
|
||||
""")
|
||||
|
||||
conn.commit()
|
||||
|
||||
|
||||
def insert_trails(conn, trails, has_spatialite):
|
||||
def coords_to_wkb_linestring(coords):
    """Encode positions as a little-endian WKB LineString tagged with SRID 4326.

    Layout: byte-order marker (1), geometry type 2 with the SRID flag
    0x20000000 set, the SRID, the point count, then one pair of doubles
    per position.

    Args:
        coords: Sized sequence of positions. Each position is a sequence whose
            first two members are longitude and latitude; any further members
            (for example an elevation) are ignored.

    Returns:
        bytes: The encoded geometry; for an empty ``coords`` this is just the
        13-byte header with a point count of 0.
    """
    import struct

    header = struct.pack('<BIII', 1, 0x20000002, 4326, len(coords))
    pack_xy = struct.Struct('<dd').pack
    # Join once instead of growing a bytes object per point (quadratic copying).
    return header + b"".join(pack_xy(pos[0], pos[1]) for pos in coords)
|
||||
|
||||
|
||||
def coords_to_wkb_point(lon, lat):
    """Encode one lon/lat pair as a little-endian WKB Point tagged with SRID 4326."""
    import struct

    # Fields in order: byte-order marker, Point type with the SRID flag,
    # the SRID, then the two coordinates as doubles.
    return struct.pack('<BIIdd', 1, 0x20000001, 4326, lon, lat)
|
||||
|
||||
|
||||
def export_to_geojsonseq(pbf_path, output_path):
    """Convert a PBF file to GeoJSONSeq by running the ``osmium export`` command.

    Only linestring and point geometries are requested, and an existing
    output file is overwritten.

    Args:
        pbf_path: Path to the input ``.osm.pbf`` file.
        output_path: Path of the GeoJSONSeq file to write.

    Raises:
        SystemExit: With code 1 when the ``osmium`` executable cannot be
            started or when it exits with a non-zero status.
    """
    print(f" osmium export: {pbf_path} → {output_path}")
    cmd = [
        "osmium", "export",
        "--geometry-types=linestring,point",
        "--output-format=geojsonseq",
        "--overwrite",
        "-o", output_path,
        pbf_path,
    ]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True)
    except FileNotFoundError:
        # No osmium executable on PATH: report it like any other export
        # failure instead of letting a raw traceback escape.
        print(" ERROR osmium export: команда osmium не найдена (установите osmium-tool)")
        sys.exit(1)
    if result.returncode != 0:
        print(f" ERROR osmium export: {result.stderr}")
        sys.exit(1)
    print(" osmium export завершён")
|
||||
|
||||
|
||||
def parse_geojsonseq(geojson_path, conn):
|
||||
"""Построчно читает GeoJSONSeq и вставляет в БД."""
|
||||
cur = conn.cursor()
|
||||
batch = []
|
||||
for t in trails:
|
||||
if has_spatialite:
|
||||
geom_expr = f"GeomFromWKB(x'{t['geom_wkb']}', 4326)"
|
||||
else:
|
||||
geom_expr = f"x'{t['geom_wkb']}'"
|
||||
trails_count = 0
|
||||
poi_count = 0
|
||||
batch_trails = []
|
||||
batch_poi = []
|
||||
BATCH_SIZE = 500
|
||||
|
||||
batch.append((
|
||||
t["osm_id"], t["highway_type"], t["track_type"], t["surface"],
|
||||
t["name"], t["length_m"], t["mtb_scale"], t["visibility"],
|
||||
t["smoothness"], t["access"], t["tags"],
|
||||
))
|
||||
|
||||
# Вставляем батчами по 1000
|
||||
BATCH = 1000
|
||||
for i in range(0, len(trails), BATCH):
|
||||
chunk = trails[i:i+BATCH]
|
||||
for t in chunk:
|
||||
cur.execute("""
|
||||
def flush_trails():
|
||||
nonlocal trails_count
|
||||
if batch_trails:
|
||||
cur.executemany("""
|
||||
INSERT INTO trails
|
||||
(osm_id, highway_type, track_type, surface, name, length_m,
|
||||
mtb_scale, visibility, smoothness, access, tags, geom)
|
||||
VALUES (?,?,?,?,?,?,?,?,?,?,?,?)
|
||||
""", (
|
||||
t["osm_id"], t["highway_type"], t["track_type"], t["surface"],
|
||||
t["name"], t["length_m"], t["mtb_scale"], t["visibility"],
|
||||
t["smoothness"], t["access"], t["tags"],
|
||||
bytes.fromhex(t["geom_wkb"]),
|
||||
))
|
||||
conn.commit()
|
||||
print(f" trails: вставлено {min(i+BATCH, len(trails))}/{len(trails)}")
|
||||
""", batch_trails)
|
||||
conn.commit()
|
||||
trails_count += len(batch_trails)
|
||||
batch_trails.clear()
|
||||
print(f" trails: {trails_count}", end="\r", flush=True)
|
||||
|
||||
|
||||
def insert_pois(conn, pois):
|
||||
cur = conn.cursor()
|
||||
BATCH = 1000
|
||||
for i in range(0, len(pois), BATCH):
|
||||
chunk = pois[i:i+BATCH]
|
||||
for p in chunk:
|
||||
cur.execute("""
|
||||
def flush_poi():
|
||||
nonlocal poi_count
|
||||
if batch_poi:
|
||||
cur.executemany("""
|
||||
INSERT INTO poi (osm_id, poi_type, name, geom)
|
||||
VALUES (?,?,?,?)
|
||||
""", (
|
||||
p["osm_id"], p["poi_type"], p["name"],
|
||||
bytes.fromhex(p["geom_wkb"]),
|
||||
))
|
||||
conn.commit()
|
||||
print(f" poi: вставлено {min(i+BATCH, len(pois))}/{len(pois)}")
|
||||
""", batch_poi)
|
||||
conn.commit()
|
||||
poi_count += len(batch_poi)
|
||||
batch_poi.clear()
|
||||
print(f" poi: {poi_count}", end="\r", flush=True)
|
||||
|
||||
with open(geojson_path, "r", encoding="utf-8") as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if not line:
|
||||
continue
|
||||
try:
|
||||
feat = json.loads(line)
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
geom = feat.get("geometry", {})
|
||||
props = feat.get("properties", {}) or {}
|
||||
osm_id = feat.get("id", 0)
|
||||
# osmium export id format: "w123456789" or "n123456789"
|
||||
if isinstance(osm_id, str):
|
||||
osm_id = int(osm_id[1:]) if osm_id and osm_id[0] in "wnr" else 0
|
||||
|
||||
geom_type = geom.get("type", "")
|
||||
|
||||
# ── Trails (LineString) ──
|
||||
if geom_type == "LineString":
|
||||
hw = props.get("highway", "")
|
||||
if hw not in HIGHWAY_TYPES:
|
||||
continue
|
||||
coords = geom.get("coordinates", [])
|
||||
if len(coords) < 2:
|
||||
continue
|
||||
length_m = haversine_length(coords)
|
||||
wkb = coords_to_wkb_linestring(coords)
|
||||
extra = {k: v for k, v in props.items()
|
||||
if k not in ("highway", "tracktype", "surface", "name",
|
||||
"mtb:scale", "trail_visibility", "smoothness", "access")}
|
||||
batch_trails.append((
|
||||
osm_id,
|
||||
hw,
|
||||
props.get("tracktype"),
|
||||
props.get("surface"),
|
||||
props.get("name"),
|
||||
length_m,
|
||||
props.get("mtb:scale"),
|
||||
props.get("trail_visibility"),
|
||||
props.get("smoothness"),
|
||||
props.get("access"),
|
||||
json.dumps(extra, ensure_ascii=False),
|
||||
wkb,
|
||||
))
|
||||
if len(batch_trails) >= BATCH_SIZE:
|
||||
flush_trails()
|
||||
|
||||
# ── POI (Point) ──
|
||||
elif geom_type == "Point":
|
||||
poi_type = None
|
||||
if props.get("natural") in ("water", "peak", "cave_entrance"):
|
||||
poi_type = f"natural={props['natural']}"
|
||||
elif props.get("tourism") == "viewpoint":
|
||||
poi_type = "tourism=viewpoint"
|
||||
elif props.get("historic") == "ruins":
|
||||
poi_type = "historic=ruins"
|
||||
elif props.get("ford") == "yes":
|
||||
poi_type = "ford=yes"
|
||||
elif props.get("abandoned"):
|
||||
poi_type = "abandoned"
|
||||
|
||||
if not poi_type:
|
||||
continue
|
||||
|
||||
coords = geom.get("coordinates", [])
|
||||
if len(coords) < 2:
|
||||
continue
|
||||
wkb = coords_to_wkb_point(coords[0], coords[1])
|
||||
batch_poi.append((
|
||||
osm_id,
|
||||
poi_type,
|
||||
props.get("name"),
|
||||
wkb,
|
||||
))
|
||||
if len(batch_poi) >= BATCH_SIZE:
|
||||
flush_poi()
|
||||
|
||||
flush_trails()
|
||||
flush_poi()
|
||||
print(f"\n Итого trails: {trails_count}, poi: {poi_count}")
|
||||
return trails_count, poi_count
|
||||
|
||||
|
||||
def create_spatial_indexes(conn, has_spatialite):
|
||||
if not has_spatialite:
|
||||
return
|
||||
cur = conn.cursor()
|
||||
try:
|
||||
cur.execute("SELECT CreateSpatialIndex('trails', 'geom')")
|
||||
conn.commit()
|
||||
print(" Пространственный индекс trails создан")
|
||||
except Exception as e:
|
||||
print(f" WARNING: индекс trails: {e}")
|
||||
try:
|
||||
cur.execute("SELECT CreateSpatialIndex('poi', 'geom')")
|
||||
conn.commit()
|
||||
print(" Пространственный индекс poi создан")
|
||||
except Exception as e:
|
||||
print(f" WARNING: индекс poi: {e}")
|
||||
for table, col in [("trails", "geom"), ("poi", "geom")]:
|
||||
try:
|
||||
cur.execute(f"SELECT CreateSpatialIndex('{table}', '{col}')")
|
||||
conn.commit()
|
||||
print(f" Пространственный индекс {table} создан")
|
||||
except Exception as e:
|
||||
print(f" WARNING индекс {table}: {e}")
|
||||
|
||||
|
||||
# ─── Main ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Парсинг OSM PBF → Spatialite")
|
||||
parser.add_argument(
|
||||
"--pbf",
|
||||
default=os.path.join(os.path.dirname(__file__), "../data/region.osm.pbf"),
|
||||
help="Путь к PBF файлу",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--db",
|
||||
default=os.path.join(os.path.dirname(__file__), "../data/centralfederal.sqlite"),
|
||||
help="Путь к выходному SQLite/Spatialite файлу",
|
||||
)
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--pbf", default="/data/region.osm.pbf")
|
||||
parser.add_argument("--db", default="/data/centralfederal.sqlite")
|
||||
args = parser.parse_args()
|
||||
|
||||
pbf_path = os.path.abspath(args.pbf)
|
||||
db_path = os.path.abspath(args.db)
|
||||
|
||||
if not os.path.exists(pbf_path):
|
||||
print(f"ERROR: PBF файл не найден: {pbf_path}")
|
||||
print("Сначала запустите scripts/download.sh")
|
||||
print(f"ERROR: PBF не найден: {pbf_path}")
|
||||
sys.exit(1)
|
||||
|
||||
print(f"==> Читаем PBF: {pbf_path}")
|
||||
print(f"==> PBF: {pbf_path} ({os.path.getsize(pbf_path) // 1024 // 1024} МБ)")
|
||||
|
||||
print(" Парсим дороги...")
|
||||
trail_handler = TrailHandler()
|
||||
trail_handler.apply_file(pbf_path, locations=True)
|
||||
print(f" Найдено дорог: {len(trail_handler.trails)}")
|
||||
|
||||
print(" Парсим POI...")
|
||||
poi_handler = POIHandler()
|
||||
poi_handler.apply_file(pbf_path, locations=True)
|
||||
print(f" Найдено POI: {len(poi_handler.pois)}")
|
||||
# Экспортируем в GeoJSONSeq
|
||||
geojson_path = db_path.replace(".sqlite", ".geojsonseq")
|
||||
print("==> Конвертируем PBF → GeoJSONSeq (osmium export)...")
|
||||
export_to_geojsonseq(pbf_path, geojson_path)
|
||||
|
||||
print(f"==> Открываем БД: {db_path}")
|
||||
os.makedirs(os.path.dirname(db_path), exist_ok=True)
|
||||
conn, has_spatialite = open_spatialite(db_path)
|
||||
conn, has_spatialite = open_db(db_path)
|
||||
|
||||
print("==> Инициализируем схему...")
|
||||
init_db(conn, has_spatialite)
|
||||
|
||||
print("==> Вставляем дороги...")
|
||||
insert_trails(conn, trail_handler.trails, has_spatialite)
|
||||
|
||||
print("==> Вставляем POI...")
|
||||
insert_pois(conn, poi_handler.pois)
|
||||
print("==> Парсим GeoJSONSeq построчно...")
|
||||
parse_geojsonseq(geojson_path, conn)
|
||||
|
||||
print("==> Создаём пространственные индексы...")
|
||||
create_spatial_indexes(conn, has_spatialite)
|
||||
|
||||
conn.close()
|
||||
print(f"\n✓ Готово! БД сохранена: {db_path}")
|
||||
|
||||
# Удаляем временный GeoJSONSeq
|
||||
try:
|
||||
os.remove(geojson_path)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
print(f"\n✓ Готово! БД: {db_path} ({os.path.getsize(db_path) // 1024 // 1024} МБ)")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user