180 lines
5.3 KiB
Python
180 lines
5.3 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Snowbike RAG - Flask API
|
|
Port: 5557
|
|
GET /search?q={query}&topics={topic_ids}&limit={limit}
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import json
|
|
from pathlib import Path
|
|
|
|
# Add the "scripts" directory next to this file to the module search path
|
|
sys.path.insert(0, str(Path(__file__).parent / "scripts"))
|
|
|
|
from flask import Flask, request, jsonify, Response, render_template
|
|
|
|
# Flask application object; the route decorators below register handlers on it.
app = Flask(__name__)
|
|
|
|
|
|
def load_env():
    """Load variables from ``~/.openclaw/.env`` into ``os.environ``.

    Every non-blank line that is not a ``#`` comment and contains ``=`` is
    split on the first ``=`` into a key and a value, and both are stripped
    of surrounding whitespace. One pair of matching single or double quotes
    around the value is removed. Lines whose key is empty are skipped.
    Variables already present in the process environment are not
    overwritten.

    Returns:
        None. The function does nothing when the file does not exist.
    """
    env_file = Path.home() / ".openclaw" / ".env"
    if not env_file.exists():
        return

    # Decode explicitly as UTF-8 so parsing does not depend on the locale.
    with open(env_file, encoding="utf-8") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line or line.startswith("#") or "=" not in line:
                continue

            key, _, val = line.partition("=")
            key = key.strip()
            if not key:
                # A line like "=value" has no name; os.environ rejects empty
                # variable names, which would abort loading the whole file.
                continue

            val = val.strip()
            # KEY="value" / KEY='value': drop the surrounding quote pair.
            if len(val) >= 2 and val[0] == val[-1] and val[0] in "\"'":
                val = val[1:-1]

            os.environ.setdefault(key, val)
|
|
|
|
|
|
# Runs at import time, so the variables are set before any route handler executes.
load_env()
|
|
|
|
|
|
@app.route("/", methods=["GET"])
def index():
    """Serve the landing page with the web search interface."""
    page = render_template("index.html")
    return page
|
|
|
|
|
|
@app.route("/api/search", methods=["GET"])
def api_search_endpoint():
    """Expose the ``/search`` handler under the newer ``/api/search`` route."""
    response = search_endpoint()
    return response
|
|
|
|
|
|
@app.route("/api/stats", methods=["GET"])
def api_stats_endpoint():
    """Expose the ``/stats`` handler under the newer ``/api/stats`` route."""
    response = stats()
    return response
|
|
|
|
|
|
@app.route("/health", methods=["GET"])
def health():
    """Return a fixed JSON payload identifying the service."""
    payload = {"status": "ok", "service": "snowbike-rag"}
    return jsonify(payload)
|
|
|
|
|
|
def _parse_topic_ids(raw):
    """Turn a comma-separated string of topic ids into a list of ints.

    Returns None when *raw* holds no ids at all (empty, or only commas and
    whitespace), which is the same value the handler passes when the
    ``topics`` parameter is absent. Raises ValueError when any item is not
    an integer.
    """
    pieces = (piece.strip() for piece in raw.split(","))
    ids = [int(piece) for piece in pieces if piece]
    return ids or None


def _parse_limit(raw, default=15, lowest=1, highest=30):
    """Convert *raw* to an int clamped to [lowest, highest]; *default* if not a number."""
    try:
        value = int(raw)
    except (TypeError, ValueError):
        return default
    return max(lowest, min(value, highest))


@app.route("/search", methods=["GET"])
def search_endpoint():
    """
    Hybrid search over the message base of the Telegram group.

    Query parameters:
        q (str): search query (required)
        topics (str): comma-separated list of topic_id values (optional)
        limit (int): maximum number of sources, clamped to 1..30
            (default 15; a non-numeric value falls back to 15)

    Response (as described by the original handler; the actual structure is
    whatever ``search()`` returns, serialized as UTF-8 JSON):
        {
            "query": "...",
            "answer": "...",
            "sources": [...],
            "count": N,
            "time_ms": N
        }

    Errors:
        400 with {"error": ...} when ``q`` is missing or ``topics`` contains
        a non-integer item.
        500 with {"error": ...} when the search itself fails. The stack trace
        is written to the application log; it is added to the response under
        "traceback" only when the app runs in debug mode, so that internals
        are not exposed to arbitrary clients.
    """
    query = request.args.get("q", "").strip()
    if not query:
        return jsonify({"error": "Параметр 'q' обязателен"}), 400

    try:
        topic_ids = _parse_topic_ids(request.args.get("topics", ""))
    except ValueError:
        return jsonify({"error": "Параметр 'topics' должен содержать числа через запятую"}), 400

    limit = _parse_limit(request.args.get("limit", 15))

    try:
        # Imported inside the handler: an import failure in the search module
        # is then reported as a JSON 500 instead of stopping the app at startup.
        from search import search

        result = search(query, topic_ids=topic_ids, limit=limit)
        # ensure_ascii=False keeps Cyrillic text readable in the raw response.
        body = json.dumps(result, ensure_ascii=False, indent=2)
    except Exception as e:
        app.logger.exception("search failed for query %r", query)
        payload = {"error": str(e)}
        if app.debug:
            import traceback

            payload["traceback"] = traceback.format_exc()
        return jsonify(payload), 500

    # content_type sets the full header value verbatim (charset included).
    return Response(body, content_type="application/json; charset=utf-8")
|
|
|
|
|
|
@app.route("/topics", methods=["GET"])
def list_topics():
    """List the topics recorded in the collector's ``meta.json``.

    Response:
        {"topics": [{"id": <int>, "title": <value from meta.json>}, ...]}
        sorted by ascending id.

    Errors:
        500 with {"error": <message>} on any failure, e.g. the file is
        missing, is not valid JSON, or a topic key is not an integer.
    """
    meta_file = "/home/node/.openclaw/workspace/data/telegram-collector/raw/1242788123/meta.json"
    try:
        # JSON files are UTF-8; do not rely on the locale's default encoding,
        # otherwise non-ASCII titles may be mis-decoded or fail to load.
        with open(meta_file, encoding="utf-8") as f:
            meta = json.load(f)

        topics = sorted(
            ({"id": int(k), "title": v} for k, v in meta.get("topics", {}).items()),
            key=lambda topic: topic["id"],
        )
        return jsonify({"topics": topics})
    except Exception as e:
        return jsonify({"error": str(e)}), 500
|
|
|
|
|
|
@app.route("/stats", methods=["GET"])
def stats():
    """Report document counts for the Meilisearch and ChromaDB indexes.

    Each backend is probed independently; a failure in one is reported in
    its own entry and does not prevent the other from being checked.

    Response:
        {
            "meilisearch": {"status": "ok", "documents": N}
                           or {"status": "error", "error": "..."},
            "chromadb":    {"status": "ok", "documents": N}
                           or {"status": "error", "error": "..."}
        }
    """
    result = {}

    # Meilisearch: direct HTTP request that bypasses any configured proxy.
    try:
        import requests as req

        # The context manager closes the session (and its connection pool)
        # on exit instead of leaving one open per request to this endpoint.
        with req.Session() as s:
            # Ignore proxy settings taken from the environment.
            s.trust_env = False
            r = s.get("http://127.0.0.1:7700/indexes/snowbike_messages/stats", timeout=5)
            r.raise_for_status()
            data = r.json()
        result["meilisearch"] = {
            "status": "ok",
            "documents": data.get("numberOfDocuments", 0),
        }
    except Exception as e:
        result["meilisearch"] = {"status": "error", "error": str(e)}

    # ChromaDB: open the on-disk store under data/chromadb next to this file.
    try:
        import chromadb

        chroma_path = str(Path(__file__).parent / "data" / "chromadb")
        client = chromadb.PersistentClient(path=chroma_path)
        collection = client.get_collection("snowbike_embeddings")
        result["chromadb"] = {
            "status": "ok",
            "documents": collection.count(),
        }
    except Exception as e:
        result["chromadb"] = {"status": "error", "error": str(e)}

    return jsonify(result)
|
|
|
|
|
|
if __name__ == "__main__":
    # Startup banner: service URL and the endpoints it offers.
    rule = "=" * 50
    banner = (
        rule,
        "Snowbike RAG API запускается...",
        "URL: http://localhost:5557",
        "Эндпоинты:",
        " GET /health",
        " GET /search?q=<запрос>&topics=<id1,id2>&limit=<N>",
        " GET /topics",
        " GET /stats",
        rule,
    )
    for text in banner:
        print(text)

    # Listen on all interfaces, port 5557, with the debugger disabled.
    app.run(host="0.0.0.0", port=5557, debug=False)
|