#!/usr/bin/env python3
"""Yandex Search API v2 (async) — web search for Russian-language content.

Usage as a script::

    script.py [QUERY] [MAX_RESULTS] [LANG]

The API key is read from the ``YANDEX_API_KEY`` environment variable.
"""

import base64
import json
import os
import sys
import time
import urllib.parse
import urllib.request
import xml.etree.ElementTree as ET

API_KEY = os.environ.get("YANDEX_API_KEY", "")
SEARCH_URL = "https://searchapi.api.cloud.yandex.net/v2/web/searchAsync"
OPERATION_URL = "https://operation.api.cloud.yandex.net/operations/{}"

# Socket timeout for every HTTP call; without it a stalled connection would
# block forever and defeat the polling deadline below.
REQUEST_TIMEOUT_SEC = 10
# The operation is polled POLL_ATTEMPTS times, sleeping POLL_INTERVAL_SEC
# before each poll (so roughly 15 seconds of waiting in total).
POLL_ATTEMPTS = 15
POLL_INTERVAL_SEC = 1


def _fetch_json(request: urllib.request.Request) -> dict:
    """Send *request* and return the response body decoded as JSON."""
    with urllib.request.urlopen(request, timeout=REQUEST_TIMEOUT_SEC) as response:
        return json.loads(response.read())


def search(query: str, max_results: int = 5, lang: str = "ru") -> list[dict]:
    """Search Yandex and return a list of results.

    Starts an asynchronous search operation, polls it until it is done and
    parses the base64-encoded XML payload found in ``response.rawData``.

    Args:
        query: Text of the search query.
        max_results: Number of groups requested from the API and the upper
            bound on the number of groups parsed.
        lang: ``"ru"`` selects ``SEARCH_TYPE_RU``; any other value selects
            ``SEARCH_TYPE_COM``.

    Returns:
        A list of dicts with the keys ``title``, ``url``, ``domain`` and
        ``snippet``. Failures detected by this function are reported as a
        single-element list ``[{"error": <message>}]``.

    Raises:
        urllib.error.URLError: On HTTP or network failures (including
            ``HTTPError`` for non-2xx responses).
        KeyError: If the initial response carries no ``"id"`` field.
    """
    if not API_KEY:
        # Fail fast with a readable message instead of sending an empty key.
        return [{"error": "YANDEX_API_KEY is not set"}]

    search_type = "SEARCH_TYPE_RU" if lang == "ru" else "SEARCH_TYPE_COM"
    payload = json.dumps({
        "query": {
            "searchType": search_type,
            "queryText": query,
        },
        "sortSpec": {"sortType": "SORT_TYPE_RL"},
        "groupingSpec": {
            "groupMode": "GROUP_MODE_DEEP",
            "groupsOnPage": max_results,
            "docsInGroup": 1,
        },
    }).encode("utf-8")

    start_request = urllib.request.Request(
        SEARCH_URL,
        data=payload,
        headers={
            "Authorization": f"Api-Key {API_KEY}",
            "Content-Type": "application/json",
        },
        method="POST",
    )
    operation_id = _fetch_json(start_request)["id"]

    # Poll until the operation reports done; give up after POLL_ATTEMPTS.
    auth_headers = {"Authorization": f"Api-Key {API_KEY}"}
    status: dict = {}
    for _ in range(POLL_ATTEMPTS):
        time.sleep(POLL_INTERVAL_SEC)
        status = _fetch_json(
            urllib.request.Request(
                OPERATION_URL.format(operation_id),
                headers=auth_headers,
            )
        )
        if status.get("done"):
            break
    else:
        return [{"error": "Search timed out"}]

    # NOTE(review): a finished operation is expected to carry either
    # "response" or "error" (Yandex Cloud Operation object) — confirm the
    # exact error schema against the API reference.
    failure = status.get("error")
    if failure:
        message = failure.get("message") if isinstance(failure, dict) else None
        return [{"error": f"Search failed: {message or failure}"}]

    raw = status.get("response", {}).get("rawData", "")
    try:
        xml_data = base64.b64decode(raw).decode("utf-8", errors="ignore")
    except ValueError:
        # binascii.Error (a ValueError subclass) signals malformed base64.
        return [{"error": "Failed to decode search response"}]
    return parse_xml(xml_data, max_results)


def parse_xml(xml_data: str, max_results: int) -> list[dict]:
    """Parse a Yandex XML response into structured results.

    Args:
        xml_data: XML document text.
        max_results: Only the first ``max_results`` ``<group>`` elements are
            inspected; groups without a ``<doc>`` child are skipped, so fewer
            results may be returned.

    Returns:
        One dict per document with ``title``, ``url``, ``domain`` and
        ``snippet`` (the first two non-empty passages joined by a space and
        cut to 300 characters), or ``[{"error": ...}]`` if the XML cannot be
        parsed.
    """
    try:
        root = ET.fromstring(xml_data)
    except ET.ParseError:
        return [{"error": "Failed to parse XML response"}]

    results = []
    for group in root.findall(".//group")[:max_results]:
        doc = group.find("doc")
        if doc is None:
            continue

        title_el = doc.find("title")
        # itertext() flattens nested highlight markup inside the element.
        title = "".join(title_el.itertext()) if title_el is not None else ""

        passages = [
            text
            for text in ("".join(p.itertext()) for p in doc.findall(".//passage"))
            if text
        ]
        snippet = " ".join(passages[:2])[:300] if passages else ""

        results.append({
            "title": title,
            "url": doc.findtext("url", ""),
            "domain": doc.findtext("domain", ""),
            "snippet": snippet,
        })

    return results


def main() -> None:
    """Run a search from the command line and print the results as JSON."""
    query = sys.argv[1] if len(sys.argv) > 1 else "openclaw"
    max_r = int(sys.argv[2]) if len(sys.argv) > 2 else 5
    lang = sys.argv[3] if len(sys.argv) > 3 else "ru"
    results = search(query, max_r, lang)
    print(json.dumps(results, ensure_ascii=False, indent=2))


if __name__ == "__main__":
    main()