443 lines
16 KiB
Python
443 lines
16 KiB
Python
"""
vprok.ru (Перекрёсток Впрок / Perekrestok Vprok) Python Client
==============================================================

⚠️ IMPORTANT: vprok.ru is protected by DDoS Guard.
The script works ONLY from residential IPs (home internet).
All requests from servers/VPS hosts are blocked (error #625116).

Obtaining cookies (manual step):
    1. Open vprok.ru in a browser and sign in
    2. DevTools → Application → Cookies → www.vprok.ru
    3. Copy: remember_xo-fo_*, ngx_s_id, XSRF-TOKEN, laravel_session

Usage:
    client = VprokClient(cookies={
        "remember_xo-fo_<hash>": "<value>",
        "ngx_s_id": "<value>",
        "XSRF-TOKEN": "<value>",
        "laravel_session": "<value>",
    })
    products = client.search("молоко")
    client.add_to_cart(products[0]["id"], quantity=2)
    cart = client.get_cart()
"""
|
||
|
||
import json
import logging
import re
from typing import Optional
from urllib.parse import quote, urlencode, urljoin

import requests
from bs4 import BeautifulSoup
|
||
|
||
# Module-level logger; handlers and levels are configured by the application.
logger = logging.getLogger(__name__)

# Root of the site; every request path is appended to this.
BASE_URL = "https://www.vprok.ru"

# Browser-like headers installed on the HTTP session for every request.
DEFAULT_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    ),
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7",
    # Derived from BASE_URL so the two cannot drift apart.
    "Referer": BASE_URL + "/",
}
|
||
|
||
|
||
class DDoSGuardError(Exception):
    """Raised when DDoS Guard blocks the request (server/VPS IP detected)."""
|
||
|
||
|
||
class VprokAuthError(Exception):
    """Raised when authentication fails or cookies are invalid."""
|
||
|
||
|
||
class VprokClient:
    """
    Client for vprok.ru (Perekrestok Vprok).

    Authentication is cookie-based. Cookies must be obtained manually from
    a browser session (see module docstring).

    The client can be used as a context manager; leaving the ``with`` block
    closes the underlying HTTP session.

    Args:
        cookies: dict of cookies. Required keys:
            - remember_xo-fo_<hash>: long-lived auth token
            - ngx_s_id: DDoS Guard session ID
            Optional but recommended:
            - XSRF-TOKEN: for POST requests
            - laravel_session: PHP session
        proxies: optional proxy dict for requests
            e.g. {"https": "http://user:pass@proxy:8080"}
            Use residential proxies if running on a server/VPS!
        timeout: per-request timeout in seconds, used whenever an individual
            request does not pass its own ``timeout``.
    """

    # Default per-request timeout in seconds.
    DEFAULT_TIMEOUT = 30

    # Extracts (order number, date) pairs from the order-history page markup.
    _ORDER_PATTERN = re.compile(
        r'class="xf-lk-order__group _number">\s+<span>Заказ №:</span>\s+<span>'
        r'(?P<order_id>\d+)</span>.*?'
        r'class="xf-lk-order__group _date.*?">\s+<span>Дата:</span>\s+<span>'
        r'(?P<date>[\d\.]+)</span>',
        re.DOTALL,
    )

    def __init__(
        self,
        cookies: dict[str, str],
        proxies: Optional[dict] = None,
        timeout: float = DEFAULT_TIMEOUT,
    ):
        self.session = requests.Session()
        self.session.headers.update(DEFAULT_HEADERS)
        # Tolerate an empty/None cookie mapping (anonymous use).
        if cookies:
            self.session.cookies.update(cookies)
        if proxies:
            self.session.proxies.update(proxies)
        self.timeout = timeout
        # Filled lazily by _get_csrf_token() before the first POST.
        self._csrf_token: Optional[str] = None

    def close(self) -> None:
        """Close the underlying HTTP session."""
        self.session.close()

    def __enter__(self) -> "VprokClient":
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        self.close()

    # -------------------------------------------------------------------------
    # Internal helpers
    # -------------------------------------------------------------------------

    def _get(self, path: str, params: Optional[dict] = None, **kwargs) -> requests.Response:
        """
        Send a GET request to ``BASE_URL + path`` and validate the response.

        Extra keyword arguments are forwarded to ``Session.get``; ``timeout``
        defaults to the client-wide value.

        Raises:
            DDoSGuardError, VprokAuthError: see ``_check_response``.
        """
        # setdefault lets a caller override the timeout without a
        # duplicate-keyword TypeError.
        kwargs.setdefault("timeout", self.timeout)
        resp = self.session.get(BASE_URL + path, params=params, **kwargs)
        self._check_response(resp)
        return resp

    def _post(self, path: str, data=None, json_body=None, **kwargs) -> requests.Response:
        """
        Send a POST request to ``BASE_URL + path`` and validate the response.

        ``data`` is sent as a form body and ``json_body`` as a JSON body.
        When a CSRF token has been fetched it is attached as both the
        ``X-XSRF-TOKEN`` and ``X-CSRF-TOKEN`` headers.

        Raises:
            DDoSGuardError, VprokAuthError: see ``_check_response``.
        """
        # Copy so the caller's headers dict is never mutated.
        headers = dict(kwargs.pop("headers", None) or {})
        if self._csrf_token:
            # NOTE(review): the same value is sent in both headers regardless
            # of whether it came from the meta tag or the XSRF-TOKEN cookie;
            # confirm against the site that this is what it expects.
            headers["X-XSRF-TOKEN"] = self._csrf_token
            headers["X-CSRF-TOKEN"] = self._csrf_token
        kwargs.setdefault("timeout", self.timeout)
        resp = self.session.post(
            BASE_URL + path,
            data=data,
            json=json_body,
            headers=headers,
            **kwargs,
        )
        self._check_response(resp)
        return resp

    def _check_response(self, resp: requests.Response) -> None:
        """
        Check a response for a DDoS Guard block page or an auth redirect.

        Raises:
            DDoSGuardError: the body contains the DDoS Guard error marker.
            VprokAuthError: a request for a ``/profile`` URL ended up on a
                ``/login`` URL.
        """
        # DDoS Guard returns 200 with an error page
        if "Ошибка #625116" in resp.text:
            raise DDoSGuardError(
                "DDoS Guard blocked the request. "
                "Run this script from a residential IP (home network), not a server/VPS. "
                "Or use residential proxies."
            )
        # After redirects, resp.request is the *final* request, so the URL we
        # originally asked for is only available from the redirect history.
        requested_url = resp.history[0].url if resp.history else resp.request.url
        if resp.url and "/login" in resp.url and "/profile" in str(requested_url):
            raise VprokAuthError(
                "Redirected to login page — cookies are likely expired or invalid."
            )

    def _get_csrf_token(self) -> str:
        """
        Fetch the CSRF token and cache it on the client.

        Reads the ``csrf-token`` meta tag of the main page and falls back to
        the ``XSRF-TOKEN`` session cookie when the tag is missing or empty.

        Raises:
            VprokAuthError: no non-empty token could be found.
        """
        resp = self._get("/")
        soup = BeautifulSoup(resp.text, "html.parser")
        meta = soup.find("meta", {"name": "csrf-token"})
        token = (meta.get("content") or "") if meta else ""
        if not token:
            # An empty meta tag must not short-circuit the cookie fallback.
            token = self.session.cookies.get("XSRF-TOKEN") or ""
        if not token:
            raise VprokAuthError("Could not retrieve CSRF token")
        self._csrf_token = token
        return token

    @staticmethod
    def _absolute_url(href: Optional[str]) -> str:
        """Resolve ``href`` against BASE_URL; return "" when there is no href."""
        if not href:
            return ""
        # urljoin leaves absolute URLs intact and inserts the missing "/" for
        # relative ones, unlike plain string concatenation.
        return urljoin(BASE_URL, href)

    @staticmethod
    def _parse_quantity(text: str) -> int:
        """Parse the leading number of ``text`` as an int quantity (default 1)."""
        tokens = text.split()
        if not tokens:
            return 1
        try:
            return int(float(tokens[0].replace(",", ".")))
        except ValueError:
            return 1

    # -------------------------------------------------------------------------
    # Search
    # -------------------------------------------------------------------------

    def search(self, query: str, limit: int = 20, page: int = 1) -> list[dict]:
        """
        Search for products by name.

        Returns list of dicts:
            [{id, name, price, old_price, unit, available, image, link}]

        First tries JSON API (/api/v1/catalog/search), falls back to HTML parsing.
        The HTML fallback is truncated to ``limit`` items.

        Raises:
            DDoSGuardError, VprokAuthError: raised immediately, without
                attempting the fallback.
        """
        try:
            return self._search_json(query, limit, page)
        except (DDoSGuardError, VprokAuthError):
            # A block/auth failure will not be cured by a second request.
            raise
        except Exception as e:
            logger.debug("JSON search failed (%s), falling back to HTML parser", e)

        # Fallback: HTML scraping (the page has no limit parameter, so trim here)
        return self._search_html(query, page)[: max(limit, 0)]

    def _search_json(self, query: str, limit: int, page: int) -> list[dict]:
        """
        Try undocumented JSON search API.

        Raises:
            ValueError: the response is not served as JSON.
        """
        resp = self._get(
            "/api/v1/catalog/search",
            params={"text": query, "limit": limit, "page": page},
            headers={"Accept": "application/json"},
        )
        if "application/json" not in resp.headers.get("Content-Type", ""):
            raise ValueError("Response is not JSON")
        data = resp.json()
        # The payload key is unknown, so probe the plausible candidates.
        items = data.get("items") or data.get("products") or data.get("data") or []
        return [self._normalize_product(item) for item in items]

    def _search_html(self, query: str, page: int) -> list[dict]:
        """
        Parse HTML search results page.

        Products are read from elements carrying ``data-owox-product-*``
        attributes; items whose price cannot be parsed are skipped.
        """
        resp = self._get(
            "/catalog/search",
            params={"text": query, "page": page},
            headers={"Accept": "text/html,application/xhtml+xml"},
        )
        soup = BeautifulSoup(resp.text, "html.parser")
        products = []

        # Try data attributes (owox tracking data)
        for node in soup.find_all(attrs={"data-owox-product-id": True}):
            try:
                price = float(node.get("data-owox-product-price", 0) or 0)
            except (TypeError, ValueError) as e:
                logger.debug("Error parsing product item: %s", e)
                continue

            anchor = node.find("a")
            image = node.find("img")
            products.append({
                "id": node.get("data-owox-product-id", ""),
                "name": node.get("data-owox-product-name", ""),
                "price": price,
                # Not available from the tracking attributes.
                "old_price": None,
                "unit": "шт",
                "available": True,
                "image": image.get("src", "") if image else "",
                "link": self._absolute_url(anchor.get("href", "")) if anchor else "",
            })

        if not products:
            logger.warning(
                "HTML search returned no results. "
                "The site structure may have changed."
            )
        return products

    def _normalize_product(self, raw: dict) -> dict:
        """Normalize product data from JSON API response."""
        return {
            "id": str(raw.get("id") or raw.get("productId") or ""),
            "name": raw.get("name") or raw.get("title") or "",
            "price": float(raw.get("price") or raw.get("cost") or 0),
            "old_price": raw.get("oldPrice") or raw.get("priceOld"),
            "unit": raw.get("unit") or raw.get("measure") or "шт",
            "available": raw.get("available", True),
            "image": raw.get("image") or raw.get("img") or "",
            "link": self._absolute_url(raw.get("link") or raw.get("url")),
        }

    # -------------------------------------------------------------------------
    # Cart
    # -------------------------------------------------------------------------

    def get_cart(self) -> list[dict]:
        """
        Fetch current cart contents.

        Returns list of dicts:
            [{id, name, price, quantity, unit, subtotal}]

        Tries the JSON basket API first and falls back to HTML parsing.

        Raises:
            DDoSGuardError, VprokAuthError: raised immediately, without
                attempting the fallback.
        """
        try:
            return self._get_cart_json()
        except (DDoSGuardError, VprokAuthError):
            raise
        except Exception as e:
            logger.debug("JSON cart fetch failed (%s), falling back to HTML", e)

        return self._get_cart_html()

    def _get_cart_json(self) -> list[dict]:
        """
        Try undocumented JSON basket API.

        Raises:
            ValueError: the response is not served as JSON.
        """
        resp = self._get(
            "/api/v1/basket",
            headers={"Accept": "application/json"},
        )
        if "application/json" not in resp.headers.get("Content-Type", ""):
            raise ValueError("Response is not JSON")
        data = resp.json()
        items = data.get("items") or data.get("products") or []
        return [
            {
                "id": str(item.get("id") or item.get("productId") or ""),
                "name": item.get("name") or item.get("title") or "",
                "price": float(item.get("price") or 0),
                # int(float(...)) matches the HTML parser and accepts "2.0".
                "quantity": int(float(item.get("quantity") or item.get("amount") or 1)),
                "unit": item.get("unit") or "шт",
                "subtotal": float(item.get("subtotal") or item.get("total") or 0),
            }
            for item in items
        ]

    def _get_cart_html(self) -> list[dict]:
        """
        Parse basket HTML page.

        Items whose price cannot be parsed are skipped; an unparsable
        quantity falls back to 1 instead of dropping the item.
        """
        resp = self._get("/basket")
        soup = BeautifulSoup(resp.text, "html.parser")
        items = []

        for node in soup.find_all(attrs={"data-owox-product-id": True}):
            try:
                price = float(node.get("data-owox-product-price", 0) or 0)
            except (TypeError, ValueError) as e:
                logger.debug("Error parsing cart item: %s", e)
                continue

            qty_span = node.find("span", class_=re.compile(r"count"))
            quantity = self._parse_quantity(qty_span.text) if qty_span else 1

            items.append({
                "id": node.get("data-owox-product-id", ""),
                "name": node.get("data-owox-product-name", ""),
                "price": price,
                "quantity": quantity,
                "unit": "шт",
                # The subtotal is not read from the markup.
                "subtotal": 0.0,
            })

        return items

    def add_to_cart(self, product_id: str, quantity: int = 1) -> bool:
        """
        Add a product to cart.

        Args:
            product_id: product ID (from search results)
            quantity: number of items to add

        Returns:
            True if successful, False otherwise

        Raises:
            DDoSGuardError, VprokAuthError: raised immediately, without
                attempting the form fallback.
        """
        if not self._csrf_token:
            self._get_csrf_token()

        # Try JSON API first
        try:
            return self._add_to_cart_json(product_id, quantity)
        except (DDoSGuardError, VprokAuthError):
            raise
        except Exception as e:
            logger.debug("JSON add_to_cart failed (%s), trying form POST", e)

        # Fallback: form POST
        return self._add_to_cart_form(product_id, quantity)

    def _add_to_cart_json(self, product_id: str, quantity: int) -> bool:
        """POST the item as JSON; return the reported success flag."""
        resp = self._post(
            "/basket/add",
            json_body={"product_id": product_id, "quantity": quantity},
            headers={"Accept": "application/json", "Content-Type": "application/json"},
        )
        if resp.status_code not in (200, 201):
            return False
        if "application/json" not in resp.headers.get("Content-Type", ""):
            return True
        # The POST was accepted: a malformed or unexpected body must not raise,
        # otherwise add_to_cart() would retry via the form and add twice.
        try:
            payload = resp.json()
        except ValueError:
            return True
        if isinstance(payload, dict):
            return bool(payload.get("success", True))
        return True

    def _add_to_cart_form(self, product_id: str, quantity: int) -> bool:
        """POST the item as a form with the ``_token`` field."""
        resp = self._post(
            "/basket/add",
            data={"product_id": product_id, "quantity": quantity, "_token": self._csrf_token},
        )
        return resp.status_code in (200, 201, 302)

    def remove_from_cart(self, product_id: str) -> bool:
        """
        Remove a product from cart.

        Returns:
            True on HTTP 200/204; False on any other status or when the POST
            itself fails (the failure is logged).
        """
        if not self._csrf_token:
            self._get_csrf_token()
        try:
            resp = self._post(
                "/basket/remove",
                json_body={"product_id": product_id},
                headers={"Accept": "application/json", "Content-Type": "application/json"},
            )
        except Exception as e:
            # Deliberate best-effort: report failure instead of raising.
            logger.error("remove_from_cart failed: %s", e)
            return False
        return resp.status_code in (200, 204)

    def update_cart_quantity(self, product_id: str, quantity: int) -> bool:
        """
        Update quantity of a product in cart.

        Returns:
            True on HTTP 200/204; False on any other status or when the POST
            itself fails (the failure is logged).
        """
        if not self._csrf_token:
            self._get_csrf_token()
        try:
            resp = self._post(
                "/basket/update",
                json_body={"product_id": product_id, "quantity": quantity},
                headers={"Accept": "application/json", "Content-Type": "application/json"},
            )
        except Exception as e:
            # Deliberate best-effort: report failure instead of raising.
            logger.error("update_cart_quantity failed: %s", e)
            return False
        return resp.status_code in (200, 204)

    # -------------------------------------------------------------------------
    # Orders (bonus — from PerekrestokOrderParser)
    # -------------------------------------------------------------------------

    def get_orders(self) -> list[dict]:
        """
        Get list of past orders.

        Returns: [{id, date}] — both values are strings as they appear in
        the order-history page.
        """
        resp = self._get("/profile/orders/history")
        return [
            {"id": match.group("order_id"), "date": match.group("date")}
            for match in self._ORDER_PATTERN.finditer(resp.text)
        ]
|
||
|
||
|
||
# =============================================================================
# Example usage / smoke test
# =============================================================================

if __name__ == "__main__":
    import sys

    logging.basicConfig(level=logging.DEBUG)

    # Paste your cookies here (from browser DevTools)
    MY_COOKIES = {
        # "remember_xo-fo_<hash>": "<value>",  # <-- required for auth
        # "ngx_s_id": "<value>",               # <-- required (DDoS Guard)
        # "XSRF-TOKEN": "<value>",             # <-- needed for POST
        # "laravel_session": "<value>",
    }

    # Warn early when no auth cookie has been filled in.
    if not any("remember_xo" in k for k in MY_COOKIES):
        print("⚠️ No auth cookie provided. Search will work only on residential IP.")
        print(" Fill in MY_COOKIES dict with your browser cookies.\n")

    client = VprokClient(cookies=MY_COOKIES)

    try:
        print("🔍 Searching for 'молоко'...")
        results = client.search("молоко", limit=5)
        if results:
            print(f"Found {len(results)} products:")
            for p in results:
                print(f" [{p['id']}] {p['name']} — {p['price']} ₽/{p['unit']}")
        else:
            print("No results (likely blocked by DDoS Guard or site changed)")

        print("\n🛒 Fetching cart...")
        cart = client.get_cart()
        if cart:
            print(f"Cart has {len(cart)} items:")
            for item in cart:
                print(f" {item['name']} x{item['quantity']}")
        else:
            print("Cart is empty or could not be fetched")

    except DDoSGuardError as e:
        print(f"\n❌ DDoS Guard blocked: {e}")
        sys.exit(1)
    except VprokAuthError as e:
        print(f"\n❌ Auth error: {e}")
        sys.exit(1)
    except requests.RequestException as e:
        # Connection/timeout failures: exit cleanly instead of a traceback.
        print(f"\n❌ Network error: {e}")
        sys.exit(1)
|