Files
wiki/tasks/internet-orders/vprok_client.py
2026-04-12 21:55:33 +03:00

443 lines
16 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
vprok.ru (Перекрёсток Впрок) Python Client
==========================================
⚠️ ВАЖНО: vprok.ru защищён DDoS Guard.
Скрипт работает ТОЛЬКО с жилых IP (домашний интернет).
С серверов/VPS все запросы блокируются (ошибка #625116).
Получение cookies (ручной шаг):
1. Открыть vprok.ru в браузере, авторизоваться
2. DevTools → Application → Cookies → www.vprok.ru
3. Скопировать: remember_xo-fo_*, ngx_s_id, XSRF-TOKEN, laravel_session
Использование:
client = VprokClient(cookies={
"remember_xo-fo_<hash>": "<value>",
"ngx_s_id": "<value>",
"XSRF-TOKEN": "<value>",
"laravel_session": "<value>",
})
products = client.search("молоко")
client.add_to_cart(products[0]["id"], quantity=2)
cart = client.get_cart()
"""
import json
import logging
import re
from typing import Optional
from urllib.parse import quote, urlencode, urljoin

import requests
from bs4 import BeautifulSoup
# Module-level logger; handlers and levels are left to the application.
logger = logging.getLogger(__name__)

# Site root; request paths are appended to it.
BASE_URL = "https://www.vprok.ru"

# Headers applied to every request made through the client's session.
# The User-Agent is a desktop Chrome-on-macOS string.
DEFAULT_HEADERS = {
    "User-Agent": " ".join(
        [
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)",
            "AppleWebKit/537.36 (KHTML, like Gecko)",
            "Chrome/120.0.0.0 Safari/537.36",
        ]
    ),
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7",
    "Referer": f"{BASE_URL}/",
}
class VprokError(Exception):
    """Base class for every error raised by this client.

    Lets callers handle any client failure with a single ``except VprokError``
    while the concrete subclasses stay catchable individually.
    """


class DDoSGuardError(VprokError):
    """Raised when DDoS Guard blocks the request (server/VPS IP detected)."""


class VprokAuthError(VprokError):
    """Raised when authentication fails or cookies are invalid."""
class VprokClient:
    """
    Client for vprok.ru (Перекрёсток Впрок).

    Authentication is cookie-based. Cookies must be obtained manually from
    a browser session (see module docstring).

    Args:
        cookies: dict of cookies. Required keys:
            - remember_xo-fo_<hash>: long-lived auth token
            - ngx_s_id: DDoS Guard session ID
            Optional but recommended:
            - XSRF-TOKEN: for POST requests
            - laravel_session: PHP session
        proxies: optional proxy dict for requests,
            e.g. {"https": "http://user:pass@proxy:8080"}.
            Use residential proxies if running on a server/VPS!
    """

    # Default per-request timeout in seconds. ``_get`` / ``_post`` apply it
    # only when the caller did not pass an explicit ``timeout=``.
    DEFAULT_TIMEOUT = 30

    def __init__(
        self,
        cookies: dict[str, str],
        proxies: Optional[dict] = None,
    ):
        self.session = requests.Session()
        self.session.headers.update(DEFAULT_HEADERS)
        self.session.cookies.update(cookies)
        if proxies:
            self.session.proxies.update(proxies)
        # Filled lazily by _get_csrf_token() before the first POST.
        self._csrf_token: Optional[str] = None

    # -------------------------------------------------------------------------
    # Internal helpers
    # -------------------------------------------------------------------------

    def _get(self, path: str, params: Optional[dict] = None, **kwargs) -> "requests.Response":
        """GET ``BASE_URL + path`` through the session and validate the response.

        Extra keyword arguments are forwarded to ``session.get``.

        Raises:
            DDoSGuardError, VprokAuthError: see ``_check_response``.
        """
        # setdefault (instead of a hard-coded keyword) lets callers override
        # the timeout without a "multiple values for keyword" TypeError.
        kwargs.setdefault("timeout", self.DEFAULT_TIMEOUT)
        resp = self.session.get(BASE_URL + path, params=params, **kwargs)
        self._check_response(resp)
        return resp

    def _post(self, path: str, data=None, json_body=None, **kwargs) -> "requests.Response":
        """POST to ``BASE_URL + path``, attaching the cached CSRF token if any.

        ``data`` is sent as a form body, ``json_body`` as a JSON body. Extra
        keyword arguments are forwarded to ``session.post``.

        Raises:
            DDoSGuardError, VprokAuthError: see ``_check_response``.
        """
        kwargs.setdefault("timeout", self.DEFAULT_TIMEOUT)
        # Copy so the caller's dict is never mutated by the token headers.
        headers = dict(kwargs.pop("headers", None) or {})
        if self._csrf_token:
            # NOTE(review): the same value is sent in both headers; confirm the
            # server accepts a cookie-derived token in X-CSRF-TOKEN.
            headers["X-XSRF-TOKEN"] = self._csrf_token
            headers["X-CSRF-TOKEN"] = self._csrf_token
        resp = self.session.post(
            BASE_URL + path,
            data=data,
            json=json_body,
            headers=headers,
            **kwargs,
        )
        self._check_response(resp)
        return resp

    def _check_response(self, resp: "requests.Response") -> None:
        """Check for a DDoS Guard block and for an auth redirect.

        Raises:
            DDoSGuardError: the body contains the DDoS Guard error marker.
            VprokAuthError: a request for a ``/profile`` URL ended on a
                ``/login`` URL.
        """
        # DDoS Guard returns 200 with an error page
        if "Ошибка #625116" in resp.text:
            raise DDoSGuardError(
                "DDoS Guard blocked the request. "
                "Run this script from a residential IP (home network), not a server/VPS. "
                "Or use residential proxies."
            )
        # After a followed redirect ``resp.request`` is the *final* request, so
        # the originally requested URL has to be looked up in the history.
        requested_urls = [str(step.url) for step in resp.history]
        requested_urls.append(str(resp.request.url))
        wanted_profile = any("/profile" in url for url in requested_urls)
        if resp.url and "/login" in resp.url and wanted_profile:
            raise VprokAuthError(
                "Redirected to login page — cookies are likely expired or invalid."
            )

    def _cookie_value(self, name: str) -> Optional[str]:
        """Return the value of session cookie ``name``, or None if absent/empty.

        Iterates the jar instead of calling ``cookies.get(name)``: the latter
        raises ``CookieConflictError`` when several cookies share a name (for
        example a manually pasted cookie plus one set by the server). When
        there are duplicates, the last one in jar iteration order is used.
        """
        value = None
        for cookie in self.session.cookies:
            if cookie.name == name and cookie.value:
                value = cookie.value
        return value

    def _get_csrf_token(self) -> str:
        """Fetch the CSRF token from the main page and cache it.

        The ``<meta name="csrf-token">`` tag is preferred; the ``XSRF-TOKEN``
        cookie is the fallback. An empty meta value is treated as missing.

        Raises:
            VprokAuthError: no non-empty token could be found.
        """
        resp = self._get("/")
        soup = BeautifulSoup(resp.text, "html.parser")
        meta = soup.find("meta", {"name": "csrf-token"})
        token = meta.get("content", "") if meta is not None else ""
        if not token:
            token = self._cookie_value("XSRF-TOKEN")
        if not token:
            raise VprokAuthError("Could not retrieve CSRF token")
        self._csrf_token = token
        return token

    # -------------------------------------------------------------------------
    # Search
    # -------------------------------------------------------------------------

    def search(self, query: str, limit: int = 20, page: int = 1) -> list[dict]:
        """
        Search for products by name.

        Returns list of dicts:
            [{id, name, price, old_price, unit, available, image, link}]

        First tries JSON API (/api/v1/catalog/search), falls back to HTML
        parsing. The HTML page takes no limit parameter, so fallback results
        are truncated to ``limit`` here.
        """
        try:
            return self._search_json(query, limit, page)
        except Exception as e:
            logger.debug("JSON search failed (%s), falling back to HTML parser", e)
        return self._search_html(query, page)[: max(limit, 0)]

    def _search_json(self, query: str, limit: int, page: int) -> list[dict]:
        """Try undocumented JSON search API.

        Raises:
            ValueError: the response is not declared as JSON.
        """
        resp = self._get(
            "/api/v1/catalog/search",
            params={"text": query, "limit": limit, "page": page},
            headers={"Accept": "application/json"},
        )
        if "application/json" not in resp.headers.get("Content-Type", ""):
            raise ValueError("Response is not JSON")
        data = resp.json()
        if isinstance(data, list):
            # A bare top-level array is taken as the item list itself.
            items = data
        else:
            items = data.get("items") or data.get("products") or data.get("data") or []
        return [self._normalize_product(item) for item in items]

    def _search_html(self, query: str, page: int) -> list[dict]:
        """Parse HTML search results page.

        Products are read from elements carrying ``data-owox-product-*``
        attributes. Elements that fail to parse are skipped (best effort).
        """
        resp = self._get(
            "/catalog/search",
            params={"text": query, "page": page},
            headers={"Accept": "text/html,application/xhtml+xml"},
        )
        soup = BeautifulSoup(resp.text, "html.parser")
        products = []
        # Try data attributes (owox tracking data)
        for item in soup.find_all(attrs={"data-owox-product-id": True}):
            try:
                product = {
                    "id": item.get("data-owox-product-id", ""),
                    "name": item.get("data-owox-product-name", ""),
                    "price": float(item.get("data-owox-product-price", 0) or 0),
                    "old_price": None,
                    "unit": "шт",
                    "available": True,
                    "image": "",
                    "link": "",
                }
                link_tag = item.find("a")
                if link_tag is not None:
                    # urljoin keeps absolute hrefs intact and fixes slash-less ones.
                    product["link"] = urljoin(BASE_URL, link_tag.get("href", ""))
                img_tag = item.find("img")
                if img_tag is not None:
                    product["image"] = img_tag.get("src", "")
                products.append(product)
            except Exception as e:
                logger.debug("Error parsing product item: %s", e)
        if not products:
            logger.warning(
                "HTML search returned no results. "
                "The site structure may have changed."
            )
        return products

    def _normalize_product(self, raw: dict) -> dict:
        """Normalize product data from JSON API response.

        Several alternative key names are accepted per field because the API
        is undocumented; missing fields get neutral defaults.
        """
        return {
            "id": str(raw.get("id") or raw.get("productId") or ""),
            "name": raw.get("name") or raw.get("title") or "",
            "price": float(raw.get("price") or raw.get("cost") or 0),
            "old_price": raw.get("oldPrice") or raw.get("priceOld"),
            "unit": raw.get("unit") or raw.get("measure") or "шт",
            "available": raw.get("available", True),
            "image": raw.get("image") or raw.get("img") or "",
            # urljoin instead of concatenation: an already absolute link must
            # not get the site root prepended a second time.
            "link": urljoin(BASE_URL, raw.get("link") or raw.get("url") or ""),
        }

    # -------------------------------------------------------------------------
    # Cart
    # -------------------------------------------------------------------------

    def get_cart(self) -> list[dict]:
        """
        Fetch current cart contents.

        Returns list of dicts:
            [{id, name, price, quantity, unit, subtotal}]

        Tries the JSON API first and falls back to parsing the basket page.
        """
        try:
            return self._get_cart_json()
        except Exception as e:
            logger.debug("JSON cart fetch failed (%s), falling back to HTML", e)
        return self._get_cart_html()

    def _get_cart_json(self) -> list[dict]:
        """Try undocumented JSON basket API.

        Raises:
            ValueError: the response is not declared as JSON.
        """
        resp = self._get(
            "/api/v1/basket",
            headers={"Accept": "application/json"},
        )
        if "application/json" not in resp.headers.get("Content-Type", ""):
            raise ValueError("Response is not JSON")
        data = resp.json()
        items = data.get("items") or data.get("products") or []
        return [
            {
                "id": str(item.get("id") or item.get("productId") or ""),
                "name": item.get("name") or item.get("title") or "",
                "price": float(item.get("price") or 0),
                "quantity": int(item.get("quantity") or item.get("amount") or 1),
                "unit": item.get("unit") or "шт",
                "subtotal": float(item.get("subtotal") or item.get("total") or 0),
            }
            for item in items
        ]

    def _get_cart_html(self) -> list[dict]:
        """Parse basket HTML page.

        ``subtotal`` is always 0.0 on this path; the page is not parsed for it.
        """
        resp = self._get("/basket")
        soup = BeautifulSoup(resp.text, "html.parser")
        items = []
        for item in soup.find_all(attrs={"data-owox-product-id": True}):
            try:
                qty = 1
                qty_span = item.find("span", class_=re.compile(r"count"))
                if qty_span is not None:
                    tokens = qty_span.text.strip().split()
                    try:
                        qty = int(float(tokens[0]))
                    except (IndexError, ValueError, OverflowError):
                        # The pattern also matches unrelated classes such as
                        # "discount"; keep the item with the default quantity
                        # instead of dropping it from the cart listing.
                        qty = 1
                items.append({
                    "id": item.get("data-owox-product-id", ""),
                    "name": item.get("data-owox-product-name", ""),
                    "price": float(item.get("data-owox-product-price", 0) or 0),
                    "quantity": qty,
                    "unit": "шт",
                    "subtotal": 0.0,
                })
            except Exception as e:
                logger.debug("Error parsing cart item: %s", e)
        return items

    def add_to_cart(self, product_id: str, quantity: int = 1) -> bool:
        """
        Add a product to cart.

        Args:
            product_id: product ID (from search results)
            quantity: number of items to add

        Returns:
            True if successful, False otherwise
        """
        if not self._csrf_token:
            self._get_csrf_token()
        # Try JSON API first
        try:
            return self._add_to_cart_json(product_id, quantity)
        except Exception as e:
            logger.debug("JSON add_to_cart failed (%s), trying form POST", e)
        # Fallback: form POST
        return self._add_to_cart_form(product_id, quantity)

    def _add_to_cart_json(self, product_id: str, quantity: int) -> bool:
        """POST the item as JSON; return whether the server reported success.

        A 200/201 response counts as success unless a JSON object body carries
        a falsy ``success`` field.
        """
        resp = self._post(
            "/basket/add",
            json_body={"product_id": product_id, "quantity": quantity},
            headers={"Accept": "application/json", "Content-Type": "application/json"},
        )
        if resp.status_code not in (200, 201):
            return False
        if "application/json" not in resp.headers.get("Content-Type", ""):
            return True
        try:
            data = resp.json()
        except ValueError:
            # The request was already accepted; raising here would trigger the
            # form fallback in add_to_cart() and add the item a second time.
            return True
        if isinstance(data, dict):
            return bool(data.get("success", True))
        return True

    def _add_to_cart_form(self, product_id: str, quantity: int) -> bool:
        """POST the item as a form with the CSRF token in ``_token``."""
        resp = self._post(
            "/basket/add",
            data={"product_id": product_id, "quantity": quantity, "_token": self._csrf_token},
        )
        return resp.status_code in (200, 201, 302)

    def remove_from_cart(self, product_id: str) -> bool:
        """Remove a product from cart. Returns False on any request failure."""
        if not self._csrf_token:
            self._get_csrf_token()
        try:
            resp = self._post(
                "/basket/remove",
                json_body={"product_id": product_id},
                headers={"Accept": "application/json", "Content-Type": "application/json"},
            )
            return resp.status_code in (200, 204)
        except Exception as e:
            logger.error("remove_from_cart failed: %s", e)
            return False

    def update_cart_quantity(self, product_id: str, quantity: int) -> bool:
        """Update quantity of a product in cart. Returns False on any request failure."""
        if not self._csrf_token:
            self._get_csrf_token()
        try:
            resp = self._post(
                "/basket/update",
                json_body={"product_id": product_id, "quantity": quantity},
                headers={"Accept": "application/json", "Content-Type": "application/json"},
            )
            return resp.status_code in (200, 204)
        except Exception as e:
            logger.error("update_cart_quantity failed: %s", e)
            return False

    # -------------------------------------------------------------------------
    # Orders (bonus — from PerekrestokOrderParser)
    # -------------------------------------------------------------------------

    def get_orders(self) -> list[dict]:
        """
        Get list of past orders.

        Returns: [{id, date}], both as strings, extracted from the order
        history page with a regular expression.
        """
        resp = self._get("/profile/orders/history")
        pattern = (
            r'class="xf-lk-order__group _number">\s+<span>Заказ №:</span>\s+<span>'
            r'(?P<order_id>\d+)</span>.*?'
            r'class="xf-lk-order__group _date.*?">\s+<span>Дата:</span>\s+<span>'
            r'(?P<date>[\d\.]+)</span>'
        )
        return [
            {"id": match.group("order_id"), "date": match.group("date")}
            for match in re.finditer(pattern, resp.text, re.DOTALL)
        ]
# =============================================================================
# Example usage / smoke test
# =============================================================================
if __name__ == "__main__":
import sys
logging.basicConfig(level=logging.DEBUG)
# Paste your cookies here (from browser DevTools)
MY_COOKIES = {
# "remember_xo-fo_<hash>": "<value>", # <-- required for auth
# "ngx_s_id": "<value>", # <-- required (DDoS Guard)
# "XSRF-TOKEN": "<value>", # <-- needed for POST
# "laravel_session": "<value>",
}
if not any("remember_xo" in k for k in MY_COOKIES):
print("⚠️ No auth cookie provided. Search will work only on residential IP.")
print(" Fill in MY_COOKIES dict with your browser cookies.\n")
client = VprokClient(cookies=MY_COOKIES)
try:
print("🔍 Searching for 'молоко'...")
results = client.search("молоко", limit=5)
if results:
print(f"Found {len(results)} products:")
for p in results:
print(f" [{p['id']}] {p['name']}{p['price']} ₽/{p['unit']}")
else:
print("No results (likely blocked by DDoS Guard or site changed)")
print("\n🛒 Fetching cart...")
cart = client.get_cart()
if cart:
print(f"Cart has {len(cart)} items:")
for item in cart:
print(f" {item['name']} x{item['quantity']}")
else:
print("Cart is empty or could not be fetched")
except DDoSGuardError as e:
print(f"\n❌ DDoS Guard blocked: {e}")
sys.exit(1)
except VprokAuthError as e:
print(f"\n❌ Auth error: {e}")
sys.exit(1)