Files
wiki/tasks/bytik/tts.py
2026-04-15 01:00:01 +03:00

81 lines
2.4 KiB
Python

import asyncio
import logging
import os
import subprocess
import tempfile

from config import ELEVENLABS_API_KEY
# Module-level logger; its name is this module's __name__.
logger = logging.getLogger(__name__)
# Defaults preserved from the original hard-coded request.
_DEFAULT_VOICE_ID = "21m00Tcm4TlvDq8ikWAM"
_DEFAULT_MODEL_ID = "eleven_monolingual_v1"
_TTS_URL_TEMPLATE = "https://api.elevenlabs.io/v1/text-to-speech/{voice_id}/stream"


def _remove_quietly(path: str | None) -> None:
    """Delete *path* if it is set, ignoring filesystem errors."""
    if not path:
        return
    try:
        os.remove(path)
    except OSError:
        # Cleanup is best-effort: a missing or locked temp file must not
        # turn an otherwise successful conversion into a failure.
        pass


def _convert_mp3_to_ogg(mp3_data: bytes) -> bytes:
    """Transcode MP3 bytes to Ogg/Opus bytes by running ffmpeg on temp files.

    This function is synchronous (file I/O plus ``subprocess.run``) and is
    meant to be executed off the event loop.

    Raises:
        subprocess.CalledProcessError: ffmpeg exited with a non-zero status.
        OSError: a temp file could not be created/read, or ffmpeg could not
            be started.
    """
    mp3_path = None
    ogg_path = None
    try:
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as mp3_tmp:
            # Record the path before writing so the file is still removed
            # if the write itself fails.
            mp3_path = mp3_tmp.name
            mp3_tmp.write(mp3_data)

        # mkstemp returns an open descriptor; ffmpeg writes by path, so the
        # descriptor is closed immediately.
        ogg_fd, ogg_path = tempfile.mkstemp(suffix=".ogg")
        os.close(ogg_fd)

        ffmpeg_cmd = [
            "ffmpeg", "-y",
            "-i", mp3_path,
            "-acodec", "libopus",
            "-b:a", "48k",
            "-vbr", "on",
            ogg_path,
        ]
        subprocess.run(
            ffmpeg_cmd,
            check=True,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )

        with open(ogg_path, "rb") as ogg_file:
            return ogg_file.read()
    finally:
        for path in (mp3_path, ogg_path):
            _remove_quietly(path)


async def text_to_speech(
    text: str,
    *,
    voice_id: str = _DEFAULT_VOICE_ID,
    model_id: str = _DEFAULT_MODEL_ID,
) -> bytes | None:
    """Synthesize *text* with the ElevenLabs API and return Ogg/Opus audio.

    The MP3 response body is downloaded with aiohttp and then transcoded
    with ffmpeg in a worker thread, so the event loop is not blocked while
    ffmpeg runs.

    Args:
        text: Text to synthesize.
        voice_id: ElevenLabs voice identifier placed in the request URL.
        model_id: Value sent as ``model_id`` in the request payload.
            NOTE(review): the default is ``eleven_monolingual_v1`` while the
            log messages in this module are Russian - confirm that this
            model is the intended one for the texts being passed in.

    Returns:
        The converted audio bytes, or ``None`` when the API key is not set,
        aiohttp is not installed, the API answers with a non-200 status, or
        the request/conversion raises an exception (the failure is logged).
    """
    if not ELEVENLABS_API_KEY:
        logger.debug("ElevenLabs API key не установлен, TTS пропущен")
        return None

    logger.info("Генерация TTS: %s...", text[:50])

    # aiohttp is imported lazily so the module stays importable without it.
    try:
        import aiohttp
    except ImportError:
        logger.warning("aiohttp не установлен, TTS недоступен")
        return None

    headers = {
        "xi-api-key": ELEVENLABS_API_KEY,
        "Content-Type": "application/json",
    }
    payload = {
        "text": text,
        "model_id": model_id,
        "voice_settings": {
            "stability": 0.5,
            "similarity_boost": 0.75,
        },
    }
    url = _TTS_URL_TEMPLATE.format(voice_id=voice_id)

    try:
        async with aiohttp.ClientSession() as session:
            async with session.post(url, json=payload, headers=headers) as resp:
                if resp.status != 200:
                    logger.error("ElevenLabs error: %s", resp.status)
                    return None
                mp3_data = await resp.read()
    except Exception as e:
        logger.error("Ошибка TTS: %s", e)
        return None

    try:
        # ffmpeg and the temp-file I/O are blocking; run them in a thread so
        # other coroutines keep running during the transcode.
        return await asyncio.to_thread(_convert_mp3_to_ogg, mp3_data)
    except Exception as e:
        logger.error("Ошибка при конвертации TTS: %s", e)
        return None