Files
wiki/tasks/karaoke/render.py
2026-04-30 00:40:01 +03:00

252 lines
9.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
render.py — MoviePy/FFmpeg рендер караоке-видео
Накладывает текст на видео-фон, синхронизированный с таймингами.
"""
import os
import subprocess
import tempfile
from pathlib import Path
from PIL import Image, ImageDraw, ImageFont
# ---------- Settings ----------
# Size in pixels of the frames generated by the renderers, and the default
# output size of both render functions.
WIDTH = 1280
HEIGHT = 720
# Default frame rate used for frame generation and passed to FFmpeg.
FPS = 30
# Font files: bold for the currently active line, regular for an inactive one.
FONT_ACTIVE = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
FONT_INACTIVE = "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"
FONT_SIZE = 48
# RGB colours for the text and its drop shadow.
ACTIVE_COLOR = (255, 255, 0) # yellow
INACTIVE_COLOR = (255, 255, 255) # white
SHADOW_COLOR = (0, 0, 0)
# Length of the fade-in / fade-out ramp, in frames.
FADE_FRAMES = 9 # ~0.3 s at 30 fps
# Path of the FFmpeg executable; the FFMPEG_BIN environment variable overrides
# the default location under the user's home directory.
FFMPEG = os.environ.get("FFMPEG_BIN", os.path.expanduser("~/bin/ffmpeg-7.0.2-amd64-static/ffmpeg"))
# ---------- Утилиты ----------
def _load_font(path: str, size: int) -> ImageFont.FreeTypeFont:
    """Load a TrueType font, falling back to Pillow's built-in default font.

    Args:
        path: Filesystem path of the font file to load.
        size: Font size passed to ``ImageFont.truetype``.

    Returns:
        The loaded font, or whatever ``ImageFont.load_default()`` returns when
        the file cannot be loaded.
        NOTE(review): the fallback font ignores ``size`` and may not be a
        ``FreeTypeFont`` on every Pillow version, so the return annotation is
        only exact for the success path.
    """
    try:
        return ImageFont.truetype(path, size)
    except (OSError, ImportError) as exc:
        # OSError: font file missing or unreadable; ImportError: Pillow was
        # built without FreeType. Rendering continues with the default font,
        # but the fallback is reported so unexpectedly small or plain text
        # can be traced back to its cause.
        print(f"[render] Не удалось загрузить шрифт {path} ({exc}); используется встроенный шрифт")
        return ImageFont.load_default()
def _draw_text_centered(image: Image.Image, text: str,
                        font_active, font_inactive,
                        active: bool, fade_alpha: int,
                        y_ratio: float = 0.82):
    """Draw ``text`` centred on ``image`` with a drop shadow, in place.

    Args:
        image: Frame to draw on; it is modified in place.
        text: The line to draw. Nothing is drawn for an empty string.
        font_active: Font used when ``active`` is true.
        font_inactive: Font used when ``active`` is false.
        active: Selects the active font/colour pair or the inactive one.
        fade_alpha: Alpha (0-255) applied to the main text; the shadow is
            always drawn fully opaque.
        y_ratio: Vertical position of the text centre as a fraction of the
            frame height.
    """
    if not text:
        return

    # Use the frame's own size rather than the module defaults so the text is
    # centred correctly on frames of any resolution.
    frame_w, frame_h = image.size

    draw = ImageDraw.Draw(image)
    font = font_active if active else font_inactive
    color = ACTIVE_COLOR if active else INACTIVE_COLOR

    # textbbox() reports where the ink lands relative to the drawing origin;
    # its left/top are usually non-zero, so they must be subtracted for the
    # visible glyphs (not the origin) to end up centred.
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    tw = right - left
    th = bottom - top
    x = (frame_w - tw) // 2 - left
    y = int(frame_h * y_ratio) - th // 2 - top

    # Shadow: black, offset by 3 px to the right and down.
    draw.text((x + 3, y + 3), text, font=font, fill=SHADOW_COLOR)

    # Main text: drawn on a transparent layer, then pasted through the
    # layer's own alpha channel so ``fade_alpha`` controls its opacity.
    overlay = Image.new("RGBA", image.size, (0, 0, 0, 0))
    d = ImageDraw.Draw(overlay)
    d.text((x, y), text, font=font, fill=(*color, fade_alpha))
    image.paste(overlay.convert("RGB"), (0, 0), overlay)
# ---------- Рендер через FFmpeg ----------
def render(segments: list[dict], audio_path: str, bg_video: str,
           output_path: str, width: int = WIDTH, height: int = HEIGHT,
           fps: int = FPS):
    """Render a karaoke video on a black background.

    Steps:
      1. Generate one PNG frame per video frame (``fps`` frames per second)
         in a temporary directory.
      2. Let FFmpeg encode the frames together with the audio track.

    Args:
        segments: ``[{"start", "end", "text"}, ...]``. The previous-line
            lookup stops at the first segment that has not ended yet, so it
            assumes the list is ordered by time.
        audio_path: Audio file muxed into the result.
        bg_video: Unused here; kept for signature parity with
            ``render_with_bg``.
        output_path: Destination video file (overwritten if it exists).
        width: Output width. Frames are drawn at ``WIDTH`` x ``HEIGHT`` and
            scaled by FFmpeg when a different size is requested.
        height: Output height, see ``width``.
        fps: Frame rate of the generated frames and of the output.

    Returns:
        ``output_path``.

    Raises:
        subprocess.CalledProcessError: If FFmpeg exits with a non-zero status.
    """
    # Video length is the end of the last-ending segment (10 s if there are none).
    total_duration = max(s["end"] for s in segments) if segments else 10
    frame_count = int(total_duration * fps)
    print(f"[render] Генерируем кадры: {total_duration:.1f}s @ {fps}fps = {frame_count} кадров…")
    font_active = _load_font(FONT_ACTIVE, FONT_SIZE)
    font_inactive = _load_font(FONT_INACTIVE, FONT_SIZE)
    fade_span = max(FADE_FRAMES, 1)

    # TemporaryDirectory removes the frames even when FFmpeg fails; manual
    # cleanup after the subprocess call would be skipped by the exception.
    with tempfile.TemporaryDirectory(prefix="karaoke_") as tmpdir:
        for i in range(frame_count):
            t = i / fps
            bg = Image.new("RGB", (WIDTH, HEIGHT), (0, 0, 0))

            # First segment whose time span contains t, if any.
            active_seg = next(
                (seg for seg in segments if seg["start"] <= t <= seg["end"]),
                None,
            )
            if active_seg:
                # Fade in over the first FADE_FRAMES frames of the segment and
                # out over the last ones; alpha never drops below 128.
                frames_from_start = int((t - active_seg["start"]) * fps)
                frames_to_end = int((active_seg["end"] - t) * fps)
                fade_alpha = min(
                    255,
                    int(255 * frames_from_start / fade_span),
                    int(255 * frames_to_end / fade_span),
                )
                _draw_text_centered(bg, active_seg["text"],
                                    font_active, font_inactive,
                                    True, max(fade_alpha, 128))
            else:
                # Between segments: keep the most recently finished line on
                # screen in the inactive style at half opacity.
                prev_seg = None
                for seg in segments:
                    if seg["end"] <= t:
                        prev_seg = seg
                    else:
                        break
                if prev_seg:
                    _draw_text_centered(bg, prev_seg["text"],
                                        font_active, font_inactive,
                                        False, 128)
            bg.save(os.path.join(tmpdir, f"frame_{i:07d}.png"), "PNG")

        # Assemble the video with FFmpeg: PNG frames + audio.
        print("[render] Собираем видео через FFmpeg…")
        cmd = [
            FFMPEG,
            "-framerate", str(fps),
            "-i", os.path.join(tmpdir, "frame_%07d.png"),
            "-i", audio_path,
        ]
        if (width, height) != (WIDTH, HEIGHT):
            # Honour the requested output size the same way render_with_bg
            # does (scaling in FFmpeg); the default command line is unchanged.
            cmd += ["-vf", f"scale={width}:{height}"]
        cmd += [
            "-c:v", "libx264",
            "-pix_fmt", "yuv420p",
            "-c:a", "aac",
            "-b:a", "192k",
            "-shortest",
            "-y",
            output_path,
        ]
        subprocess.run(cmd, check=True)

    print(f"[render] Готово: {output_path}")
    return output_path
# ---------- Альтернативный рендер: bg-video + overlay ----------
def render_with_bg(segments: list[dict], audio_path: str, bg_video: str,
                   output_path: str, width: int = WIDTH, height: int = HEIGHT,
                   fps: int = FPS):
    """Render a karaoke video over a real background video.

    Steps:
      1. Re-encode ``bg_video`` without audio, looped and cut to the karaoke
         length.
      2. Generate transparent PNG overlay frames that carry the text.
      3. Let FFmpeg overlay the frames on the background and mux the audio.

    If the final FFmpeg step fails, the error tail is printed and the
    function falls back to ``render`` (black background).

    Args:
        segments: ``[{"start", "end", "text"}, ...]``.
        audio_path: Audio file muxed into the result.
        bg_video: Background video file.
        output_path: Destination video file (overwritten if it exists).
        width: Output width; both inputs are scaled to it by FFmpeg.
        height: Output height, see ``width``.
        fps: Frame rate of the generated overlay frames.

    Returns:
        ``output_path``.

    Raises:
        subprocess.CalledProcessError: If preparing the background video
            fails (only the final compositing step has a fallback).
    """
    total_duration = max(s["end"] for s in segments) if segments else 10
    fade_span = max(FADE_FRAMES, 1)

    # All intermediates (prepared background + overlay frames) live in one
    # temporary directory that is removed on every exit path, including the
    # fallback and exceptions. Keeping the background there also avoids
    # writing a fixed-name "bg_trimmed.mp4" next to the output file.
    with tempfile.TemporaryDirectory(prefix="karaoke_overlay_") as tmpdir:
        # Prepare bg_video: loop it indefinitely on input and cut it to the
        # required length, so a short background does not truncate the final
        # video through "-shortest".
        bg_tmp = os.path.join(tmpdir, "bg_trimmed.mp4")
        bg_dur_cmd = [FFMPEG, "-stream_loop", "-1", "-i", bg_video,
                      "-t", str(total_duration),
                      "-c:v", "libx264", "-pix_fmt", "yuv420p", "-an",
                      "-y", bg_tmp]
        subprocess.run(bg_dur_cmd, check=True, capture_output=True)

        print("[render] Генерируем overlay-кадры…")
        font_active = _load_font(FONT_ACTIVE, FONT_SIZE)
        font_inactive = _load_font(FONT_INACTIVE, FONT_SIZE)
        frame_count = int(total_duration * fps)
        for i in range(frame_count):
            t = i / fps
            frame = Image.new("RGBA", (WIDTH, HEIGHT), (0, 0, 0, 0))

            # First segment whose time span contains t, if any; frames
            # between segments stay fully transparent.
            active_seg = next(
                (seg for seg in segments if seg["start"] <= t <= seg["end"]),
                None,
            )
            if active_seg:
                # Fade in/out over FADE_FRAMES frames; alpha never drops
                # below 128.
                frames_from_start = int((t - active_seg["start"]) * fps)
                frames_to_end = int((active_seg["end"] - t) * fps)
                fade_alpha = min(
                    255,
                    int(255 * frames_from_start / fade_span),
                    int(255 * frames_to_end / fade_span),
                )
                _draw_text_centered(frame, active_seg["text"],
                                    font_active, font_inactive,
                                    True, max(fade_alpha, 128))
            frame.save(os.path.join(tmpdir, f"ov_{i:07d}.png"), "PNG")

        # Compositing: FFmpeg complex filter (scale both inputs, overlay the
        # text frames on the background, take audio from the third input).
        print("[render] Собираем итоговое видео…")
        ov_pattern = os.path.join(tmpdir, "ov_%07d.png")
        out_abs = os.path.abspath(output_path)
        audio_abs = os.path.abspath(audio_path)
        bg_abs = os.path.abspath(bg_tmp)
        pat_abs = os.path.abspath(ov_pattern)
        cmd = [
            FFMPEG,
            "-framerate", str(fps),
            "-i", pat_abs,
            "-i", bg_abs,
            "-i", audio_abs,
            "-filter_complex",
            f"[0:v]scale={width}:{height},setpts=PTS-STARTPTS[ovr];"
            f"[1:v]scale={width}:{height},setpts=PTS-STARTPTS[bg];"
            f"[bg][ovr]overlay=0:0[final]",
            "-map", "[final]",
            "-map", "2:a",
            "-c:v", "libx264",
            "-pix_fmt", "yuv420p",
            "-c:a", "aac",
            "-b:a", "192k",
            "-shortest",
            "-y",
            out_abs,
        ]
        result = subprocess.run(cmd, capture_output=True, text=True)

    # The temporary directory is already gone here, so the fallback does not
    # keep the overlay frames on disk next to its own frames.
    if result.returncode != 0:
        print(f"[render] Ошибка FFmpeg: {result.stderr[-500:]}")
        # Fallback: plain render without the background video.
        print("[render] Fallback: чёрный фон…")
        return render(segments, audio_path, bg_video, output_path, width, height, fps)

    print(f"[render] Готово: {output_path}")
    return output_path