# wiki/tasks/karaoke/render.py

"""
render.py — MoviePy/FFmpeg рендер караоке-видео

Накладывает текст на видео-фон, синхронизированный с таймингами.
"""

import os
import subprocess
import tempfile
from pathlib import Path
from PIL import Image, ImageDraw, ImageFont

# ---------- Settings ----------

# Size (in pixels) and frame rate of the generated frames.
WIDTH = 1280
HEIGHT = 720
FPS = 30
# TrueType fonts: bold face for the active line, regular face otherwise.
FONT_ACTIVE = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
FONT_INACTIVE = "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"
FONT_SIZE = 48
# NOTE(review): the drawing helpers visible in this file hard-code their own
# colors and do not read the three constants below — confirm whether they
# are still used elsewhere.
ACTIVE_COLOR = (255, 255, 0)    # yellow
INACTIVE_COLOR = (255, 255, 255)  # white
SHADOW_COLOR = (0, 0, 0)
FADE_FRAMES = 9  # ~0.3 s at 30 fps

# Path to the ffmpeg executable; the FFMPEG_BIN environment variable overrides
# the default static build location under the user's home directory.
FFMPEG = os.environ.get("FFMPEG_BIN", os.path.expanduser("~/bin/ffmpeg-7.0.2-amd64-static/ffmpeg"))

# ---------- Utilities ----------

# Fonts already loaded by _load_font, keyed by (path, size), so repeated
# requests (e.g. once per frame) do not hit the disk again.
_FONT_CACHE = {}


def _load_font(path: str, size: int) -> "ImageFont.FreeTypeFont | ImageFont.ImageFont":
    """Load a TrueType font, falling back to Pillow's built-in font.

    Args:
        path: Path to a TrueType/OpenType font file.
        size: Requested font size in pixels.

    Returns:
        The font loaded from ``path``. If that fails, Pillow's default font
        is returned instead — at the requested size when the installed
        Pillow supports a sized default font, otherwise the fixed-size
        bitmap font. The result is cached per ``(path, size)``.
    """
    key = (path, size)
    if key in _FONT_CACHE:
        return _FONT_CACHE[key]

    try:
        font = ImageFont.truetype(path, size)
    except (OSError, ImportError, ValueError) as exc:
        # OSError: font file missing/unreadable; ImportError: Pillow built
        # without FreeType; ValueError: invalid size. Rendering should still
        # proceed, but the substitution must not go unnoticed.
        print(f"[render] Cannot load font {path!r} ({exc}); using Pillow's default font")
        try:
            # Newer Pillow releases accept a size for the default font.
            font = ImageFont.load_default(size)
        except (TypeError, OSError, ImportError, ValueError):
            # Older Pillow (no ``size`` parameter) or no FreeType support.
            font = ImageFont.load_default()

    _FONT_CACHE[key] = font
    return font


def _draw_text_centered(image: Image.Image, text: str,
                        font_active, font_inactive,
                        active: bool, fade_alpha: int,
                        y_ratio: float = 0.82,
                        alpha: "int | None" = None):
    """Draw one horizontally centered line of text with a drop shadow.

    The text is rendered onto a transparent RGBA overlay the same size as
    ``image`` and then composited onto ``image`` in place, so it works for
    both RGB and RGBA frames of any size.

    Args:
        image: Frame to draw on; modified in place.
        text: Line to draw. Nothing is drawn for an empty string.
        font_active: Font used when ``active`` is true.
        font_inactive: Font used when ``active`` is false.
        active: Selects yellow text + ``font_active`` (true) or white text +
            ``font_inactive`` (false).
        fade_alpha: Text opacity, 0–255, used when ``alpha`` is ``None``.
        y_ratio: Vertical position of the text as a fraction of the frame
            height.
        alpha: Optional opacity override, 0–255; takes precedence over
            ``fade_alpha`` when given.
    """
    opacity = fade_alpha if alpha is None else alpha
    # Clamp so out-of-range values cannot produce an invalid RGBA fill.
    opacity = max(0, min(255, int(opacity)))
    if not text or opacity == 0:
        # A fully transparent overlay would leave the frame unchanged anyway.
        return

    base_color = (255, 220, 0) if active else (255, 255, 255)
    font = font_active if active else font_inactive

    # Use the real frame size: alpha_composite() requires both images to
    # have identical dimensions.
    frame_w, frame_h = image.size

    left, top, right, bottom = font.getbbox(text)
    text_w = right - left
    text_h = bottom - top
    x = (frame_w - text_w) // 2
    y = int(frame_h * y_ratio) - text_h // 2

    overlay = Image.new("RGBA", (frame_w, frame_h), (0, 0, 0, 0))
    pen = ImageDraw.Draw(overlay)
    # Shadow: black at ~70% opacity (180/255), scaled by the text opacity and
    # offset 3 px down-right.
    pen.text((x + 3, y + 3), text, font=font,
             fill=(0, 0, 0, int(180 * opacity / 255)))
    # Main text.
    pen.text((x, y), text, font=font, fill=base_color + (opacity,))

    if image.mode == "RGBA":
        image.alpha_composite(overlay)
    else:
        # Non-RGBA frames: paste the colors using the overlay's own alpha
        # channel as the mask.
        image.paste(overlay.convert("RGB"), (0, 0), overlay)


# Enlarged "glow" font for the currently sung word, keyed by (path, size).
# A value of None records that loading failed, so it is not retried per frame.
_GLOW_FONT_CACHE = {}


def _get_glow_font(fallback):
    """Return the slightly larger font for the current word, loading it once."""
    key = (FONT_ACTIVE, FONT_SIZE + 4)
    if key not in _GLOW_FONT_CACHE:
        try:
            _GLOW_FONT_CACHE[key] = ImageFont.truetype(*key)
        except (OSError, ImportError, ValueError):
            _GLOW_FONT_CACHE[key] = None
    cached = _GLOW_FONT_CACHE[key]
    return fallback if cached is None else cached


def draw_karaoke_line(frame: Image.Image, text: str, words: list,
                      t: float, font, y_ratio: float = 0.82):
    """Draw a karaoke line, coloring each word according to its timing.

    Words that have already ended (``t >= end``) are yellow, the word being
    sung (``start <= t < end``) is a brighter yellow drawn in a slightly
    larger font with a soft glow, and upcoming words are semi-transparent
    white. The line is composited onto ``frame`` in place.

    Args:
        frame: Frame to draw on (RGB or RGBA); modified in place.
        text: Full line text; only used when ``words`` is empty, in which
            case the whole line is drawn in yellow.
        words: Sequence of dicts with ``"word"``, ``"start"`` and ``"end"``
            keys, in display order.
        t: Current time, in the same units as the word timings.
        font: Font for regular words (also the glow fallback if the enlarged
            font cannot be loaded).
        y_ratio: Vertical position of the line's top as a fraction of the
            frame height.
    """
    if not words:
        # Fallback: no word timestamps — draw the whole line in yellow.
        _draw_text_centered(frame, text, font, font, True, 255, y_ratio=y_ratio, alpha=255)
        return

    COLOR_DONE = (255, 220, 0, 255)       # already sung — yellow
    COLOR_CURRENT = (255, 235, 50, 255)   # being sung — slightly brighter
    COLOR_GLOW = (255, 220, 0, 80)        # faint halo around the current word
    COLOR_PENDING = (255, 255, 255, 200)  # not sung yet — white
    COLOR_SHADOW = (0, 0, 0, 180)

    # Size the overlay from the frame itself: alpha_composite() requires
    # matching dimensions.
    frame_w, frame_h = frame.size
    overlay = Image.new("RGBA", (frame_w, frame_h), (0, 0, 0, 0))
    od = ImageDraw.Draw(overlay)

    # Each word is laid out with a trailing space. Advances are always
    # measured with the regular font, so word positions do not depend on
    # which word is currently highlighted.
    pieces = [w["word"] + " " for w in words]
    advances = [od.textlength(piece, font=font) for piece in pieces]

    x = (frame_w - sum(advances)) // 2
    y = int(frame_h * y_ratio)

    glow_font = _get_glow_font(font)

    for w, piece, advance in zip(words, pieces, advances):
        if t >= w["end"]:
            color, use_font = COLOR_DONE, font
        elif t >= w["start"]:
            color, use_font = COLOR_CURRENT, glow_font
            # Glow: two faint copies offset by one pixel diagonally.
            od.text((x - 1, y - 1), piece, font=glow_font, fill=COLOR_GLOW)
            od.text((x + 1, y + 1), piece, font=glow_font, fill=COLOR_GLOW)
        else:
            color, use_font = COLOR_PENDING, font

        # Shadow first, then the word itself on top.
        od.text((x + 2, y + 2), piece, font=use_font, fill=COLOR_SHADOW)
        od.text((x, y), piece, font=use_font, fill=color)
        x = int(x + advance)

    if frame.mode == "RGBA":
        frame.alpha_composite(overlay)
    else:
        # Non-RGBA frames: paste using the overlay's alpha channel as mask.
        frame.paste(overlay.convert("RGB"), (0, 0), overlay)


def draw_text_with_alpha(image: Image.Image, text: str,
                         font_active, font_inactive,
                         alpha: int = 255, active: bool = True,
                         y_ratio: float = 0.82):
    """Draw a single centered line at the given opacity.

    Thin convenience wrapper around ``_draw_text_centered``: the same
    ``alpha`` value is passed both as the fade level and as the explicit
    alpha override.
    """
    opacity = alpha
    # Positional order: image, text, font_active, font_inactive, active,
    # fade_alpha, y_ratio, alpha.
    _draw_text_centered(
        image, text, font_active, font_inactive,
        active, opacity, y_ratio, opacity,
    )


def draw_progress_bar(image: Image.Image, current_time: float,
                      prev_end: float, next_start: float):
    """Draw a progress bar and countdown for a long instrumental pause.

    - Horizontal bar at mid-height, 60% of the frame width, 8 px tall, with
      rounded corners (radius 4).
    - The filled part shows how far ``current_time`` is between ``prev_end``
      and ``next_start``.
    - Below the bar: time remaining until ``next_start`` as ``M:SS``.

    Args:
        image: Frame to draw on (RGB or RGBA); modified in place.
        current_time: Current playback time.
        prev_end: Time at which the pause began.
        next_start: Time at which the next line starts.
    """
    frame_w, frame_h = image.size

    bar_w = int(frame_w * 0.60)
    bar_h = 8
    bar_x = (frame_w - bar_w) // 2
    bar_y = frame_h // 2
    radius = 4

    # Progress in [0.0, 1.0]. A zero or negative gap means the pause is
    # already over, so the bar is shown full instead of dividing by zero.
    total_gap = next_start - prev_end
    if total_gap > 0:
        progress = max(0.0, min(1.0, (current_time - prev_end) / total_gap))
    else:
        progress = 1.0
    fill_w = int(bar_w * progress)

    def _composite(layer):
        """Blend a frame-sized RGBA layer onto ``image`` in place."""
        if image.mode == "RGBA":
            image.alpha_composite(layer)
        else:
            image.paste(layer.convert("RGB"), (0, 0), layer)

    def _rounded_rect(x, y, w, h, r, fill):
        """Composite one rounded rectangle onto ``image`` via its own layer."""
        layer = Image.new("RGBA", (frame_w, frame_h), (0, 0, 0, 0))
        ImageDraw.Draw(layer).rounded_rectangle(
            [x, y, x + w, y + h], radius=r, fill=fill)
        _composite(layer)

    # Bar background.
    _rounded_rect(bar_x, bar_y, bar_w, bar_h, radius, (80, 80, 80, 200))
    # Filled part; shrink the corner radius while the fill is still narrower
    # than two full corners.
    if fill_w > 0:
        fill_r = min(radius, fill_w // 2)
        _rounded_rect(bar_x, bar_y, fill_w, bar_h, fill_r, (255, 255, 255, 220))

    # Countdown: time left until the next line.
    remaining = max(0, next_start - current_time)
    mins, secs = divmod(int(remaining), 60)
    timer_text = f"{mins}:{secs:02d}"

    # 32 px timer font; reuse the module's loader (with its default-font
    # fallback) rather than duplicating the try/except here.
    timer_font = _load_font(FONT_INACTIVE, 32)

    overlay = Image.new("RGBA", (frame_w, frame_h), (0, 0, 0, 0))
    d = ImageDraw.Draw(overlay)
    bbox = d.textbbox((0, 0), timer_text, font=timer_font)
    tw = bbox[2] - bbox[0]
    tx = (frame_w - tw) // 2
    ty = bar_y + bar_h + 8
    # Shadow, then the timer text.
    d.text((tx + 2, ty + 2), timer_text, font=timer_font, fill=(0, 0, 0, 140))
    d.text((tx, ty), timer_text, font=timer_font, fill=(255, 255, 255, 220))
    _composite(overlay)


# ---------- Rendering via FFmpeg ----------

def render(segments: list[dict], audio_path: str, bg_video: str,
           output_path: str, width: int = WIDTH, height: int = HEIGHT,
           fps: int = FPS):
    """Render a karaoke video on a black background.

    1. Generates one PNG frame per video frame (``fps`` frames per second)
       in a temporary directory.
    2. Has FFmpeg encode the frames together with the audio track.

    Args:
        segments: ``[{"start": ..., "end": ..., "text": ...}, ...]``.
        audio_path: Audio file to mux into the result.
        bg_video: Unused here; kept so the signature matches
            ``render_with_bg``.
        output_path: Destination video file (overwritten if it exists).
        width: Output width. Frames are drawn at WIDTH x HEIGHT and scaled
            by FFmpeg when a different size is requested.
        height: Output height (see ``width``).
        fps: Frame rate of the generated frames and the output video.

    Returns:
        ``output_path``.

    Raises:
        subprocess.CalledProcessError: If FFmpeg exits with an error.
    """
    # The "previous segment" scan below stops at the first segment that has
    # not ended yet, which is only valid for chronologically ordered input.
    segments = sorted(segments, key=lambda s: s["start"])

    # Video length = end of the last segment (10 s when there are none).
    total_duration = max(s["end"] for s in segments) if segments else 10
    frame_count = int(total_duration * fps)

    print(f"[render] Генерируем кадры: {total_duration:.1f}s @ {fps}fps = {frame_count} кадров…")

    font_active = _load_font(FONT_ACTIVE, FONT_SIZE)
    font_inactive = _load_font(FONT_INACTIVE, FONT_SIZE)
    fade_span = max(FADE_FRAMES, 1)

    # The context manager removes the frames even if drawing or FFmpeg
    # fails, so a failed run does not leave thousands of PNGs behind.
    with tempfile.TemporaryDirectory(prefix="karaoke_") as tmpdir:
        for i in range(frame_count):
            t = i / fps
            bg = Image.new("RGB", (WIDTH, HEIGHT), (0, 0, 0))

            # First segment whose time span contains t, if any.
            active_seg = next(
                (seg for seg in segments if seg["start"] <= t <= seg["end"]),
                None,
            )

            if active_seg:
                # Fade in over the first FADE_FRAMES frames of the segment
                # and out over the last FADE_FRAMES; the opacity never drops
                # below 128.
                frames_from_start = int((t - active_seg["start"]) * fps)
                frames_to_end = int((active_seg["end"] - t) * fps)
                fade_alpha = min(
                    255,
                    int(255 * frames_from_start / fade_span),
                    int(255 * frames_to_end / fade_span),
                )
                _draw_text_centered(bg, active_seg["text"],
                                    font_active, font_inactive,
                                    True, max(fade_alpha, 128))
            else:
                # Between segments: keep the most recently finished line on
                # screen, dimmed.
                prev_seg = None
                for seg in segments:
                    if seg["end"] <= t:
                        prev_seg = seg
                    else:
                        break
                if prev_seg:
                    _draw_text_centered(bg, prev_seg["text"],
                                        font_active, font_inactive,
                                        False, 128)

            bg.save(os.path.join(tmpdir, f"frame_{i:07d}.png"), "PNG")

        # Encode the PNG sequence together with the audio.
        print("[render] Собираем видео через FFmpeg…")
        cmd = [
            FFMPEG,
            "-framerate", str(fps),
            "-i", os.path.join(tmpdir, "frame_%07d.png"),
            "-i", audio_path,
        ]
        if (width, height) != (WIDTH, HEIGHT):
            # Honor the requested output size.
            cmd += ["-vf", f"scale={width}:{height}"]
        cmd += [
            "-c:v", "libx264",
            "-pix_fmt", "yuv420p",
            "-c:a", "aac",
            "-b:a", "192k",
            "-shortest",
            "-y",
            output_path,
        ]
        subprocess.run(cmd, check=True)

    print(f"[render] Готово: {output_path}")
    return output_path


# ---------- Alternative renderer: background video + overlay ----------

def render_with_bg(segments: list[dict], audio_path: str, bg_video: str,
                   output_path: str, width: int = WIDTH, height: int = HEIGHT,
                   fps: int = FPS):
    """Render a karaoke video over a real background video.

    Steps:
    1. Loop/trim ``bg_video`` to the total duration (re-encoded, no audio).
    2. Generate transparent PNG overlay frames with the text.
    3. Have FFmpeg overlay the frames on the background and mux the audio.

    If the final FFmpeg step fails, falls back to ``render`` (black
    background).

    Args:
        segments: ``[{"start", "end", "text"[, "words"]}, ...]``; a
            non-empty ``"words"`` list enables per-word highlighting.
        audio_path: Audio file to mux into the result.
        bg_video: Background video file.
        output_path: Destination video file (overwritten if it exists).
        width: Output width; both inputs are scaled to it by FFmpeg.
        height: Output height; both inputs are scaled to it by FFmpeg.
        fps: Frame rate of the overlay frames.

    Returns:
        ``output_path``.

    Raises:
        subprocess.CalledProcessError: If preparing the background fails.
    """
    # The "previous segment" scan below relies on chronological order.
    segments = sorted(segments, key=lambda s: s["start"])

    total_duration = max(s["end"] for s in segments) if segments else 10
    frame_count = int(total_duration * fps)
    fade_span = max(FADE_FRAMES, 1)

    # All intermediates (trimmed background + overlay frames) live in one
    # temporary directory that is removed on success, failure and fallback.
    with tempfile.TemporaryDirectory(prefix="karaoke_overlay_") as tmpdir:
        # Prepare the background: "-stream_loop -1" repeats a short clip
        # indefinitely and "-t" cuts it to the required length. Without the
        # loop, "-shortest" in the final step would truncate the whole video
        # to the background's length.
        bg_tmp = os.path.join(tmpdir, "bg_trimmed.mp4")
        bg_dur_cmd = [FFMPEG, "-stream_loop", "-1", "-i", bg_video,
                      "-t", str(total_duration),
                      "-c:v", "libx264", "-pix_fmt", "yuv420p", "-an",
                      "-y", bg_tmp]
        subprocess.run(bg_dur_cmd, check=True, capture_output=True)

        print("[render] Генерируем overlay-кадры…")

        font_active = _load_font(FONT_ACTIVE, FONT_SIZE)
        font_inactive = _load_font(FONT_INACTIVE, FONT_SIZE)

        for i in range(frame_count):
            t = i / fps
            frame = Image.new("RGBA", (WIDTH, HEIGHT), (0, 0, 0, 0))

            # Segment being sung at time t, if any.
            active_seg = next(
                (seg for seg in segments if seg["start"] <= t <= seg["end"]),
                None,
            )

            # Most recently finished segment (progress bar / dimmed text).
            prev_seg = None
            for seg in segments:
                if seg["end"] <= t:
                    prev_seg = seg
                else:
                    break
            prev_seg_end = prev_seg["end"] if prev_seg else 0.0

            # First segment that has not started yet.
            next_seg = next((seg for seg in segments if seg["start"] > t), None)

            if active_seg:
                words = active_seg.get("words")
                if isinstance(words, list) and words:
                    # Word timestamps available: per-word karaoke effect.
                    draw_karaoke_line(frame, active_seg["text"], words, t, font_active)
                else:
                    # No word timestamps: whole-line fade in/out, never
                    # dimmer than 128.
                    frames_from_start = int((t - active_seg["start"]) * fps)
                    frames_to_end = int((active_seg["end"] - t) * fps)
                    fade_alpha = min(
                        255,
                        int(255 * frames_from_start / fade_span),
                        int(255 * frames_to_end / fade_span),
                    )
                    _draw_text_centered(frame, active_seg["text"],
                                        font_active, font_inactive,
                                        True, max(fade_alpha, 128))

            elif next_seg is not None:
                gap = next_seg["start"] - t                   # time left
                pause_len = next_seg["start"] - prev_seg_end  # whole pause

                if gap <= 5:
                    # Last 5 s before the next line: fade it in (0.0 -> 1.0).
                    fade = 1.0 - (gap / 5.0)
                    draw_text_with_alpha(frame, next_seg["text"],
                                         font_active, font_inactive,
                                         alpha=int(fade * 255), active=True)
                elif pause_len > 20:
                    # Long pause (> 20 s): show the progress bar for the
                    # whole pause until the fade-in takes over. Testing the
                    # pause length rather than the time left keeps the bar
                    # from disappearing part-way through.
                    draw_progress_bar(frame, t, prev_seg_end, next_seg["start"])
                elif prev_seg:
                    # Short pause: keep the previous line, dimmed.
                    _draw_text_centered(frame, prev_seg["text"],
                                        font_active, font_inactive,
                                        False, 128)

            elif prev_seg:
                # After the last segment: keep the previous line, dimmed.
                _draw_text_centered(frame, prev_seg["text"],
                                    font_active, font_inactive,
                                    False, 128)

            frame.save(os.path.join(tmpdir, f"ov_{i:07d}.png"), "PNG")

        # Combine: scale both inputs, overlay the text frames on the
        # background, and take the audio from the third input.
        print("[render] Собираем итоговое видео…")

        out_abs = os.path.abspath(output_path)
        audio_abs = os.path.abspath(audio_path)
        bg_abs = os.path.abspath(bg_tmp)
        pat_abs = os.path.abspath(os.path.join(tmpdir, "ov_%07d.png"))

        cmd = [
            FFMPEG,
            "-framerate", str(fps),
            "-i", pat_abs,
            "-i", bg_abs,
            "-i", audio_abs,
            "-filter_complex",
            f"[0:v]scale={width}:{height},setpts=PTS-STARTPTS[ovr];"
            f"[1:v]scale={width}:{height},setpts=PTS-STARTPTS[bg];"
            f"[bg][ovr]overlay=0:0[final]",
            "-map", "[final]",
            "-map", "2:a",
            "-c:v", "libx264",
            "-pix_fmt", "yuv420p",
            "-c:a", "aac",
            "-b:a", "192k",
            "-shortest",
            "-y",
            out_abs,
        ]

        result = subprocess.run(cmd, capture_output=True, text=True)

    # Intermediates are already gone here, so the fallback does not keep
    # two sets of frames on disk at once.
    if result.returncode != 0:
        print(f"[render] Ошибка FFmpeg: {result.stderr[-500:]}")
        # Fallback: plain render without the background video.
        print("[render] Fallback: чёрный фон…")
        return render(segments, audio_path, bg_video, output_path, width, height, fps)

    print(f"[render] Готово: {output_path}")
    return output_path