#!/usr/bin/env bash
# send_video_note.sh — Generate TTS + lip-sync video (VEED Fabric 1.0) and send as Telegram video note
#
# Usage:
#   ./send_video_note.sh "Текст для озвучки"
#   ./send_video_note.sh "Текст" [VOICE_ID] [CHAT_ID]
#
# Dependencies:
#   - curl, python3, base64
#   - ffmpeg and ffprobe (paths in FFMPEG_BIN / FFPROBE_BIN)
#   - ELEVENLABS_API_KEY in ~/.openclaw/.env
#   - FAL_KEY in ~/.openclaw/.env (format: key_id:key_secret)
#   - BOT_TOKEN read from ~/.openclaw/openclaw.json

set -euo pipefail

# ── Config ────────────────────────────────────────────────────────────────────
# Paths, service defaults and limits shared by every step below.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ENV_FILE="$HOME/.openclaw/.env"
CONFIG_FILE="$HOME/.openclaw/openclaw.json"
AVATAR="$HOME/.openclaw/workspace/data/avatars/stream-avatar.jpg"
FFMPEG_BIN="/home/node/bin/ffmpeg-7.0.2-amd64-static/ffmpeg"
FFPROBE_BIN="/home/node/bin/ffmpeg-7.0.2-amd64-static/ffprobe"
DEFAULT_VOICE="TPIitICAZ8CqlGZ81AKm"
DEFAULT_MODEL="eleven_multilingual_v2"
VIDEO_SIZE=512          # Telegram video note max dimension
MAX_DURATION=60         # Telegram video note max seconds
FAL_RESOLUTION="720p"   # VEED Fabric 1.0: 720p or 480p
FAL_POLL_INTERVAL=5     # seconds between status polls
FAL_MAX_WAIT=300        # max seconds to wait for fal.ai result
LOG_TAG="[video_note]"

# Per-run scratch directory. Honour TMPDIR when the caller sets it and fail
# with a readable message (the log helpers are not defined yet at this point).
TMP_DIR=$(mktemp -d "${TMPDIR:-/tmp}/video_note_XXXXXX") || {
  echo "$LOG_TAG ERROR: cannot create temporary directory" >&2
  exit 1
}

# ── Cleanup on exit ───────────────────────────────────────────────────────────
#######################################
# Remove the per-run scratch directory.
# Globals:   TMP_DIR (read)
# Arguments: none
# Returns:   status of rm; aborts with an error if TMP_DIR is empty or unset
#######################################
cleanup() {
  # ${TMP_DIR:?} refuses to expand to an empty path; -- ends option parsing.
  rm -rf -- "${TMP_DIR:?}"
}
# Registered for EXIT, so it runs whenever the shell exits.
trap cleanup EXIT

# ── Logging ───────────────────────────────────────────────────────────────────
# All diagnostics go to stderr so stdout stays reserved for the script's result.
# printf is used instead of echo so message text is always printed literally.

# log MESSAGE... — informational line prefixed with LOG_TAG.
log() { printf '%s %s\n' "$LOG_TAG" "$*" >&2; }

# err MESSAGE... — error line prefixed with LOG_TAG and "ERROR:".
err() { printf '%s ERROR: %s\n' "$LOG_TAG" "$*" >&2; }

# die MESSAGE... — report an error and terminate the script with status 1.
die() {
  err "$*"
  exit 1
}

# ── Argument parsing ──────────────────────────────────────────────────────────
# $1 text to speak (required), $2 ElevenLabs voice id (optional), $3 chat id (optional).
TEXT="${1:-}"
VOICE_ID="${2:-$DEFAULT_VOICE}"
CHAT_ID="${3:-}"

# Same "[[ ok ]] || die" guard shape as the rest of the script.
[[ -n "$TEXT" ]] || die "Usage: $0 \"текст для озвучки\" [voice_id] [chat_id]"

# VOICE_ID is placed into the request URL path, so restrict it to URL-safe characters.
[[ "$VOICE_ID" =~ ^[A-Za-z0-9_-]+$ ]] || die "Invalid voice id: $VOICE_ID"

# ── Load environment ──────────────────────────────────────────────────────────
[[ -f "$ENV_FILE" ]] || die "Env file not found: $ENV_FILE"
# set -a exports everything the env file assigns; set +a restores normal behaviour.
# shellcheck source=/dev/null
set -a; source "$ENV_FILE"; set +a

[[ -n "${ELEVENLABS_API_KEY:-}" ]] || die "ELEVENLABS_API_KEY not set in $ENV_FILE"
[[ -n "${FAL_KEY:-}" ]] || die "FAL_KEY not set in $ENV_FILE"

# ── Get bot token and chat ID from config ─────────────────────────────────────
#######################################
# Print the Telegram bot token stored in the JSON config.
# Looks under channels.telegram.accounts, trying the "default" account first
# and then the first account listed; the first non-empty botToken wins.
# Globals:   CONFIG_FILE (read, used when no argument is given)
# Arguments: $1 - optional path to the config file
# Outputs:   the token on stdout
# Returns:   0 if a non-empty token was found, non-zero otherwise
#######################################
get_bot_token() {
  local config_path="${1:-$CONFIG_FILE}"

  # The path is handed over as argv instead of being pasted into the Python
  # source, so quotes or backslashes in it cannot break or alter the program.
  # stderr is discarded on purpose: callers only need the exit status.
  python3 - "$config_path" <<'PY' 2>/dev/null
import json
import sys

with open(sys.argv[1]) as f:
    config = json.load(f)

accounts = config.get("channels", {}).get("telegram", {}).get("accounts", {})
first_account = list(accounts.keys())[0] if accounts else None

for name in ("default", first_account):
    if name and name in accounts:
        token = accounts[name].get("botToken", "")
        if token:
            print(token)
            sys.exit(0)

sys.exit(1)
PY
}

# ── Sanity checks (tools) ─────────────────────────────────────────────────────
# Checked before the token lookup because get_bot_token itself runs python3;
# otherwise a missing interpreter would be reported as an unreadable config.
for required_tool in curl python3 base64; do
  command -v "$required_tool" >/dev/null 2>&1 || die "$required_tool not found"
done

# ── Bot token and chat id ─────────────────────────────────────────────────────
BOT_TOKEN="$(get_bot_token)" || die "Cannot read bot token from $CONFIG_FILE"
[[ -n "$BOT_TOKEN" ]] || die "Bot token is empty"

# Without a chat id argument, fall back to VOICE_TTS_TARGET ("telegram:<id>").
if [[ -z "$CHAT_ID" && -n "${VOICE_TTS_TARGET:-}" ]]; then
  CHAT_ID="${VOICE_TTS_TARGET#telegram:}"
fi
[[ -n "$CHAT_ID" ]] || die "CHAT_ID not provided. Pass as arg 3 or set VOICE_TTS_TARGET=telegram:<id>"

# ── Sanity checks (files) ─────────────────────────────────────────────────────
[[ -f "$AVATAR" ]] || die "Avatar not found: $AVATAR"
# Both binaries are executed later, so require an executable regular file;
# ffprobe is checked too because the duration step depends on it.
[[ -f "$FFMPEG_BIN" && -x "$FFMPEG_BIN" ]] || die "FFmpeg not found: $FFMPEG_BIN"
[[ -f "$FFPROBE_BIN" && -x "$FFPROBE_BIN" ]] || die "FFprobe not found: $FFPROBE_BIN"

# ── Step 1: Generate TTS via ElevenLabs ──────────────────────────────────────
AUDIO_FILE="$TMP_DIR/tts.mp3"
log "Step 1: Generating TTS (voice=$VOICE_ID)..."

# json.dumps escapes quotes, newlines and non-ASCII in TEXT. Every value is
# passed as argv so nothing from the shell is spliced into the Python source.
TTS_PAYLOAD=$(python3 -c '
import json
import sys

print(json.dumps({
    "text": sys.argv[1],
    "model_id": sys.argv[2],
    "voice_settings": {"stability": 0.5, "similarity_boost": 0.75},
}))
' "$TEXT" "$DEFAULT_MODEL") || die "Failed to build ElevenLabs request payload"

# The response body (audio on success, an error document otherwise) goes to
# AUDIO_FILE; only the HTTP status is captured. --max-time keeps a stalled
# connection from hanging the script, and the explicit "|| die" replaces the
# silent abort set -e would otherwise cause on a transport error.
HTTP_CODE=$(curl -sS --max-time 180 \
  -X POST "https://api.elevenlabs.io/v1/text-to-speech/${VOICE_ID}" \
  -H "xi-api-key: $ELEVENLABS_API_KEY" \
  -H "Content-Type: application/json" \
  -d "$TTS_PAYLOAD" \
  -o "$AUDIO_FILE" \
  -w "%{http_code}") || die "ElevenLabs request failed (curl exit code $?)"

if [[ "$HTTP_CODE" != "200" ]]; then
  RESP_BODY=$(cat "$AUDIO_FILE" 2>/dev/null || echo "")
  die "ElevenLabs TTS failed (HTTP $HTTP_CODE): $RESP_BODY"
fi

# GNU stat first, BSD stat as fallback.
AUDIO_SIZE=$(stat -c%s "$AUDIO_FILE" 2>/dev/null || stat -f%z "$AUDIO_FILE")
[[ "$AUDIO_SIZE" -gt 1024 ]] || die "TTS audio too small (${AUDIO_SIZE} bytes) — likely an error response"
log "✓ TTS audio: $AUDIO_FILE (${AUDIO_SIZE} bytes)"

# ── Step 2: Get audio duration ────────────────────────────────────────────────
# An ffprobe failure leaves DURATION empty; it is rejected below.
DURATION=$("$FFPROBE_BIN" -v quiet -show_entries format=duration \
  -of default=noprint_wrappers=1:nokey=1 "$AUDIO_FILE" 2>/dev/null) || DURATION=""

# Round up to whole seconds and cap at MAX_DURATION. Values are passed as argv,
# not interpolated into the Python source. A missing, non-numeric or
# non-positive duration stops the script here: DURATION_INT is used as
# ffmpeg's -t limit in Step 7, so 0 would mean an empty video after the
# fal.ai job has already been submitted.
DURATION_INT=$(python3 -c '
import math
import sys

try:
    seconds = float(sys.argv[1])
except ValueError:
    sys.exit(1)
if not math.isfinite(seconds) or seconds <= 0:
    sys.exit(1)
print(min(int(math.ceil(seconds)), int(sys.argv[2])))
' "$DURATION" "$MAX_DURATION") || die "Cannot determine TTS audio duration (ffprobe reported '${DURATION}')"
log "✓ Audio duration: ${DURATION}s (video capped at ${DURATION_INT}s)"

# ── Step 3: Upload files to fal.ai storage ────────────────────────────────────
# fal.ai requires publicly accessible URLs — upload via fal.ai storage
log "Step 3: Uploading files to fal.ai storage..."

#######################################
# Upload one local file to fal.ai storage (CDN v3) in two requests:
# an "initiate" POST that returns upload_url/file_url, then a PUT of the bytes.
# Globals:   FAL_KEY (read)
# Arguments: $1 - path of the file to upload
#            $2 - MIME type sent for the file
#            $3 - file name reported to fal.ai
# Outputs:   the resulting file URL on stdout; diagnostics on stderr via err
# Returns:   0 on success, 1 on any failure
#######################################
upload_to_fal() {
  local file_path="$1"
  local mime_type="$2"
  local file_name="$3"
  local init_payload response http_code body upload_url file_url put_code

  # Build the JSON with json.dumps so the arguments are escaped correctly.
  init_payload=$(python3 -c '
import json
import sys

print(json.dumps({"content_type": sys.argv[1], "file_name": sys.argv[2]}))
' "$mime_type" "$file_name") || {
    err "Cannot build fal.ai storage request for $file_name"
    return 1
  }

  # -w appends the HTTP status as a final line after the body.
  response=$(curl -s -w "\n%{http_code}" --max-time 30 \
    -X POST "https://rest.fal.ai/storage/upload/initiate?storage_type=fal-cdn-v3" \
    -H "Authorization: Key ${FAL_KEY}" \
    -H "Content-Type: application/json" \
    -d "$init_payload") || true

  # Split off the last line with parameter expansion (no GNU-only "head -n -1").
  http_code="${response##*$'\n'}"
  body="${response%$'\n'*}"

  if [[ "$http_code" != "200" ]]; then
    # Check for balance exhaustion
    if grep -qiE 'balance|billing|locked' <<<"$body"; then
      err "fal.ai account issue (HTTP $http_code): balance exhausted or locked. Top up at fal.ai/dashboard/billing"
    else
      err "fal.ai storage initiate failed (HTTP $http_code): $body"
    fi
    return 1
  fi

  # Parse the response once; line 1 is upload_url, line 2 is file_url.
  # A parse failure prints nothing, which leaves both variables empty.
  {
    IFS= read -r upload_url
    IFS= read -r file_url
  } < <(
    python3 -c '
import json
import sys

d = json.load(sys.stdin)
print(d.get("upload_url", ""))
print(d.get("file_url", ""))
' <<<"$body" 2>/dev/null
  ) || true

  if [[ -z "$upload_url" ]]; then
    err "No upload_url in fal.ai response: $body"
    return 1
  fi
  # Without this check an empty URL would be returned as a success.
  if [[ -z "$file_url" ]]; then
    err "No file_url in fal.ai response: $body"
    return 1
  fi

  # Upload the actual file
  put_code=$(curl -s -o /dev/null -w "%{http_code}" --max-time 120 \
    -X PUT "$upload_url" \
    -H "Content-Type: ${mime_type}" \
    --data-binary "@${file_path}") || true

  if [[ "$put_code" != "200" && "$put_code" != "204" ]]; then
    err "fal.ai file upload failed (HTTP $put_code)"
    return 1
  fi

  printf '%s\n' "$file_url"
}

#######################################
# Print a file as a "data:<mime>;base64,<payload>" URI on one line.
# Line wraps are removed with tr instead of the GNU-only "base64 -w 0".
# Arguments: $1 - file path, $2 - MIME type
# Outputs:   the data URI on stdout
# Returns:   non-zero if the file cannot be encoded
#######################################
to_data_uri() {
  local path="$1"
  local mime="$2"
  local encoded
  encoded=$(base64 < "$path" | tr -d '\n') || return 1
  printf 'data:%s;base64,%s\n' "$mime" "$encoded"
}

# Upload avatar image
log " Uploading avatar image..."
if ! IMAGE_URL=$(upload_to_fal "$AVATAR" "image/jpeg" "avatar.jpg"); then
  log " fal.ai storage upload failed, trying base64 fallback..."
  # Base64 fallback: encode image as data URI
  IMAGE_URL=$(to_data_uri "$AVATAR" "image/jpeg") || die "Cannot base64-encode avatar: $AVATAR"
  log " Using base64 data URI for image (${#IMAGE_URL} chars)"
fi
log "✓ Image URL ready"

# Upload audio file
log " Uploading TTS audio..."
if ! AUDIO_URL=$(upload_to_fal "$AUDIO_FILE" "audio/mpeg" "tts.mp3"); then
  log " fal.ai storage upload failed, trying base64 fallback..."
  AUDIO_URL=$(to_data_uri "$AUDIO_FILE" "audio/mpeg") || die "Cannot base64-encode audio: $AUDIO_FILE"
  log " Using base64 data URI for audio (${#AUDIO_URL} chars)"
fi
log "✓ Audio URL ready"

# ── Step 4: Submit VEED Fabric 1.0 job ───────────────────────────────────────
log "Step 4: Submitting VEED Fabric 1.0 lip sync job..."

# IMAGE_URL / AUDIO_URL can be large data: URIs when the base64 fallback was
# used. A single command-line argument is size-limited by the kernel, so the
# URLs are fed to python3 on stdin (printf is a builtin, no exec involved) and
# the request body is fed to curl on stdin instead of via -d "<string>".
FAL_PAYLOAD=$(printf '%s\n%s\n' "$IMAGE_URL" "$AUDIO_URL" | python3 -c '
import json
import sys

image_url, audio_url = sys.stdin.read().split("\n")[:2]
print(json.dumps({
    "image_url": image_url,
    "audio_url": audio_url,
    "resolution": sys.argv[1],
}))
' "$FAL_RESOLUTION") || die "Failed to build fal.ai request payload"

# -w appends the HTTP status as a final line. A transport failure is tolerated
# here so that it is reported by the status check below instead of aborting
# silently under set -e.
SUBMIT_RESPONSE=$(curl -s -w "\n%{http_code}" --max-time 120 \
  -X POST "https://queue.fal.run/veed/fabric-1.0" \
  -H "Authorization: Key ${FAL_KEY}" \
  -H "Content-Type: application/json" \
  --data-binary @- <<<"$FAL_PAYLOAD") || true

SUBMIT_HTTP="${SUBMIT_RESPONSE##*$'\n'}"
SUBMIT_BODY="${SUBMIT_RESPONSE%$'\n'*}"

if [[ "$SUBMIT_HTTP" != "200" && "$SUBMIT_HTTP" != "201" ]]; then
  if grep -qiE 'balance|billing|locked|exhausted' <<<"$SUBMIT_BODY"; then
    die "fal.ai account issue: balance exhausted or locked. Top up at https://fal.ai/dashboard/billing"
  fi
  die "fal.ai submit failed (HTTP $SUBMIT_HTTP): $SUBMIT_BODY"
fi

# A JSON parse failure must reach the explicit check below, not trip set -e.
REQUEST_ID=$(python3 -c '
import json
import sys

print(json.load(sys.stdin).get("request_id", ""))
' <<<"$SUBMIT_BODY" 2>/dev/null) || REQUEST_ID=""
[[ -n "$REQUEST_ID" ]] || die "No request_id in fal.ai response: $SUBMIT_BODY"
log "✓ Job submitted: request_id=$REQUEST_ID"

# ── Step 5: Poll for VEED Fabric 1.0 result ──────────────────────────────────
log "Step 5: Waiting for lip sync result (max ${FAL_MAX_WAIT}s)..."

# GET a fal.ai queue URL ($1). Prints the body followed by a final line with
# the HTTP status. Always returns 0: a transport error shows up as a
# non-200 status for the caller to handle, instead of aborting under set -e.
fal_queue_get() {
  curl -s -w "\n%{http_code}" --max-time 30 \
    "$1" \
    -H "Authorization: Key ${FAL_KEY}" || true
}

FAL_VIDEO_URL=""
WAITED=0
while (( WAITED < FAL_MAX_WAIT )); do
  sleep "$FAL_POLL_INTERVAL"
  WAITED=$((WAITED + FAL_POLL_INTERVAL))

  STATUS_RESPONSE=$(fal_queue_get "https://queue.fal.run/veed/fabric-1.0/requests/${REQUEST_ID}/status")
  STATUS_HTTP="${STATUS_RESPONSE##*$'\n'}"
  STATUS_BODY="${STATUS_RESPONSE%$'\n'*}"

  # NOTE(review): the queue API appears to answer 202 while a job is still
  # queued/running — confirm against fal.ai docs. Accepting it only means the
  # status body gets parsed and logged; any other code is retried as before.
  if [[ "$STATUS_HTTP" != "200" && "$STATUS_HTTP" != "202" ]]; then
    log " Status poll returned HTTP $STATUS_HTTP — retrying..."
    continue
  fi

  JOB_STATUS=$(python3 -c '
import json
import sys

print(json.load(sys.stdin).get("status", ""))
' <<<"$STATUS_BODY" 2>/dev/null) || JOB_STATUS=""
  log " [${WAITED}s] Status: $JOB_STATUS"

  if [[ "$JOB_STATUS" == "COMPLETED" ]]; then
    # Fetch result
    RESULT_RESPONSE=$(fal_queue_get "https://queue.fal.run/veed/fabric-1.0/requests/${REQUEST_ID}")
    RESULT_HTTP="${RESULT_RESPONSE##*$'\n'}"
    RESULT_BODY="${RESULT_RESPONSE%$'\n'*}"

    if [[ "$RESULT_HTTP" != "200" ]]; then
      die "fal.ai result fetch failed (HTTP $RESULT_HTTP): $RESULT_BODY"
    fi

    # Output schema: {"video": {"url": "..."}}
    # A parse failure leaves the URL empty so the explicit die below reports it.
    FAL_VIDEO_URL=$(python3 -c '
import json
import sys

d = json.load(sys.stdin)
video = d.get("video", {})
print(video.get("url", ""))
' <<<"$RESULT_BODY" 2>/dev/null) || FAL_VIDEO_URL=""

    [[ -n "$FAL_VIDEO_URL" ]] || die "No video URL in fal.ai result: $RESULT_BODY"
    log "✓ Lip sync video ready: $FAL_VIDEO_URL"
    break

  elif [[ "$JOB_STATUS" == "FAILED" || "$JOB_STATUS" == "ERROR" ]]; then
    ERROR_MSG=$(python3 -c '
import json
import sys

d = json.load(sys.stdin)
print(d.get("error", d.get("detail", "unknown")))
' <<<"$STATUS_BODY" 2>/dev/null) || ERROR_MSG="$STATUS_BODY"
    die "fal.ai job failed: $ERROR_MSG"
  fi
  # IN_QUEUE or IN_PROGRESS — keep polling
done

[[ -n "$FAL_VIDEO_URL" ]] || die "Timeout: fal.ai did not complete within ${FAL_MAX_WAIT}s"

# ── Step 6: Download lip sync video ──────────────────────────────────────────
log "Step 6: Downloading lip sync video..."
RAW_VIDEO="$TMP_DIR/lipsync_raw.mp4"

# -L follows redirects; the body goes to RAW_VIDEO and only the status is
# captured. -S lets curl print its own error text to stderr, and the explicit
# "|| die" replaces the silent abort set -e would cause on a transport error.
DL_HTTP=$(curl -sS -L --max-time 300 \
  -o "$RAW_VIDEO" \
  -w "%{http_code}" \
  "$FAL_VIDEO_URL") || die "Video download failed (curl exit code $?)"

[[ "$DL_HTTP" == "200" ]] || die "Video download failed (HTTP $DL_HTTP)"
# GNU stat first, BSD stat as fallback.
RAW_SIZE=$(stat -c%s "$RAW_VIDEO" 2>/dev/null || stat -f%z "$RAW_VIDEO")
[[ "$RAW_SIZE" -gt 10240 ]] || die "Downloaded video too small (${RAW_SIZE} bytes)"
log "✓ Downloaded: $RAW_VIDEO (${RAW_SIZE} bytes)"

# ── Step 7: Convert to square 1:1 format for Telegram video note ─────────────
log "Step 7: Converting to ${VIDEO_SIZE}x${VIDEO_SIZE} square for Telegram..."
VIDEO_FILE="$TMP_DIR/video_note.mp4"
FFMPEG_LOG="$TMP_DIR/ffmpeg.log"

# Scale so the shorter side reaches VIDEO_SIZE, then crop to a square.
# Testing ffmpeg inside "if !" keeps set -e from exiting before the error is
# reported; stderr is kept in a log so its tail can be shown on failure.
# -nostdin stops ffmpeg from reading the script's standard input.
if ! "$FFMPEG_BIN" -nostdin -y \
  -i "$RAW_VIDEO" \
  -vf "scale=${VIDEO_SIZE}:${VIDEO_SIZE}:force_original_aspect_ratio=increase,crop=${VIDEO_SIZE}:${VIDEO_SIZE}" \
  -c:v libx264 -preset fast -crf 23 \
  -c:a aac -b:a 64k \
  -t "$DURATION_INT" \
  -movflags +faststart \
  "$VIDEO_FILE" 2>"$FFMPEG_LOG"; then
  err "$(tail -n 5 "$FFMPEG_LOG" 2>/dev/null || true)"
  die "FFmpeg square conversion failed"
fi

# -s also rejects a zero-byte output file.
[[ -s "$VIDEO_FILE" ]] || die "FFmpeg square conversion failed"
VIDEO_SIZE_BYTES=$(stat -c%s "$VIDEO_FILE" 2>/dev/null || stat -f%z "$VIDEO_FILE")
log "✓ Final video: $VIDEO_FILE (${VIDEO_SIZE_BYTES} bytes, ${DURATION_INT}s)"

# Telegram video note limit: 50MB
MAX_VIDEO_BYTES=$((50 * 1024 * 1024))
[[ "$VIDEO_SIZE_BYTES" -lt "$MAX_VIDEO_BYTES" ]] || die "Video too large for Telegram (${VIDEO_SIZE_BYTES} > 50MB)"

# ── Step 8: Send as Telegram video note ───────────────────────────────────────
log "Step 8: Sending video note to Telegram (chat=$CHAT_ID)..."

# Plain fields use --form-string: with -F a value starting with "@" or "<" is
# read from a file, which would break a chat id such as "@channelusername".
# Only video_note uses -F, because there the file upload is intended.
# curl stays fully silent (-s) because the request URL contains the bot token.
RESPONSE=$(curl -s --max-time 120 \
  -X POST "https://api.telegram.org/bot${BOT_TOKEN}/sendVideoNote" \
  --form-string "chat_id=${CHAT_ID}" \
  -F "video_note=@${VIDEO_FILE}" \
  --form-string "length=${VIDEO_SIZE}" \
  --form-string "duration=${DURATION_INT}") || die "Telegram request failed (curl exit code $?)"

# Prints Python's True/False for the "ok" field; anything unparsable counts as failure.
SUCCESS=$(python3 -c '
import json
import sys

print(json.load(sys.stdin).get("ok", False))
' <<<"$RESPONSE" 2>/dev/null || echo "false")

if [[ "$SUCCESS" == "True" ]]; then
  MSG_ID=$(python3 -c '
import json
import sys

print(json.load(sys.stdin)["result"]["message_id"])
' <<<"$RESPONSE" 2>/dev/null || echo "?")
  log "✅ Video note sent! Message ID: $MSG_ID"
  # The message id is the script's only stdout output.
  printf '%s\n' "$MSG_ID"
else
  TG_ERR=$(python3 -c '
import json
import sys

print(json.load(sys.stdin).get("description", "unknown error"))
' <<<"$RESPONSE" 2>/dev/null || echo "$RESPONSE")
  die "Telegram API error: $TG_ERR"
fi