fuuuzzy commited on
Commit
7c71fa7
·
verified ·
1 Parent(s): 6b80f9b

Upload folder using huggingface_hub

Browse files
.gitignore CHANGED
@@ -10,3 +10,8 @@ wheels/
10
  .venv
11
  output
12
  .cache
 
 
 
 
 
 
10
  .venv
11
  output
12
  .cache
13
+ .cursor
14
+ logs
15
+ models
16
+ data
17
+ pt-br
api.md ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## 配音任务
2
+
3
+ - Request
4
+ - Path: ``
5
+ - Method: `post`
6
+ - Body:
7
+ ```json5
8
+ {
9
+ "character_voice": [
10
+ {
11
+ "character": "女主妈妈",//角色名
12
+ "id": "104982", //参考音频id
13
+ ""
14
+ "timbre_url": "https://xxx",//参考音频的地址
15
+ "timbre_text":""//参考音频文本
16
+ }
17
+ ],//参考角色音频信息
18
+ "content": [
19
+ {
20
+ "character": "女主妈妈",//角色名,跟character_voice对应上
21
+ "end": 0.9, //时间轴结束时间,时间格式的单位为秒 0.9则表示900毫秒,接收了请求处理的时候需要转换成00:00:00,100,标准的srt时间轴格式
22
+ "source": "你好", //原文本
23
+ "start": 14, //时间轴开始时间,时间格式的单位为秒,14则表示14秒,接收了请求处理的时候需要转换成00:00:00,100,标准的srt时间轴格式
24
+ "translation": "Hello" //需要生成语音的文本
25
+ }
26
+ ], //配音内容
27
+ "hook_url": "https://your-api.com/callback", //回调地址
28
+ "priority": 3, //优先级 1-5 最高5
29
+ "video_url": "https://example.com/video.mp4" //视频地址
30
+ }
31
+ ```
api.sh ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash

# Process-control script for the f5-tts-api service (start/stop/restart/status).

# --- Configuration ---
APP_NAME="f5-tts-api"
PID_FILE="app.pid"
LOG_FILE="logs/startup.log"
PYTHON_CMD="uv run app.py"

# Resolve the directory containing this script and cd into it so the
# relative PID_FILE/LOG_FILE paths work no matter where the caller runs from.
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
cd "$SCRIPT_DIR"

# Make sure the logs directory exists before redirecting output into it.
mkdir -p logs
15
+
16
start() {
    # Refuse to start twice: a PID file pointing at a live process means
    # the app is already running; a stale PID file is cleaned up.
    if [ -f "$PID_FILE" ]; then
        pid=$(cat "$PID_FILE")
        if ps -p "$pid" > /dev/null; then
            echo "$APP_NAME is already running (PID: $pid)"
            return
        else
            echo "PID file exists but process is gone. Cleaning up."
            rm "$PID_FILE"
        fi
    fi

    echo "Starting $APP_NAME..."
    # Detach from the terminal; stdout+stderr go to the startup log.
    nohup $PYTHON_CMD > "$LOG_FILE" 2>&1 &
    pid=$!
    echo "$pid" > "$PID_FILE"
    echo "$APP_NAME started with PID $pid"
    echo "Logs are being written to $LOG_FILE"
}
35
+
36
stop() {
    if [ ! -f "$PID_FILE" ]; then
        echo "$APP_NAME is not running (PID file not found)"
        return
    fi

    pid=$(cat "$PID_FILE")
    if ps -p "$pid" > /dev/null; then
        echo "Stopping $APP_NAME (PID: $pid)..."
        kill "$pid"
        # Wait for the process to exit gracefully; escalate to SIGKILL
        # if it is still alive after 10 seconds.
        count=0
        while ps -p "$pid" > /dev/null; do
            sleep 1
            count=$((count + 1))
            if [ "$count" -ge 10 ]; then
                echo "Process did not stop after 10 seconds. Force killing..."
                kill -9 "$pid"
                break
            fi
        done
        rm "$PID_FILE"
        echo "$APP_NAME stopped"
    else
        echo "$APP_NAME is not running (Process not found)"
        rm "$PID_FILE"
    fi
}
64
+
65
restart() {
    # Stop, give the OS a moment to release the port/PID, then start again.
    stop
    sleep 2
    start
}
70
+
71
status() {
    # Report running/stopped based on the PID file and process liveness.
    if [ -f "$PID_FILE" ]; then
        pid=$(cat "$PID_FILE")
        if ps -p "$pid" > /dev/null; then
            echo "$APP_NAME is running (PID: $pid)"
        else
            echo "$APP_NAME is stopped (PID file exists but process is gone)"
        fi
    else
        echo "$APP_NAME is stopped"
    fi
}
83
+
84
+ case "$1" in
85
+ start)
86
+ start
87
+ ;;
88
+ stop)
89
+ stop
90
+ ;;
91
+ restart)
92
+ restart
93
+ ;;
94
+ status)
95
+ status
96
+ ;;
97
+ *)
98
+ echo "Usage: $0 {start|stop|restart|status}"
99
+ exit 1
100
+ ;;
101
+ esac
102
+
103
+ exit 0
app.pid ADDED
@@ -0,0 +1 @@
 
 
1
+ 171729
app.py ADDED
@@ -0,0 +1,349 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import hmac
import logging
import os
import shutil
import threading
import time
from functools import wraps

import requests
import yaml
from flask import Flask, request, jsonify, g
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

from services.logger import get_app_logger, RequestLogger, task_id_var, get_process_worker_logger, \
    get_upload_worker_logger
from services.tts_service import TTSService
from services.queue_manager import QueueManager
from services.r2_uploader import R2Uploader
from services.uvr5_service import UVR5Service
from services.merger_service import MergerService
21
+
22
+ logger = get_app_logger()
23
+
24
# Configuration loading
def load_config(config_path='config.yaml'):
    """Parse the YAML configuration file at *config_path* and return it as a dict."""
    with open(config_path, 'r', encoding='utf-8') as cfg_file:
        return yaml.safe_load(cfg_file)

# Module-level config, loaded once at import time.
config = load_config()
30
+
31
# Auth Helpers
def check_auth(username, password):
    """Validate HTTP Basic credentials against the configured API user.

    Uses hmac.compare_digest (constant-time comparison) so the check does
    not leak credential length/prefix information via timing. Missing
    config keys now yield False (401) instead of raising KeyError (500).
    """
    app_config = config.get('app', {})
    user_ok = hmac.compare_digest(str(username), str(app_config.get('api_username', '')))
    pass_ok = hmac.compare_digest(str(password), str(app_config.get('api_password', '')))
    # Evaluate both comparisons so username mismatch is not observable
    # through an early-exit timing difference.
    return user_ok and pass_ok
35
+
36
def authenticate():
    """Return a 401 response challenging the client for HTTP Basic credentials."""
    challenge = {'WWW-Authenticate': 'Basic realm="Login Required"'}
    return jsonify({'error': 'Authentication required'}), 401, challenge
38
+
39
def requires_auth(f):
    """Decorator enforcing HTTP Basic auth on a Flask view function."""
    @wraps(f)
    def decorated(*args, **kwargs):
        credentials = request.authorization
        if credentials and check_auth(credentials.username, credentials.password):
            return f(*args, **kwargs)
        # No credentials supplied, or they did not match: challenge the client.
        return authenticate()
    return decorated
47
+
48
def send_hook_with_retry(url: str, data: dict, max_retries: int = 3):
    """POST *data* as JSON to the callback *url*, retrying transient errors.

    Retries up to *max_retries* times with exponential backoff on HTTP
    500/502/503/504. Returns the Response on success, or None on failure —
    callbacks are best-effort and must never crash a worker thread.

    Fixes: the Session is now closed deterministically (it previously
    leaked pooled connections), and the dead `pass` after logging is gone.
    """
    retries = Retry(total=max_retries, backoff_factor=1,
                    status_forcelist=[500, 502, 503, 504])
    adapter = HTTPAdapter(max_retries=retries)
    with requests.Session() as session:
        session.mount('http://', adapter)
        session.mount('https://', adapter)
        try:
            response = session.post(url, json=data, timeout=10)
            response.raise_for_status()
            return response
        except Exception as e:
            # Swallow deliberately: hook delivery failure must not abort the task.
            logger.error(f"Failed to send hook to {url}: {e}")
            return None
60
+
61
def download_file(url: str, path: str):
    """Stream the resource at *url* to *path* on disk.

    Raises requests.HTTPError for non-2xx responses. The response is used
    as a context manager so the underlying HTTP connection is released
    even if the disk write fails (the original leaked it on error).
    """
    with requests.get(url, stream=True, timeout=60) as response:
        response.raise_for_status()
        with open(path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:  # filter out keep-alive chunks
                    f.write(chunk)
67
+
68
# Initialize Flask
app = Flask(__name__)
# Cap request bodies at 100 MB.
app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024

# Initialize Services
queue_manager = QueueManager(config['redis'])  # Redis-backed task/upload queues
r2_uploader = R2Uploader(config['r2'])         # Cloudflare R2 object storage client
tts_service = TTSService(config)
uvr5_service = UVR5Service(config)
merger_service = MergerService(config)

# Temp Dir for Videos
# Scratch space for downloaded inputs and merged outputs; workers clean it up.
VIDEO_TEMP_DIR = 'data/temp_videos'
os.makedirs(VIDEO_TEMP_DIR, exist_ok=True)
82
+
83
+ # -------------------------------------------------------------------------
84
+ # Workers
85
+ # -------------------------------------------------------------------------
86
+
87
def process_worker():
    """
    Main Pipeline Worker:
    1. Fetch Task
    2. Download Video
    3. Run TTS Generation
    4. Run UVR5 Separation (get BGM)
    5. Merge (Video + TTS + BGM)
    6. Push to Upload Queue

    Runs forever in a daemon thread. On any per-task failure it sends a
    "failed" callback (if a hook_url was given) and always cleans up the
    task's temporary files in the finally block.
    """
    worker_logger = get_process_worker_logger()
    worker_logger.info("Main Process Worker started")

    while True:
        try:
            task = queue_manager.get_process_task()
            if not task:
                # Queue empty: avoid a busy-wait spin.
                time.sleep(1)
                continue

            task_id = task.get('task_id')
            # Bind the task id into the logging context for this iteration.
            token = task_id_var.set(task_id)

            # Context variables for cleanup
            local_video_path = None
            bgm_path = None
            vocals_path = None
            task_tts_dir = None
            final_output_path = None
            success = False

            try:
                worker_logger.info("Processing started.")

                # 1. Download Video
                video_url = task['data'].get('video_url')
                if not video_url:
                    raise ValueError("Missing video_url")

                local_video_path = os.path.join(VIDEO_TEMP_DIR, f"{task_id}_input.mp4")
                worker_logger.info(f"Downloading video from {video_url}")
                download_file(video_url, local_video_path)

                # 2. Run TTS
                worker_logger.info("Running TTS...")
                tts_result = tts_service.process_task(task)
                segments = tts_result['segments']
                task_tts_dir = tts_result['task_dir']

                # 3. Run UVR5 (vocal/background separation on the video's audio)
                worker_logger.info("Running UVR5 Separation...")
                vocals_path, bgm_path = uvr5_service.process_audio(local_video_path, task_id)

                if not bgm_path or not os.path.exists(bgm_path):
                    raise Exception("UVR5 failed to produce background music.")

                # 4. Merge generated speech + extracted BGM back into the video
                worker_logger.info("Merging Audio and Video...")
                final_output_path = os.path.join(VIDEO_TEMP_DIR, f"{task_id}_final.mp4")

                merger_service.merge_video(
                    video_path=local_video_path,
                    bgm_path=bgm_path,
                    segments=segments,
                    output_path=final_output_path
                )

                # 5. Push to Upload
                upload_task = {
                    'task_id': task_id,
                    'file_path': final_output_path,
                    'hook_url': task['data'].get('hook_url'),
                }
                queue_manager.push_upload_task(upload_task)
                success = True

            except Exception as e:
                worker_logger.error(f"Task processing failed: {e}", exc_info=True)

                # Best-effort failure callback to the client, if one was requested.
                if 'hook_url' in task.get('data', {}):
                    hook_url = task['data']['hook_url']
                    failure_payload = {
                        "task_uuid": task_id,
                        "status": "failed",
                        "timestamp": int(time.time()),
                        "error_message": str(e)
                    }
                    send_hook_with_retry(hook_url, failure_payload)

            finally:
                # Cleanup Logic
                try:
                    if local_video_path and os.path.exists(local_video_path):
                        os.remove(local_video_path)

                    if bgm_path and os.path.exists(bgm_path):
                        os.remove(bgm_path)

                    if vocals_path and os.path.exists(vocals_path):
                        os.remove(vocals_path)

                    if task_tts_dir and os.path.exists(task_tts_dir):
                        shutil.rmtree(task_tts_dir)

                    # Only delete final output if we FAILED.
                    # If success, upload worker handles it.
                    if not success and final_output_path and os.path.exists(final_output_path):
                        os.remove(final_output_path)

                except Exception as cleanup_err:
                    # Cleanup failures are logged but never mask the task result.
                    worker_logger.warning(f"Cleanup error: {cleanup_err}")

                task_id_var.reset(token)

        except Exception as e:
            # Loop-level guard: keep the worker alive on unexpected errors.
            worker_logger.error(f"Worker Loop Error: {e}")
            time.sleep(5)
204
+
205
def upload_worker():
    """
    Upload Worker:
    1. Upload Final Video
    2. Send Success Callback
    3. Cleanup Final Video

    Runs forever in a daemon thread. On failure a "failed" callback is
    sent (best-effort); the merged video file is always removed.

    Fix: the loop-level exception handler now logs through worker_logger
    (it previously used the app logger, inconsistent with process_worker).
    """
    worker_logger = get_upload_worker_logger()
    worker_logger.info("Upload Worker started")

    while True:
        try:
            result = queue_manager.get_upload_task(timeout=5)
            if not result:
                continue

            task_id = result.get('task_id')
            # Bind the task id into the logging context for this iteration.
            token = task_id_var.set(task_id)

            file_path = result.get('file_path')
            hook_url = result.get('hook_url')

            try:
                worker_logger.info(f"Uploading result: {file_path}")

                file_url = None
                if file_path and os.path.exists(file_path):
                    object_key = f"{task_id}.mp4"
                    file_url = r2_uploader.upload_file(file_path, object_key=object_key)
                else:
                    raise FileNotFoundError(f"File to upload not found: {file_path}")

                if hook_url:
                    success_payload = {
                        "task_uuid": task_id,
                        "status": "success",
                        "timestamp": int(time.time()),
                        "result_url": file_url
                    }
                    worker_logger.info(f"Sending success callback to {hook_url}")
                    send_hook_with_retry(hook_url, success_payload)

            except Exception as e:
                worker_logger.error(f"Upload failed: {e}", exc_info=True)
                if hook_url:
                    failure_payload = {
                        "task_uuid": task_id,
                        "status": "failed",
                        "timestamp": int(time.time()),
                        "error_message": str(e)
                    }
                    send_hook_with_retry(hook_url, failure_payload)
            finally:
                # Cleanup the final video file
                if file_path and os.path.exists(file_path):
                    try:
                        os.remove(file_path)
                        worker_logger.info(f"Removed final video: {file_path}")
                    except Exception as e:
                        worker_logger.warning(f"Failed to remove file: {e}")

                task_id_var.reset(token)

        except Exception as e:
            # Loop-level guard: keep the worker alive on unexpected errors.
            worker_logger.error(f"Upload Loop Error: {e}")
            time.sleep(5)
271
+
272
+ # -------------------------------------------------------------------------
273
+ # Flask Routes
274
+ # -------------------------------------------------------------------------
275
+
276
@app.before_request
def before_request():
    # Record the request start time so after_request can log the duration.
    g.start_time = time.time()
279
+
280
@app.after_request
def after_request(response):
    # Log every request (with its elapsed time) through RequestLogger.
    # hasattr guard: before_request may not have run for some error paths.
    if hasattr(g, 'start_time'):
        duration = time.time() - g.start_time
        RequestLogger.log_request(request, response, duration)
    return response
286
+
287
@app.route('/dubbing/character', methods=['POST'])
@requires_auth
def generate():
    """Queue a new dubbing task.

    Expects a JSON body with character_voice, content, hook_url and
    video_url (see api.md). Returns 201 with the queued task id.

    Fix: request.json raises for a missing/invalid JSON body, which the
    broad except turned into a 500; get_json(silent=True) lets us return
    a proper 400 instead.
    """
    try:
        data = request.get_json(silent=True)
        if not data:
            return jsonify({'error': 'Request body must be JSON'}), 400

        # Basic Validation
        required = ['character_voice', 'content', 'hook_url', 'video_url']
        for field in required:
            if not data.get(field):
                return jsonify({'error': f'Missing field: {field}'}), 400

        priority = data.get('priority', 3)
        if priority not in range(1, 6):
            return jsonify({'error': 'Priority must be 1-5'}), 400

        task_id = queue_manager.add_task(data, priority)
        logger.info(f"Created Task: {task_id}")

        return jsonify({
            'task_uuid': task_id,
            'status': 'queued',
            'message': 'Task queued successfully'
        }), 201

    except Exception as e:
        logger.error(f"API Error: {e}")
        return jsonify({'error': str(e)}), 500
314
+
315
@app.route('/dubbing/character/tasks/<task_id>/cancel', methods=['DELETE'])
@requires_auth
def cancel_task(task_id: str):
    """Remove a queued task before the worker picks it up."""
    try:
        removed = queue_manager.delete_process_task(task_id)
    except Exception as e:
        return jsonify({'error': str(e)}), 500
    if removed:
        return jsonify({'message': 'Task canceled'}), 200
    # Already running, finished, or never existed.
    return jsonify({'message': 'Task not found or already processed'}), 404
324
+
325
@app.errorhandler(500)
def internal_error(error):
    # Catch-all for unhandled exceptions: log the detail server-side,
    # return only a generic message to the client.
    logger.error(f"500 Error: {error}")
    return jsonify({'error': 'Internal server error'}), 500
329
+
330
def main():
    """Start the service: prepare directories, launch worker threads, run Flask."""
    logger.info("Starting Service...")

    # Directories
    os.makedirs(config['tts']['output_dir'], exist_ok=True)
    os.makedirs(config['tts']['voices_dir'], exist_ok=True)
    os.makedirs(VIDEO_TEMP_DIR, exist_ok=True)

    # Threads
    # NOTE(review): daemon threads die abruptly with the main process;
    # presumably in-flight tasks are re-fetched from the Redis queue on
    # restart — confirm QueueManager's redelivery semantics.
    threading.Thread(target=process_worker, daemon=True).start()
    threading.Thread(target=upload_worker, daemon=True).start()

    app.run(
        host=config['app']['host'],
        port=config['app']['port'],
        debug=config['app']['debug']
    )

if __name__ == '__main__':
    main()
config.yaml ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ app:
2
+ host: '0.0.0.0'
3
+ port: 8000
4
+ debug: false
5
+ api_username: admin
6
+ api_password: admin  # NOTE (review): change these default credentials before deploying
7
+
8
+ # Redis 配置
9
+ redis:
10
+ host: 'localhost'
11
+ port: 6379
12
+ db: 0
13
+ password: null
14
+ queue_key: 'tts:generate'
15
+ queue_key_hash: 'tts:generate_hash'
16
+ upload_queue_key: 'tts:upload'
17
+ max_connections: 5
18
+
19
+ # Cloudflare R2 configuration
+ # SECURITY (review): live access keys are committed below. Rotate these
+ # credentials immediately and load them from environment variables or a
+ # secrets manager instead of checking them into version control.
20
+ r2:
21
+ access_key_id: '2c4cef629ca75ffe03376206c0a3e365'
22
+ secret_access_key: '42cb6c0dedd621bbe2a38eb52c5d4b4738d69038705020c8cd14018dcc30ee53'
23
+ bucket_name: 'ls-tts'
24
+ endpoint_url: 'https://3322fcf6693dc79f8e04aa2f4918bc44.r2.cloudflarestorage.com'
25
+ public_url: 'https://tts.luckyshort.net'
26
+
27
+ # TTS 服务配置
28
+ tts:
29
+ checkpoint_file: 'pt-br/model_last.safetensors' # 模型文件路径
30
+ vocab_file: 'vocab.txt' # 词表文件 (如果需要)
31
+ vocoder_name: 'vocos' # 默认 vocoder
32
+ remove_silence: true
33
+ speed: 1.0
34
+ device: 'cuda' # 'cuda' or 'cpu'
35
+ # 路径配置
36
+ voices_dir: 'data/voices' # 参考音频缓存目录
37
+ output_dir: 'data/outputs' # 生成结果临时目录
38
+
39
+
40
+ uvr5:
41
+ model_dir: './models/uvr5'
42
+ output_dir: './temp/uvr5'
43
+ uvr5_model: 'UVR-MDX-NET-Inst_HQ_4' # UVR5 模型名称
merger.py ADDED
@@ -0,0 +1,658 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env python3
"""
Automatic audio merging script - Tencent Cloud TTS cloned audio.

Mixes multiple cloned audio clips and BGM according to the audio
parameters, then muxes the result into the video.

Core features:
1. Smart audio processing strategies (fill / direct overlay / speed-up)
2. Anti-clipping safeguards (fade in/out, compression, limiting)
3. BGM background-music mixing
4. Chained atempo processing (beyond FFmpeg's 0.5-2.0 limit)
5. Muxing the mixed audio back into the video
"""

import logging
import math
import os
import subprocess
from dataclasses import dataclass
from typing import Dict, List, Optional

# Reuse the process_worker logger so merger output lands in the same
# log stream as the pipeline worker that drives this module.
logger = logging.getLogger('process_worker')

# ============================================================================
# Constants
# ============================================================================

SAFETY_MARGIN = 0.01  # safety gap between clips, in seconds
FADE_DURATION = 0.15  # fade-in/fade-out length, in seconds
VOLUME_LEVEL = 0.95  # pre-mix volume attenuation
COMPRESSOR_THRESHOLD = -12  # compressor threshold (dB)
COMPRESSOR_RATIO = 4  # compression ratio
LIMITER_LEVEL = 0.95  # limiter ceiling
MAX_SPEED_RATIO = 4.0  # max speed-up factor, prevents extreme acceleration
35
+
36
+
37
+ # ============================================================================
38
+ # 数据类定义
39
+ # ============================================================================
40
+
41
@dataclass
class AudioParam:
    """Parameters describing one cloned audio clip on the timeline."""
    start_secs: float  # timeline start in seconds (required)
    end_secs: float  # timeline end in seconds (required)
    clone_audio_path: str  # path to the cloned audio file (required)
    original_audio_length: float  # duration of the original audio (required)
    clone_audio_length: float  # duration of the cloned audio (required)
    audio_sort_num: int  # ordering index (required)

    def __post_init__(self):
        """Validate every field immediately after construction."""
        # Path checks raise their own exception types, so handle them first.
        if not self.clone_audio_path:
            raise ValueError("clone_audio_path 不能为空")
        if not os.path.exists(self.clone_audio_path):
            raise FileNotFoundError(f"音频文件不存在: {self.clone_audio_path}")
        # Remaining numeric checks all raise ValueError; evaluated in the
        # same order as the original so the first failure wins.
        checks = (
            (self.start_secs >= 0,
             f"start_secs 必须非负,实际值: {self.start_secs}"),
            (self.end_secs > self.start_secs,
             f"end_secs 必须大于 start_secs,start_secs: {self.start_secs}, end_secs: {self.end_secs}"),
            (self.original_audio_length > 0,
             f"original_audio_length 必须大于0,实际值: {self.original_audio_length}"),
            (self.clone_audio_length > 0,
             f"clone_audio_length 必须大于0,实际值: {self.clone_audio_length}"),
            (self.audio_sort_num >= 0,
             f"audio_sort_num 必须非负,实际值: {self.audio_sort_num}"),
        )
        for ok, message in checks:
            if not ok:
                raise ValueError(message)
67
+
68
+
69
@dataclass
class AudioMerge:
    """Top-level merge job parameters."""
    output_path: str  # output path (required)
    bgm_path: str  # BGM audio path (required)
    input_path: str  # input path (required)
    input_type: str = "video"  # audio, video
    speed_strategy: str = "max"  # audio strategy: max (default), mix, normal
    # NOTE(review): default of None for a "required" list is fragile —
    # __post_init__ rejects it, but a field(default_factory=...) or no
    # default would express the contract better.
    audio_params: List[AudioParam] = None  # list of AudioParam (required)

    def __post_init__(self):
        """Validate fields and normalize the clip ordering."""
        if not self.output_path:
            raise ValueError("output_path 不能为空")
        if not self.bgm_path:
            raise ValueError("bgm_path 不能为空")
        if not os.path.exists(self.bgm_path):
            raise FileNotFoundError(f"BGM文件不存在: {self.bgm_path}")
        if not self.input_path:
            raise ValueError("input_path 不能为空")
        if not os.path.exists(self.input_path):
            raise FileNotFoundError(f"输入文件不存在: {self.input_path}")
        # Output and input paths must differ (ffmpeg cannot overwrite in place).
        output_abs = os.path.abspath(self.output_path)
        input_abs = os.path.abspath(self.input_path)
        if output_abs == input_abs:
            raise ValueError(f"output_path 和 input_path 不能相同: {output_abs}")
        if not self.audio_params or len(self.audio_params) == 0:
            raise ValueError("audio_params 不能为空")
        if self.speed_strategy not in ["mix", "normal", "max"]:
            raise ValueError(f"speed_strategy 必须是 mix/normal/max 之一,实际值: {self.speed_strategy}")
        # Sort clips by their ordering index.
        self.audio_params = sorted(self.audio_params, key=lambda x: x.audio_sort_num)
102
+
103
+
104
+ # ============================================================================
105
+ # 工具函数
106
+ # ============================================================================
107
+
108
def get_audio_duration(audio_path: str) -> float:
    """Return the duration (in seconds) of *audio_path* using ffprobe.

    Raises a generic Exception (with a Chinese message, matching the rest
    of this module) on ffprobe timeout or non-zero exit.
    """
    cmd = [
        'ffprobe', '-v', 'error',
        '-show_entries', 'format=duration',
        '-of', 'default=noprint_wrappers=1:nokey=1',
        audio_path
    ]
    try:
        result = subprocess.check_output(
            cmd,
            stderr=subprocess.STDOUT,
            timeout=30  # 30-second timeout
        )
        return float(result.decode().strip())
    except subprocess.TimeoutExpired:
        raise Exception(f"获取音频时长超时: {audio_path}")
    except subprocess.CalledProcessError as e:
        error_output = e.output.decode() if e.output else "未知错误"
        raise Exception(f"获取音频时长失败: {audio_path}\n{error_output}")
128
+
129
+
130
def build_atempo_chain(speed_ratio: float) -> str:
    """Build an FFmpeg ``atempo`` filter chain for *speed_ratio*.

    FFmpeg's atempo filter only accepts factors in [0.5, 2.0], so ratios
    outside that range are decomposed into a chain of stages, e.g.
    4.0 -> "atempo=2.0,atempo=2.000000,".

    Returns an empty string for ratio 1.0 (no tempo change), otherwise a
    comma-terminated chain ready to prepend to further audio filters.

    Raises:
        ValueError: if *speed_ratio* is not strictly positive (previously
        this fell through to math.log and failed with a cryptic
        "math domain error").
    """
    if speed_ratio <= 0:
        raise ValueError(f"speed_ratio must be positive, got {speed_ratio}")
    if speed_ratio == 1.0:
        return ""
    if 0.5 <= speed_ratio <= 2.0:
        return f"atempo={speed_ratio:.6f},"
    if speed_ratio < 0.5:
        # Stack 0.5x stages until the remaining factor fits in range.
        stages = int(math.ceil(math.log(speed_ratio) / math.log(0.5)))
        final_ratio = speed_ratio / (0.5 ** (stages - 1))
        return "atempo=0.5," * (stages - 1) + f"atempo={final_ratio:.6f},"
    # speed_ratio > 2.0: stack 2.0x stages the same way.
    stages = int(math.ceil(math.log(speed_ratio) / math.log(2.0)))
    final_ratio = speed_ratio / (2.0 ** (stages - 1))
    return "atempo=2.0," * (stages - 1) + f"atempo={final_ratio:.6f},"
143
+
144
+
145
+ # ============================================================================
146
+ # 音频策略计算
147
+ # ============================================================================
148
+
149
def calculate_audio_strategy(
    audio_duration: float,
    srt_duration: float,
    next_gap: Optional[float],
    speed_strategy: str = 'max',
    start_time: float = 0.0,
    end_time: float = 0.0
) -> Dict:
    """Decide how one cloned clip should be fitted onto the timeline.

    Strategies:
      - 'mix':    keep the clone as-is; overflow is simply mixed over what follows.
      - 'normal': fit within the subtitle slot, speeding up if needed.
      - 'max':    may also spill into the gap before the next clip; speeds up
                  only when even slot+gap is too short.
    Any other value falls back to 'normal'.

    Returns a dict with keys: strategy ('direct'/'speedup'), speed_ratio,
    target_duration, actual_duration, description (human-readable log line).

    Fix: the '[max] direct' description contained a mojibake run
    ("克隆/��理后") — restored to "克隆/处理后" to match the other branches.
    """
    # None means "no following clip", i.e. unlimited room to spill into.
    next_gap_val = next_gap if next_gap is not None else float('inf')

    if speed_strategy == 'mix':
        clone_ratio = audio_duration / srt_duration if srt_duration > 0 else 0
        description = (
            f'[mix] 保持原音 | 原始: {srt_duration:.3f}s | 克隆: {audio_duration:.3f}s ({clone_ratio:.3f}x) | 处理后: {audio_duration:.3f}s | '
            f'速度: {1.0:.3f}x (克隆/处理后 = {audio_duration:.3f}/{audio_duration:.3f}) | '
            f'时间轴: {start_time:.3f}s -> {end_time:.3f}s | 超出部分会混音'
        )
        return {
            'strategy': 'direct',
            'speed_ratio': 1.0,
            'target_duration': audio_duration,
            'actual_duration': audio_duration,
            'description': description
        }

    if speed_strategy == 'normal':
        target_dur = srt_duration + SAFETY_MARGIN
        if audio_duration <= target_dur:
            clone_ratio = audio_duration / srt_duration if srt_duration > 0 else 0
            description = (
                f'[normal] 直接使用 | 原始: {srt_duration:.3f}s | 克隆: {audio_duration:.3f}s ({clone_ratio:.3f}x) | 处理后: {audio_duration:.3f}s | '
                f'速度: {1.0:.3f}x (克隆/处理后 = {audio_duration:.3f}/{audio_duration:.3f}) | '
                f'时间轴: {start_time:.3f}s -> {end_time:.3f}s | 未超出字幕时长'
            )
            return {
                'strategy': 'direct',
                'speed_ratio': 1.0,
                'target_duration': audio_duration,
                'actual_duration': audio_duration,
                'description': description
            }
        speed_ratio = audio_duration / target_dur
        # Clamp acceleration to the configured maximum (default 4x).
        if speed_ratio > MAX_SPEED_RATIO:
            original_target_dur = target_dur
            original_speed_ratio = speed_ratio
            logger.warning(
                f'⚠️ 加速倍数超过限制 | 原始加速: {original_speed_ratio:.3f}x | '
                f'已限制为: {MAX_SPEED_RATIO}x | 音频时长: {audio_duration:.3f}s | '
                f'目标时长: {original_target_dur:.3f}s -> {audio_duration / MAX_SPEED_RATIO:.3f}s | '
                f'时间轴: {start_time:.3f}s -> {end_time:.3f}s'
            )
            speed_ratio = MAX_SPEED_RATIO
            target_dur = audio_duration / MAX_SPEED_RATIO
        clone_ratio = audio_duration / srt_duration if srt_duration > 0 else 0
        description = (
            f'[normal] 提速到结束 | 原始: {srt_duration:.3f}s | 克隆: {audio_duration:.3f}s ({clone_ratio:.3f}x) | 处理后: {target_dur:.3f}s | '
            f'速度: {speed_ratio:.3f}x (克隆/处理后 = {audio_duration:.3f}/{target_dur:.3f}) | '
            f'时间轴: {start_time:.3f}s -> {end_time:.3f}s'
        )
        return {
            'strategy': 'speedup',
            'speed_ratio': speed_ratio,
            'target_duration': target_dur,
            'actual_duration': audio_duration,
            'description': description
        }

    if speed_strategy == 'max':
        max_available_dur = srt_duration + next_gap_val
        if audio_duration <= max_available_dur:
            clone_ratio = audio_duration / srt_duration if srt_duration > 0 else 0
            description = (
                f'[max] 直接使用 | 原始: {srt_duration:.3f}s | 克隆: {audio_duration:.3f}s ({clone_ratio:.3f}x) | 处理后: {audio_duration:.3f}s | '
                f'速度: {1.0:.3f}x (克隆/处理后 = {audio_duration:.3f}/{audio_duration:.3f}) | '
                f'时间轴: {start_time:.3f}s -> {end_time:.3f}s | 间隙: {next_gap_val:.3f}s'
            )
            return {
                'strategy': 'direct',
                'speed_ratio': 1.0,
                'target_duration': audio_duration,
                'actual_duration': audio_duration,
                'description': description
            }
        target_dur = max_available_dur - SAFETY_MARGIN
        speed_ratio = audio_duration / target_dur
        # Clamp acceleration to the configured maximum (default 4x).
        if speed_ratio > MAX_SPEED_RATIO:
            original_target_dur = target_dur
            original_speed_ratio = speed_ratio
            logger.warning(
                f'⚠️ 加速倍数超过限制 | 原始加速: {original_speed_ratio:.3f}x | '
                f'已限制为: {MAX_SPEED_RATIO}x | 音频时长: {audio_duration:.3f}s | '
                f'目标时长: {original_target_dur:.3f}s -> {audio_duration / MAX_SPEED_RATIO:.3f}s | '
                f'时间轴: {start_time:.3f}s -> {end_time:.3f}s'
            )
            speed_ratio = MAX_SPEED_RATIO
            target_dur = audio_duration / MAX_SPEED_RATIO
        clone_ratio = audio_duration / srt_duration if srt_duration > 0 else 0
        description = (
            f'[max] 提速到下个 | 原始: {srt_duration:.3f}s | 克隆: {audio_duration:.3f}s ({clone_ratio:.3f}x) | 处理后: {target_dur:.3f}s | '
            f'速度: {speed_ratio:.3f}x (克隆/处理后 = {audio_duration:.3f}/{target_dur:.3f}) | '
            f'时间轴: {start_time:.3f}s -> {end_time:.3f}s | 间隙: {next_gap_val:.3f}s'
        )
        return {
            'strategy': 'speedup',
            'speed_ratio': speed_ratio,
            'target_duration': target_dur,
            'actual_duration': audio_duration,
            'description': description
        }

    # Unknown strategy name: fall back to the conservative 'normal' behaviour.
    return calculate_audio_strategy(audio_duration, srt_duration, next_gap, 'normal', start_time, end_time)
263
+
264
+
265
def analyze_audio_tracks(
    audio_params: List[AudioParam],
    speed_strategy: str = 'max',
    task_logger=None
) -> List[Dict]:
    """Analyze the audio tracks and compute a processing strategy for each.

    Uses each clip's start_secs/end_secs to derive its timeline position
    and the gap before the next clip, then delegates the fitting decision
    to calculate_audio_strategy.
    """
    # Use the provided logger, or fall back to the module logger.
    log = task_logger or logger

    tracks = []

    for idx, param in enumerate(audio_params):
        # Use the provided clone_audio_length (already validated in __post_init__).
        audio_duration = param.clone_audio_length

        # Use original_audio_length as the subtitle (SRT) slot duration.
        srt_duration = param.original_audio_length

        # Use the provided start_secs and end_secs.
        start_time = param.start_secs
        end_time = param.end_secs

        # Compute the gap before the next clip.
        next_gap = None
        if idx < len(audio_params) - 1:
            # End time of the current clip.
            current_end_time = end_time
            # Start time of the next clip.
            next_param = audio_params[idx + 1]
            next_start_time = next_param.start_secs
            # Real gap: next clip start minus current clip end.
            # Contiguous clips give gap = 0; a gap gives > 0; overlap gives < 0.
            next_gap = next_start_time - current_end_time

        # Compute the processing strategy.
        # For the last clip, a 'max' strategy falls back to 'normal'
        # (avoids the infinite gap producing speed_ratio = 0).
        effective_strategy = speed_strategy
        is_last_track = (idx == len(audio_params) - 1)
        if is_last_track and speed_strategy == 'max':
            effective_strategy = 'normal'

        strategy = calculate_audio_strategy(
            audio_duration,
            srt_duration,
            next_gap,
            effective_strategy,
            start_time,
            end_time
        )

        tracks.append({
            'id': param.audio_sort_num,
            'audio_file': param.clone_audio_path,
            'start_time': start_time,
            'end_time': end_time,
            'srt_duration': srt_duration,
            'audio_duration': audio_duration,
            'next_gap': next_gap,
            'strategy': strategy,
            'param': param
        })

        log.info(f" → 音频 [{param.audio_sort_num:03d}]: {strategy['description']}")

    return tracks
333
+
334
+
335
+ # ============================================================================
336
+ # FFmpeg Filter Complex 构建
337
+ # ============================================================================
338
+
339
def build_filter_complex_for_video(
    audio_tracks: List[Dict],
    has_bgm: bool
) -> str:
    """Build the FFmpeg filter_complex string for the video-muxing case.

    Input layout is [0:video] [1..N:clone audio] [N+1:BGM (if any)].
    Each clip is tempo-adjusted, trimmed, attenuated, faded and delayed
    into place; all clips (plus BGM) are amix-ed, then compressed and
    limited to prevent clipping. The video stream itself is not filtered
    (the caller maps 0:v directly).
    """
    filters = []

    # 1. Process each cloned audio clip.
    for idx, track in enumerate(audio_tracks):
        input_idx = idx + 1  # input index: [0:video] [1:audio1] [2:audio2] ...
        audio_label = f"a{idx}"
        strategy = track['strategy']

        speed_ratio = strategy['speed_ratio']
        target_duration = strategy['target_duration']
        start_time = track['start_time']

        # Build the atempo chain.
        atempo_chain = build_atempo_chain(speed_ratio)

        # Keep the fade shorter than half the clip so fades never overlap.
        safe_fade_dur = min(FADE_DURATION, target_duration / 2.0)

        # Filter pipeline: tempo -> trim -> reset PTS -> attenuate -> fades -> delay.
        audio_filter = (
            f"[{input_idx}:a]"
            f"{atempo_chain}"  # tempo change (if needed)
            f"atrim=start=0:end={target_duration:.3f},"  # trim to target duration
            f"asetpts=PTS-STARTPTS,"  # reset timestamps
            f"volume={VOLUME_LEVEL},"  # pre-mix attenuation
            f"afade=t=in:st=0:d={safe_fade_dur:.3f}:curve=esin,"  # fade in
            f"afade=t=out:st={max(0.0, target_duration - safe_fade_dur):.3f}:d={safe_fade_dur:.3f}:curve=esin,"  # fade out
            f"adelay={int(start_time * 1000)}|{int(start_time * 1000)}"  # delay to timeline position (last filter, no trailing comma)
            f"[{audio_label}]"
        )
        filters.append(audio_filter)

    # 2. Process the BGM track.
    if has_bgm:
        bgm_input_idx = len(audio_tracks) + 1  # BGM is the last input
        bgm_filter = f"[{bgm_input_idx}:a]volume=1.0[bgm]"
        filters.append(bgm_filter)

    # 3. Mix all prepared streams together.
    audio_labels = "".join([f"[a{i}]" for i in range(len(audio_tracks))])
    if has_bgm:
        audio_labels += "[bgm]"
        mix_input_count = len(audio_tracks) + 1
    else:
        mix_input_count = len(audio_tracks)

    mix_filter = (
        f"{audio_labels}"
        f"amix=inputs={mix_input_count}:duration=longest:normalize=0[mixed]"
    )
    filters.append(mix_filter)

    # 4. Dynamics: compressor + limiter to tame the summed signal.
    dynamics_filter = (
        f"[mixed]"
        f"acompressor=threshold={COMPRESSOR_THRESHOLD}dB:ratio={COMPRESSOR_RATIO}:attack=5:release=50,"
        f"alimiter=limit={LIMITER_LEVEL}"
        f"[mixout]"
    )
    filters.append(dynamics_filter)

    # 5. Video stream (mapped directly, no subtitle processing).
    # Note: the video stream bypasses the filter graph; the command line
    # uses -map 0:v rather than -map [vout].

    # Drop empty strings to avoid emitting empty filter segments.
    filters = [f for f in filters if f and f.strip()]
    return ";".join(filters)
412
+
413
def build_filter_complex_for_audio(
    audio_tracks: List[Dict],
    has_bgm: bool
) -> str:
    """Build the FFmpeg filter_complex string for audio-only output.

    Processing pipeline:
    1. Each clone track: speed change (if needed) -> trim -> PTS reset ->
       volume reduction -> fade in/out -> delay alignment.
    2. BGM: volume stage.
    3. Mixing via amix.
    4. Dynamics: compressor + limiter, ending at the [out] label.

    Args:
        audio_tracks: Prepared track dicts (strategy, start_time, ...).
        has_bgm: Whether a BGM input follows the clone-audio inputs.

    Returns:
        The filter_complex string.
    """
    chains = []

    # 1. One processing chain per clone audio input.
    for idx, track in enumerate(audio_tracks):
        # No video input in audio mode, so input indices start at 0.
        in_idx = idx
        label = f"a{idx}"
        strat = track['strategy']

        speed = strat['speed_ratio']
        tgt = strat['target_duration']
        start = track['start_time']

        tempo = build_atempo_chain(speed)
        # Fade duration capped at half the clip length.
        fade = min(FADE_DURATION, tgt / 2.0)
        delay_ms = int(start * 1000)

        chains.append(
            f"[{in_idx}:a]"
            f"{tempo}"                                          # speed change (if any)
            f"atrim=start=0:end={tgt:.3f},"                     # trim to target duration
            f"asetpts=PTS-STARTPTS,"                            # reset timestamps
            f"volume={VOLUME_LEVEL},"                           # pre-lower volume
            f"afade=t=in:st=0:d={fade:.3f}:curve=esin,"         # fade in
            f"afade=t=out:st={max(0.0, tgt - fade):.3f}:d={fade:.3f}:curve=esin,"  # fade out
            f"adelay={delay_ms}|{delay_ms}"                     # align to timeline
            f"[{label}]"
        )

    # 2. BGM (if present) is the last input.
    if has_bgm:
        chains.append(f"[{len(audio_tracks)}:a]volume=1.0[bgm]")

    # 3. Mix every labelled stream together.
    labels = "".join(f"[a{i}]" for i in range(len(audio_tracks)))
    n_inputs = len(audio_tracks)
    if has_bgm:
        labels += "[bgm]"
        n_inputs += 1

    chains.append(
        f"{labels}"
        f"amix=inputs={n_inputs}:duration=longest:normalize=0[mixed]"
    )

    # 4. Dynamics: compressor then limiter.
    chains.append(
        f"[mixed]"
        f"acompressor=threshold={COMPRESSOR_THRESHOLD}dB:ratio={COMPRESSOR_RATIO}:attack=5:release=50,"
        f"alimiter=limit={LIMITER_LEVEL}"
        f"[out]"
    )

    # Drop empty entries so we never emit an empty filter stage.
    return ";".join(c for c in chains if c and c.strip())
497
+
498
+ # ============================================================================
499
+ # 主函数
500
+ # ============================================================================
501
+
502
def audio_auto_merge(audio_merge: AudioMerge, task_logger=None) -> Dict:
    """Mix clone audio tracks with BGM and mux/encode into the output file.

    Depending on ``audio_merge.input_type`` the result is either a WAV file
    (audio-only path) or an MP4 whose original video stream is stream-copied
    while the mixed audio is re-encoded to AAC.

    Args:
        audio_merge: Merge parameters (paths, input type, speed strategy,
            per-segment audio params).
        task_logger: Optional logger carrying a task_id; falls back to the
            module-level logger.

    Returns:
        Dict with 'output_file', 'file_size', 'track_count', 'has_bgm'.

    Raises:
        FileNotFoundError: If the input or BGM file is missing.
        Exception: On FFmpeg failure, timeout, or an invalid output file.
    """
    log = task_logger or logger

    log.info(f"开始音频合并 (策略: {audio_merge.speed_strategy})")

    # Validate inputs up front so we fail before spawning FFmpeg.
    if not os.path.exists(audio_merge.input_path):
        raise FileNotFoundError(f"输入文件不存在: {audio_merge.input_path}")
    if not os.path.exists(audio_merge.bgm_path):
        raise FileNotFoundError(f"BGM文件不存在: {audio_merge.bgm_path}")

    bgm_duration = get_audio_duration(audio_merge.bgm_path)
    log.debug(f"BGM 时长: {bgm_duration:.2f}s")

    # Decide per-track speed/trim strategy.
    log.info(f"分析 {len(audio_merge.audio_params)} 个音频轨道...")
    audio_tracks = analyze_audio_tracks(audio_merge.audio_params, audio_merge.speed_strategy, log)

    # Build the filter graph (a BGM input is always supplied).
    log.debug(f"构建 FFmpeg 滤镜...")
    if audio_merge.input_type == 'audio':
        filter_complex = build_filter_complex_for_audio(audio_tracks, True)
    else:
        filter_complex = build_filter_complex_for_video(audio_tracks, True)
    log.debug(f"滤镜长度: {len(filter_complex)} 字符")

    # Assemble the FFmpeg command line.
    ffmpeg_cmd = ['ffmpeg', '-nostdin']

    # Inputs: [video] + clone audios + BGM (the video input only in video mode).
    if audio_merge.input_type == "video":
        ffmpeg_cmd.extend(['-i', audio_merge.input_path])
    for track in audio_tracks:
        ffmpeg_cmd.extend(['-i', track['audio_file']])
    ffmpeg_cmd.extend(['-i', audio_merge.bgm_path])

    if audio_merge.input_type == "audio":
        ffmpeg_cmd.extend([
            '-filter_complex', filter_complex,
            '-map', '[out]',
            '-c:a', 'pcm_s16le',  # WAV output uses PCM encoding
            '-ar', '44100',       # 44.1 kHz sample rate
            '-ac', '2',           # stereo
            '-y',
            audio_merge.output_path
        ])
    else:
        ffmpeg_cmd.extend([
            '-filter_complex', filter_complex,
            '-map', '0:v',           # map the original video stream directly (unfiltered)
            '-map', '[mixout]',      # mixed audio from the filter graph
            '-c:v', 'copy',          # stream-copy video, no re-encode
            '-movflags', '+faststart',
            '-c:a', 'aac',           # encode mixed audio as AAC
            '-b:a', '128k',          # audio bitrate
            '-avoid_negative_ts', '1',
            '-f', 'mp4',
            '-y',
            audio_merge.output_path
        ])

    log.info(f"执行音频混合和视频合成...")
    log.debug(f"FFmpeg 命令: {' '.join(ffmpeg_cmd)}")

    process = None
    try:
        # Stream FFmpeg output live (FFmpeg logs to stderr; fold into stdout).
        process = subprocess.Popen(
            ffmpeg_cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
            bufsize=1
        )

        # Echo output at DEBUG level.
        # NOTE(review): this read loop blocks until FFmpeg closes its output,
        # so the wait() timeout below only guards the post-EOF window.
        try:
            for line in process.stdout:
                log.debug(f"FFmpeg: {line.rstrip()}")
        finally:
            if process.stdout and not process.stdout.closed:
                process.stdout.close()

        # Wait for completion with a 30-minute ceiling.
        try:
            process.wait(timeout=1800)
        except subprocess.TimeoutExpired as e:
            log.error(f"FFmpeg 执行超时(30分钟),强制终止进程")
            process.kill()
            process.wait()
            # Chain the timeout so the root cause survives in the traceback.
            raise Exception("FFmpeg 执行超时(30分钟)") from e

        if process.returncode != 0:
            raise subprocess.CalledProcessError(process.returncode, ffmpeg_cmd)

        # Sanity-check the output file.
        if not os.path.exists(audio_merge.output_path):
            raise Exception("输出文件未生成")

        file_size = os.path.getsize(audio_merge.output_path)
        if file_size < 1024:
            raise Exception(f"输出文件异常(大小: {file_size} bytes)")

        log.info(
            f"✓ 音频合并完成: {os.path.basename(audio_merge.output_path)} ({file_size / 1024 / 1024:.2f} MB, {len(audio_tracks)} 轨道)")

        return {
            'output_file': audio_merge.output_path,
            'file_size': file_size,
            'track_count': len(audio_tracks),
            'has_bgm': True
        }

    except subprocess.CalledProcessError as e:
        error_msg = f"FFmpeg 执行失败,返回码: {e.returncode}"
        log.error(f"❌ {error_msg}")
        # Chain the CalledProcessError instead of discarding it.
        raise Exception(error_msg) from e
    except Exception as e:
        log.error(f"❌ 音频合并失败: {e}")
        raise
    finally:
        # Always reap a leftover FFmpeg process.
        if process is not None:
            try:
                if process.poll() is None:
                    log.warning(f"清理残留 FFmpeg 进程...")
                    try:
                        process.kill()
                        process.wait(timeout=5)
                    except subprocess.TimeoutExpired:
                        log.error(f"FFmpeg 进程无法终止,可能需要手动清理")
            except Exception as cleanup_error:
                log.error(f" ⚠️ 清理进程时出错: {cleanup_error}")
            finally:
                if process.stdout and not process.stdout.closed:
                    try:
                        process.stdout.close()
                    except Exception:
                        # Best-effort close of an already-broken pipe.
                        pass
pyproject.toml CHANGED
@@ -1,11 +1,19 @@
1
  [project]
2
  name = "f5-tts-pt-br"
3
  version = "0.1.0"
4
- description = "Add your description here"
5
  readme = "README.md"
6
- requires-python = ">=3.11"
7
  dependencies = [
8
  "f5-tts>=1.1.10",
9
- "torch>=2.9.1",
10
  "tqdm>=4.67.1",
 
 
 
 
 
 
 
 
11
  ]
 
1
  [project]
2
  name = "f5-tts-pt-br"
3
  version = "0.1.0"
4
+ description = "F5-TTS Voice Cloning API with UVR5 and Audio Merging"
5
  readme = "README.md"
6
+ requires-python = ">=3.10"
7
  dependencies = [
8
  "f5-tts>=1.1.10",
9
+ "torch>=2.1.0",
10
  "tqdm>=4.67.1",
11
+ "flask>=3.0.0",
12
+ "redis>=5.0.0",
13
+ "requests>=2.31.0",
14
+ "pyyaml>=6.0.0",
15
+ "boto3>=1.34.0",
16
+ "audio-separator[gpu]>=0.17.0",
17
+ "onnxruntime-gpu>=1.17.0",
18
+ "ffmpeg-python>=0.2.0"
19
  ]
services/logger.py ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import os
3
+ import contextvars
4
+ from logging.handlers import RotatingFileHandler
5
+
6
+ # 日志配置常量
7
+ BASE_LOG_DIR = 'logs'
8
+ MAX_BYTES = 10 * 1024 * 1024 # 10MB
9
+ BACKUP_COUNT = 5
10
+
11
+ # ContextVar for task_id
12
+ task_id_var = contextvars.ContextVar("task_id", default=None)
13
+
14
+
15
class TaskIdFormatter(logging.Formatter):
    """Formatter that guarantees a ``task_id`` field on every record.

    The id is resolved from the ``task_id_var`` ContextVar with 'N/A' as the
    fallback.  Doing this in the formatter (rather than via Filter injection)
    keeps the behaviour robust for records emitted by third-party loggers.
    """

    def format(self, record):
        if not hasattr(record, 'task_id'):
            # Not supplied via `extra`: pull from the ContextVar, or mark
            # as N/A when we are outside any task context.
            ctx_id = task_id_var.get()
            record.task_id = ctx_id if ctx_id else 'N/A'
        elif record.task_id is None:
            # Attribute exists but is None: normalise to the placeholder.
            record.task_id = 'N/A'
        return super().format(record)
32
+
33
+
34
def setup_logging(service_name: str, level=logging.INFO):
    """Centrally configure logging; each service gets its own rotating file.

    Args:
        service_name: Service name (e.g. 'app' or 'worker'); used as both
            the logger name and the log file name.
        level: Logging level for the configured loggers.

    Returns:
        The configured logger for ``service_name``.
    """
    # Make sure the log directory exists.
    os.makedirs(BASE_LOG_DIR, exist_ok=True)

    # 1. Resolve the log file path.
    log_file_path = os.path.join(BASE_LOG_DIR, f'{service_name}.log')

    # 2. Service logger.
    logger = logging.getLogger(service_name)
    logger.setLevel(level)

    # 3. Custom formatter injects task_id (no Filter needed, so third-party
    #    loggers attached below get the field too).
    formatter = TaskIdFormatter(
        '%(asctime)s - [%(task_id)s] - %(name)s - %(levelname)s - %(message)s'
    )

    # 4. Console handler.
    console_handler = logging.StreamHandler()
    console_handler.setFormatter(formatter)

    # 5. Rotating file handler.
    file_handler = RotatingFileHandler(
        log_file_path,
        maxBytes=MAX_BYTES,
        backupCount=BACKUP_COUNT,
        encoding='utf-8'
    )
    file_handler.setFormatter(formatter)

    # 6. Attach handlers without duplicating them on repeated calls.
    abs_log_file_path = os.path.abspath(log_file_path)

    def attach_handlers_to(target_logger_name):
        """Attach the file/console handlers to a logger exactly once."""
        target = logging.getLogger(target_logger_name)
        target.setLevel(level)

        # Already have a rotating handler writing to this exact file?
        has_file_handler = any(
            isinstance(h, RotatingFileHandler)
            and os.path.abspath(h.baseFilename) == abs_log_file_path
            for h in target.handlers
        )
        if not has_file_handler:
            target.addHandler(file_handler)

        # BUGFIX: FileHandler subclasses StreamHandler, so the previous
        # `isinstance(h, logging.StreamHandler)` check matched the file
        # handler added just above and the console handler was never
        # attached.  Require a non-file StreamHandler instead.
        has_console = any(
            isinstance(h, logging.StreamHandler)
            and not isinstance(h, logging.FileHandler)
            for h in target.handlers
        )
        if not has_console:
            target.addHandler(console_handler)

    # Shared 'services' logger used by worker modules.
    attach_handlers_to('services')

    # The service's own logger.
    attach_handlers_to(service_name)

    return logger
102
+
103
+
104
+ # 辅助函数,用于简化调用
105
def get_app_logger():
    """Return the configured logger for the 'app' service."""
    return setup_logging(service_name='app')


def get_process_worker_logger():
    """Return the configured logger for the 'process_worker' service."""
    return setup_logging(service_name='process_worker')


def get_upload_worker_logger():
    """Return the configured logger for the 'upload_worker' service."""
    return setup_logging(service_name='upload_worker')
115
+
116
+
117
class RequestLogger:
    """HTTP request logging helpers (for Flask)."""

    @staticmethod
    def log_request(request, response, duration: float = None):
        """Log one HTTP request/response pair.

        Args:
            request: Flask request object.
            response: Flask response object.
            duration: Request processing time in seconds (optional).
        """
        logger = get_app_logger()

        # Assemble the message pieces.
        parts = [
            f"{request.method} {request.path}",
            f"status={response.status_code}",
        ]
        if duration is not None:
            parts.append(f"duration={duration:.3f}s")
        if request.query_string:
            parts.append(f"query={request.query_string.decode('utf-8')}")
        # Prefer the proxy-forwarded client address when present.
        client_ip = request.headers.get('X-Forwarded-For', request.remote_addr)
        parts.append(f"ip={client_ip}")

        message = " | ".join(parts)

        # Severity follows the response status code.
        status = response.status_code
        if status >= 500:
            logger.error(message)
        elif status >= 400:
            logger.warning(message)
        else:
            logger.info(message)

    @staticmethod
    def log_error(request, error: Exception):
        """Log an unhandled request error with full traceback.

        Args:
            request: Flask request object.
            error: The exception that was raised.
        """
        logger = get_app_logger()
        logger.exception(
            f"请求错误 | {request.method} {request.path} | "
            f"error={type(error).__name__}: {str(error)}"
        )
services/merger_service.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import logging
3
+ import shutil
4
+ from typing import List, Dict, Any
5
+ from merger import audio_auto_merge, AudioMerge, AudioParam
6
+
7
+ logger = logging.getLogger("services.merger")
8
+
9
class MergerService:
    """Thin service wrapper around the low-level merger module."""

    def __init__(self, config: Dict[str, Any] = None):
        # No configuration is currently needed; the parameter is kept for
        # interface symmetry with the other services.
        pass

    def merge_video(self,
                    video_path: str,
                    bgm_path: str,
                    segments: List[Dict[str, Any]],
                    output_path: str) -> str:
        """Merge TTS segments, BGM and the original video.

        Args:
            video_path: Path of the source video.
            bgm_path: Path of the background-music track.
            segments: Dicts with keys 'start_time', 'end_time', 'path',
                'original_duration', 'gen_duration', 'index'.
            output_path: Destination path for the merged video.

        Returns:
            Path of the merged output file.

        Raises:
            ValueError: If ``segments`` is empty.
            Exception: Propagated from the underlying merger on failure.
        """
        try:
            logger.info(f"Preparing to merge. Video: {video_path}, BGM: {bgm_path}")

            # 1. Convert dictionary segments into AudioParam objects.
            audio_params = [
                AudioParam(
                    start_secs=float(seg['start_time']),
                    end_secs=float(seg['end_time']),
                    clone_audio_path=seg['path'],
                    original_audio_length=float(seg['original_duration']),
                    clone_audio_length=float(seg['gen_duration']),
                    audio_sort_num=seg['index'],
                )
                for seg in segments
            ]

            if not audio_params:
                raise ValueError("No valid audio segments to merge.")

            # 2. Build the merge configuration.
            merge_config = AudioMerge(
                output_path=output_path,
                bgm_path=bgm_path,
                input_path=video_path,
                input_type="video",
                speed_strategy="max",  # Default strategy
                audio_params=audio_params,
            )

            # 3. Delegate to the existing merger implementation.
            result = audio_auto_merge(merge_config, task_logger=logger)
            return result['output_file']

        except Exception as e:
            logger.error(f"Merge failed: {e}")
            # Bare re-raise keeps the original traceback intact
            # (`raise e` would rebind it at this frame).
            raise
services/queue_manager.py ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import logging
3
+ import time
4
+ import uuid
5
+ from typing import Dict, Optional, Any
6
+
7
+ import redis
8
+ from redis import ConnectionPool
9
+
10
+ logger = logging.getLogger("services")
11
+
12
+
13
+ def _calculate_score(priority: int) -> float:
14
+ """
15
+ 计算任务得分 (优先级)
16
+ """
17
+ timestamp = time.time()
18
+ score = (6 - priority) * timestamp
19
+ return score
20
+
21
+
22
class QueueManager:
    """Redis queue manager.

    The processing queue is a ZSET (task_id -> priority score) paired with a
    HASH (task_id -> JSON payload); the upload queue is a plain LIST.
    """

    def __init__(self, redis_config: Dict[str, Any]):
        """Initialise the manager and verify Redis connectivity.

        Args:
            redis_config: Redis settings: host, port, db, queue_key,
                upload_queue_key, optional password / max_connections.

        Raises:
            Exception: If the Redis server cannot be reached.
        """
        host = redis_config['host']
        port = redis_config['port']
        db = redis_config['db']
        password = redis_config.get('password')
        max_connections = redis_config.get('max_connections', 10)

        # 1. Share one connection pool across all commands.
        self.pool = ConnectionPool(
            host=host,
            port=port,
            db=db,
            password=password,
            max_connections=max_connections,
            decode_responses=True
        )
        self.redis_client = redis.Redis(connection_pool=self.pool)

        self.process_queue_key = redis_config['queue_key']
        # Companion HASH holding the full JSON payload per task id.
        self.process_hash_queue_key = redis_config['queue_key'] + '_hash'
        self.upload_queue_key = redis_config['upload_queue_key']

        logger.info(
            f"QueueManager initialized with Connection Pool (Max={max_connections}): "
            f"Process Queue={self.process_queue_key}, Upload Queue={self.upload_queue_key}")

        try:
            self.redis_client.ping()
            logger.info("Redis connection successful.")
        except Exception as e:
            logger.error(f"Failed to connect to Redis: {e}")
            raise

    def add_task(self, task_data: Dict[str, Any], priority: int = 3) -> str:
        """Add a task to the processing queue.

        Args:
            task_data: Task payload (hook_url, text, etc.).
            priority: Priority 1-5 (1 is popped first under the current
                scoring formula).

        Returns:
            The generated task id (UUID4 string).
        """
        task_id = str(uuid.uuid4())

        task = {
            'task_id': task_id,
            'priority': priority,
            'created_at': time.time(),
            'data': task_data
        }

        score = _calculate_score(priority)

        # One pipelined round trip keeps ZSET and HASH in step
        # (atomic + single network hop).
        pipe = self.redis_client.pipeline()
        # 1. ZSET: priority ordering / popping.
        pipe.zadd(self.process_queue_key, {task_id: score})
        # 2. HASH: uuid -> full JSON (fast read/update/delete).
        pipe.hset(self.process_hash_queue_key, task_id, json.dumps(task))
        pipe.execute()

        logger.info(f"Task {task_id} added to process queue with priority {priority}, score {score}")

        return task_id

    def get_process_task(self) -> Optional[Dict[str, Any]]:
        """Pop the highest-scoring task from the processing queue (ZSET).

        Returns:
            The task dict, or None when the queue is empty or the popped id
            no longer has a payload (e.g. a concurrently cancelled task).
        """
        # ZPOPMAX returns the highest-scoring member.
        result = self.redis_client.zpopmax(self.process_queue_key, 1)
        if not result:
            return None

        task_id, _ = result[0]

        pipe = self.redis_client.pipeline()
        pipe.hget(self.process_hash_queue_key, task_id)
        pipe.hdel(self.process_hash_queue_key, task_id)
        task_json, _ = pipe.execute()

        # BUGFIX: the payload can be gone if the task was cancelled between
        # ZPOPMAX and HGET; json.loads(None) would crash the worker loop.
        if task_json is None:
            logger.warning(f"Task {task_id} popped from ZSET but payload missing; skipping.")
            return None

        task = json.loads(task_json)

        logger.info(f"Task {task.get('task_id', 'Unknown')} retrieved from process queue.")
        return task

    def push_upload_task(self, task_result: Dict[str, Any]):
        """Push a processing result onto the upload queue (LIST).

        Args:
            task_result: Result payload (task_id, output paths, hook_url, ...).
        """
        self.redis_client.lpush(self.upload_queue_key, json.dumps(task_result))
        logger.info(f"Task {task_result['task_id']} pushed to upload queue.")

    def get_upload_task(self, timeout: int = 5) -> Optional[Dict[str, Any]]:
        """Blocking-pop a task from the upload queue (LIST).

        Args:
            timeout: Maximum seconds to block.

        Returns:
            The task dict, or None on timeout.
        """
        # BRPOP blocks up to `timeout` and returns (key, value).
        result = self.redis_client.brpop(self.upload_queue_key, timeout)
        if not result:
            return None

        task = json.loads(result[1])

        logger.debug(f"Task {task.get('task_id', 'Unknown')} retrieved from upload queue.")
        return task

    def get_process_queue_stats(self) -> Dict[str, Any]:
        """Return queued-task counts for both queues."""
        queued_count = self.redis_client.zcard(self.process_queue_key)
        upload_count = self.redis_client.llen(self.upload_queue_key)

        return {
            'process_queued': queued_count,
            'upload_queued': upload_count,
            'timestamp': time.time()
        }

    def delete_process_task(self, task_id: str) -> bool:
        """Atomically remove a queued task by id (cancellation path).

        Returns:
            True if anything was removed, False if the task was not found.
        """
        pipe = self.redis_client.pipeline()

        # 1. Check the HASH for the payload (O(1)).
        pipe.hget(self.process_hash_queue_key, task_id)

        # 2. Remove from the ZSET unconditionally so stale entries are
        #    cleaned even when the HASH entry is already gone.
        pipe.zrem(self.process_queue_key, task_id)

        json_str, zrem_count = pipe.execute()

        if json_str is not None:
            # Payload still present: the task had not been consumed yet.
            self.redis_client.hdel(self.process_hash_queue_key, task_id)
            logger.warning(f"Task {task_id} successfully removed from queue (cancelled).")
            return True

        if zrem_count > 0:
            # Only a stale ZSET entry existed; cleaned up.
            logger.info(f"Task {task_id} only existed in ZSET (stale), cleaned up.")
            return True

        logger.info(f"Task {task_id} not found in queue.")
        return False
services/r2_uploader.py ADDED
@@ -0,0 +1,375 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import os
3
+ from pathlib import Path
4
+ from typing import Dict, Any, Optional
5
+
6
+ import boto3
7
+ from botocore.config import Config
8
+ from botocore.exceptions import ClientError, NoCredentialsError
9
+
10
+ logger = logging.getLogger("services")
11
+
12
+
13
+ def _get_content_type(file_path: str) -> Optional[str]:
14
+ """
15
+ 根据文件扩展名获取内容类型
16
+
17
+ Args:
18
+ file_path: 文件路径
19
+
20
+ Returns:
21
+ Optional[str]: 内容类型
22
+ """
23
+ extension = Path(file_path).suffix.lower()
24
+
25
+ content_types = {
26
+ '.wav': 'audio/wav',
27
+ '.mp3': 'audio/mpeg',
28
+ '.mp4': 'video/mp4',
29
+ '.ogg': 'audio/ogg',
30
+ '.flac': 'audio/flac',
31
+ '.aac': 'audio/aac',
32
+ '.m4a': 'audio/mp4',
33
+ '.srt': 'text/plain; charset=utf-8',
34
+ '.txt': 'text/plain; charset=utf-8',
35
+ '.json': 'application/json; charset=utf-8',
36
+ '.zip': 'application/zip',
37
+ '.png': 'image/png',
38
+ '.jpg': 'image/jpeg',
39
+ '.jpeg': 'image/jpeg',
40
+ '.gif': 'image/gif',
41
+ }
42
+
43
+ return content_types.get(extension, 'application/octet-stream') # 默认二进制
44
+
45
+
46
class R2Uploader:
    """Cloudflare R2 uploader (S3-compatible API via boto3)."""

    def __init__(self, r2_config: Dict[str, Any]):
        """Initialise the client and verify bucket access.

        Args:
            r2_config: Must contain 'bucket_name', 'endpoint_url',
                'access_key_id', 'secret_access_key'; 'public_url' optional.

        Raises:
            ValueError: Missing config keys or invalid credentials.
            ClientError: Bucket access failure.
        """
        self.config = r2_config
        self.bucket_name = r2_config['bucket_name']
        self.public_url = r2_config.get('public_url', '').rstrip('/')

        # Fail fast on incomplete configuration.
        required_keys = ['bucket_name', 'endpoint_url', 'access_key_id', 'secret_access_key']
        missing = [k for k in required_keys if k not in r2_config]
        if missing:
            raise ValueError(f"Missing required R2 config keys: {missing}")

        client_config = Config(
            signature_version='s3v4',  # R2 requires SigV4
            retries={
                'max_attempts': 3,
                'mode': 'standard'
            },
            connect_timeout=10,
            read_timeout=10
        )

        try:
            self.s3_client = boto3.client(
                's3',
                endpoint_url=r2_config['endpoint_url'],
                aws_access_key_id=r2_config['access_key_id'],
                aws_secret_access_key=r2_config['secret_access_key'],
                config=client_config
            )
        except NoCredentialsError:
            raise ValueError("Invalid AWS credentials (Access Key/Secret Key)")

        self._validate_bucket_access()

        logger.info(f"R2Uploader initialized for bucket: {self.bucket_name}")

    def _validate_bucket_access(self):
        """Fail fast if the bucket is missing or the token lacks permissions."""
        try:
            # Basic existence / access check.
            self.s3_client.head_bucket(Bucket=self.bucket_name)
            logger.debug(f"Bucket '{self.bucket_name}' access confirmed")

            # Listing zero keys surfaces AccessDenied early.
            self.s3_client.list_objects_v2(Bucket=self.bucket_name, MaxKeys=0)

        except ClientError as e:
            error_code = e.response['Error']['Code']
            error_msg = e.response['Error']['Message']
            if error_code == 'AccessDenied':
                raise ClientError(
                    e.response,
                    "AccessDenied: Check API Token permissions (requires 'Object Read & Write' for bucket). "
                    f"Ensure token is bound to bucket '{self.bucket_name}'. Details: {error_msg}"
                )
            elif error_code == 'NoSuchBucket':
                raise ClientError(
                    e.response,
                    f"Bucket '{self.bucket_name}' does not exist or is not accessible."
                )
            else:
                raise

    def upload_file(
        self,
        file_path: str,
        object_key: Optional[str] = None,
        metadata: Optional[Dict[str, str]] = None
    ) -> str:
        """Upload a single file to R2.

        Args:
            file_path: Local file path.
            object_key: R2 object key (path); defaults to the file name.
            metadata: Object metadata (R2 supports basic metadata; no Tagging).

        Returns:
            The public URL when ``public_url`` is configured, otherwise the
            object key.

        Raises:
            FileNotFoundError: The local file does not exist.
            ClientError: Upload failure (with detailed context).
        """
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"File not found: {file_path}")

        # Default to the bare file name.
        if object_key is None:
            object_key = os.path.basename(file_path)

        # Keys must be non-empty and have no leading slash.
        object_key = object_key.lstrip('/')

        try:
            # Only pass parameters R2 supports (no ACL).
            extra_args = {}

            content_type = _get_content_type(file_path)
            if content_type:
                extra_args['ContentType'] = content_type

            if metadata:
                extra_args['Metadata'] = metadata

            logger.info(f"Uploading {file_path} to R2 bucket '{self.bucket_name}' as '{object_key}'")

            self.s3_client.upload_file(
                file_path,
                self.bucket_name,
                object_key,
                ExtraArgs=extra_args
            )

            # Build the public URL when one is configured.
            file_url = f"{self.public_url}/{object_key}" if self.public_url else None

            logger.info(f"File uploaded successfully: {file_url or object_key}")

            return file_url or object_key

        except ClientError as e:
            error_code = e.response['Error']['Code']
            error_msg = e.response['Error']['Message']
            logger.error(
                f"Failed to upload {file_path} to R2: Code={error_code}, Message={error_msg}. "
                f"Check: 1) API Token has 'Object Read & Write' permission bound to bucket '{self.bucket_name}'. "
                f"2) No unsupported params (e.g., ACL, Tagging). 3) Endpoint/Region correct."
            )
            raise ClientError(e.response, f"Upload failed: {error_msg}")
        except Exception as e:
            logger.error(f"Unexpected error uploading {file_path}: {str(e)}")
            raise

    def upload_files(
        self,
        file_paths: list,
        prefix: str = '',
        metadata: Optional[Dict[str, str]] = None
    ) -> Dict[str, str]:
        """Upload multiple files to R2.

        Args:
            file_paths: Local file paths.
            prefix: R2 key prefix (leading slashes stripped).
            metadata: Object metadata applied to every file.

        Returns:
            Mapping of local path -> URL (None for failed uploads).
        """
        results = {}
        prefix = prefix.lstrip('/')

        for file_path in file_paths:
            try:
                filename = os.path.basename(file_path)
                # BUGFIX: the key previously embedded the literal string
                # "(unknown)" instead of the file name, so every prefixed
                # upload collided on the same object key.
                object_key = f"{prefix}/{filename}" if prefix else filename

                file_url = self.upload_file(file_path, object_key, metadata)
                results[file_path] = file_url

            except Exception as e:
                logger.error(f"Failed to upload {file_path}: {str(e)}")
                results[file_path] = None

        successful = sum(1 for url in results.values() if url is not None)
        logger.info(f"Batch upload completed: {successful}/{len(file_paths)} files uploaded to '{self.bucket_name}'")

        return results

    def delete_file(self, object_key: str):
        """Delete one object from R2.

        Args:
            object_key: R2 object key.

        Raises:
            ClientError: Deletion failure.
        """
        object_key = object_key.lstrip('/')
        try:
            logger.info(f"Deleting '{object_key}' from R2 bucket '{self.bucket_name}'")

            self.s3_client.delete_object(
                Bucket=self.bucket_name,
                Key=object_key
            )

            logger.info(f"File deleted successfully: {object_key}")

        except ClientError as e:
            error_code = e.response['Error']['Code']
            error_msg = e.response['Error']['Message']
            logger.error(f"Failed to delete {object_key}: Code={error_code}, Message={error_msg}")
            raise

    def delete_files(self, object_keys: list):
        """Batch-delete objects from R2.

        Args:
            object_keys: R2 object keys; blank entries are skipped.
        """
        if not object_keys:
            return

        cleaned_keys = [k.lstrip('/') for k in object_keys if k.strip()]

        try:
            delete_objects = [{'Key': key} for key in cleaned_keys]

            logger.info(f"Deleting {len(cleaned_keys)} files from R2 bucket '{self.bucket_name}'")

            response = self.s3_client.delete_objects(
                Bucket=self.bucket_name,
                Delete={'Objects': delete_objects}
            )

            deleted_count = len(response.get('Deleted', []))
            errors = response.get('Errors', [])
            if errors:
                logger.warning(f"Batch delete errors: {errors}")

            logger.info(f"Batch delete completed: {deleted_count}/{len(cleaned_keys)} files deleted")

        except ClientError as e:
            error_code = e.response['Error']['Code']
            error_msg = e.response['Error']['Message']
            logger.error(f"Failed to batch delete files: Code={error_code}, Message={error_msg}")
            raise

    def file_exists(self, object_key: str) -> bool:
        """Return True if the object exists in the bucket.

        Args:
            object_key: R2 object key.

        Raises:
            ClientError: Any error other than a 404.
        """
        object_key = object_key.lstrip('/')
        try:
            self.s3_client.head_object(
                Bucket=self.bucket_name,
                Key=object_key
            )
            return True

        except ClientError as e:
            if e.response['Error']['Code'] == '404':
                return False
            else:
                error_msg = e.response['Error']['Message']
                logger.error(f"Error checking existence of {object_key}: {error_msg}")
                raise

    def get_file_url(self, object_key: str) -> str:
        """Return the public URL for an object (or the key when no
        ``public_url`` is configured).

        Args:
            object_key: R2 object key.
        """
        object_key = object_key.lstrip('/')
        return f"{self.public_url}/{object_key}" if self.public_url else object_key

    def list_files(self, prefix: str = '', max_keys: int = 1000) -> list:
        """List objects in the bucket.

        Args:
            prefix: Key prefix filter.
            max_keys: Maximum number of objects to return.

        Returns:
            List of dicts: {'key', 'size', 'last_modified', 'url'}.

        Raises:
            ClientError: Listing failure.
        """
        prefix = prefix.lstrip('/')
        try:
            response = self.s3_client.list_objects_v2(
                Bucket=self.bucket_name,
                Prefix=prefix,
                MaxKeys=max_keys
            )

            files = []
            for obj in response.get('Contents', []):
                files.append({
                    'key': obj['Key'],
                    'size': obj['Size'],
                    'last_modified': obj['LastModified'].isoformat(),
                    'url': self.get_file_url(obj['Key'])
                })

            logger.info(f"Listed {len(files)} files with prefix '{prefix}' in '{self.bucket_name}'")

            return files

        except ClientError as e:
            error_code = e.response['Error']['Code']
            error_msg = e.response['Error']['Message']
            logger.error(f"Failed to list files: Code={error_code}, Message={error_msg}")
            raise
services/tts_service.py ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ import logging
4
+ import shutil
5
+ import subprocess
6
+ from urllib.parse import urlparse
7
+ from typing import List, Dict, Any, Optional
8
+ from AgentF5TTSChunk import AgentF5TTS
9
+
10
+ logger = logging.getLogger("services.tts")
11
+
12
def get_audio_duration(file_path: str) -> float:
    """Return the duration of an audio file in seconds, probed via ffprobe.

    Falls back to 0.0 when ffprobe is unavailable or the probe fails,
    logging the error instead of raising.
    """
    probe_cmd = [
        'ffprobe', '-v', 'error', '-show_entries', 'format=duration',
        '-of', 'default=noprint_wrappers=1:nokey=1', file_path
    ]
    try:
        raw = subprocess.check_output(probe_cmd)
        return float(raw.decode().strip())
    except Exception as e:
        logger.error(f"Failed to get duration for {file_path}: {e}")
        return 0.0
24
+
25
class TTSService:
    """F5-TTS dubbing service.

    Downloads per-character reference voices, synthesizes one audio clip
    per subtitle line, and returns segment metadata (paths and timing)
    for the downstream merger.
    """

    def __init__(self, config: Dict[str, Any]):
        """Load the F5-TTS checkpoint and prepare working directories.

        Args:
            config: full application config; only the 'tts' section is read
                (keys: voices_dir, output_dir, checkpoint_file, plus optional
                device / remove_silence / speed).

        Raises:
            Exception: re-raised when the model checkpoint fails to load.
        """
        self.config = config['tts']
        self.voices_dir = self.config['voices_dir']
        self.output_dir = self.config['output_dir']

        # Ensure working directories exist before any download/inference.
        os.makedirs(self.voices_dir, exist_ok=True)
        os.makedirs(self.output_dir, exist_ok=True)

        # Load the model once at startup; tasks reuse the same agent.
        logger.info("Loading F5-TTS Model...")
        try:
            self.agent = AgentF5TTS(
                ckpt_file=self.config['checkpoint_file'],
                device=self.config.get('device', 'cuda')
            )
            logger.info("F5-TTS Model Loaded successfully.")
        except Exception as e:
            logger.error(f"Failed to load F5-TTS Model: {e}")
            raise e

    def _get_extension_from_url(self, url: str) -> str:
        """Return the file extension of the URL's path, defaulting to '.wav'."""
        parsed = urlparse(url)
        ext = os.path.splitext(parsed.path)[1]
        return ext if ext else ".wav"

    def _download_file(self, url: str, path: str):
        """Stream *url* to *path* atomically.

        The payload is written to '<path>.part' and renamed into place only
        on success, so a failed or interrupted download can never leave a
        truncated file behind that would later pass the exists() cache check
        in prepare_voices() and be used as a corrupt reference voice.

        Raises:
            requests.RequestException: on HTTP errors (incl. non-2xx status).
            OSError: on disk errors.
        """
        tmp_path = f"{path}.part"
        try:
            response = requests.get(url, stream=True, timeout=30)
            response.raise_for_status()
            with open(tmp_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
            os.replace(tmp_path, path)  # atomic rename on POSIX
        except Exception:
            # Never leave a partial file behind on failure.
            if os.path.exists(tmp_path):
                try:
                    os.remove(tmp_path)
                except OSError:
                    pass
            raise

    def prepare_voices(self, character_voices: List[Dict[str, str]]) -> Dict[str, Dict[str, str]]:
        """
        Ensure all reference voices are available locally.

        Entries without an 'id' are skipped; failed downloads are logged
        and skipped rather than failing the whole task.

        Returns a map of character_name -> {'path': local_file_path, 'text': ref_text}.
        Each voice is additionally keyed by its string id, so content rows
        may reference the voice by either character name or id.
        """
        voice_map = {}

        for cv in character_voices:
            char_name = cv.get('character')
            voice_id = cv.get('id')
            url = cv.get('timbre_url')  # reference-audio URL (renamed from character_url)
            text = cv.get('timbre_text', "")  # transcript of the reference audio

            if not voice_id:
                continue

            # Use the id as the filename so the same voice is cached once.
            ext = self._get_extension_from_url(url) if url else ".wav"
            filename = f"{voice_id}{ext}"
            local_path = os.path.join(self.voices_dir, filename)

            # Download only when not already cached locally.
            if not os.path.exists(local_path):
                if url:
                    try:
                        logger.info(f"Downloading voice {voice_id}")
                        self._download_file(url, local_path)
                    except Exception as e:
                        logger.error(f"Failed to download voice {voice_id}: {e}")
                        continue
                else:
                    logger.warning(f"Voice {voice_id} missing locally and no URL.")
                    continue

            if os.path.exists(local_path):
                voice_data = {'path': local_path, 'text': text}
                if char_name:
                    voice_map[char_name] = voice_data
                voice_map[str(voice_id)] = voice_data

        return voice_map

    def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]:
        """
        Process a TTS generation task.

        Args:
            task: {'task_id': str, 'data': request payload carrying
                'character_voice', 'content', 'hook_url', 'video_url'}.

        Returns:
            dict: list of generated audio segments with timing metadata,
            the per-task output directory, and passthrough callback fields.

        Raises:
            ValueError: when the payload contains no content rows.
            Exception: when every segment fails to synthesize.
        """
        task_id = task['task_id']
        data = task['data']

        character_voices = data.get('character_voice', [])
        content = data.get('content', [])

        if not content:
            raise ValueError("No content provided.")

        # 1. Resolve/download reference voices.
        voice_map = self.prepare_voices(character_voices)

        # 2. Create a per-task output directory.
        task_out_dir = os.path.join(self.output_dir, task_id)
        os.makedirs(task_out_dir, exist_ok=True)

        segments_metadata = []

        # 3. Inference loop: one clip per subtitle row.
        logger.info(f"Starting inference for {len(content)} segments")

        for idx, segment in enumerate(content):
            char_name = segment.get('character')
            text = segment.get('translation')
            start_time = segment.get('start', 0.0)
            end_time = segment.get('end', 0.0)

            if not text:
                continue

            # Original on-screen duration; the merger uses it for stretching.
            original_duration = max(0.0, end_time - start_time)

            voice_data = voice_map.get(char_name)

            if not voice_data:
                logger.warning(f"Segment {idx}: No voice for '{char_name}'. Skipping.")
                continue

            ref_audio_path = voice_data['path']
            ref_audio_text = voice_data['text']

            out_filename = f"{idx:04d}.wav"
            out_path = os.path.join(task_out_dir, out_filename)

            try:
                self.agent.infer(
                    ref_file=ref_audio_path,
                    ref_text=ref_audio_text,  # reference transcript guides cloning
                    gen_text=text,
                    file_wave=out_path,
                    remove_silence=self.config.get('remove_silence', True),
                    speed=self.config.get('speed', 1.0)
                )

                if os.path.exists(out_path):
                    gen_duration = get_audio_duration(out_path)

                    segments_metadata.append({
                        'index': idx,
                        'path': out_path,
                        'start_time': start_time,
                        'end_time': end_time,
                        'original_duration': original_duration,
                        'gen_duration': gen_duration
                    })

            except Exception as e:
                # One bad line must not abort the whole task.
                logger.error(f"Inference failed for segment {idx}: {e}")

        if not segments_metadata:
            raise Exception("No audio generated.")

        return {
            'task_id': task_id,
            'segments': segments_metadata,
            'task_dir': task_out_dir,
            'hook_url': data.get('hook_url'),
            'video_url': data.get('video_url'),
            'priority': task.get('priority', 3)
        }
services/uvr5_service.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import logging
3
+ from typing import Tuple, Dict, Any
4
+ from pathlib import Path
5
+ from audio_separator.separator import Separator
6
+ from uvr5.download_models import download_model
7
+
8
+ logger = logging.getLogger("services.uvr5")
9
+
10
class UVR5Service:
    """Vocal/instrumental (BGM) separation backed by audio-separator (UVR5)."""

    def __init__(self, config: Dict[str, Any]):
        """Read the 'uvr5' config section and load the separation model.

        Args:
            config: full application config; keys used from 'uvr5':
                model_dir, output_dir, uvr5_model (preferred) or the
                legacy model_name.

        Raises:
            Exception: re-raised when the separator/model fails to load.
        """
        self.config = config.get('uvr5', {})
        self.model_dir = self.config.get('model_dir', './models/uvr5')
        self.output_dir = self.config.get('output_dir', './temp/uvr5')

        # Prefer 'uvr5_model'; fall back to the legacy 'model_name' key.
        self.model_name = self.config.get('uvr5_model') or self.config.get('model_name', 'UVR-MDX-NET-Inst_HQ_3.onnx')
        os.makedirs(self.model_dir, exist_ok=True)
        os.makedirs(self.output_dir, exist_ok=True)

        self.separator = None
        self._initialize_separator()

    def _initialize_separator(self):
        """Create the Separator and load the configured ONNX model."""
        try:
            logger.info(f"Initializing UVR5 Separator with model: {self.model_name}")

            # Normalize to a '<name>.onnx' filename exactly once; config may
            # supply the name with or without the extension.
            model_filename = self.model_name if self.model_name.endswith('.onnx') else f"{self.model_name}.onnx"

            self.separator = Separator(
                log_level=logging.INFO,
                model_file_dir=self.model_dir,
                output_dir=self.output_dir,
                output_format="wav"
            )

            # Load the model upfront; audio-separator downloads it into
            # model_file_dir automatically when it is missing.
            self.separator.load_model(model_filename)
            logger.info("UVR5 Model loaded successfully.")

        except Exception as e:
            logger.error(f"Failed to initialize UVR5: {e}")
            raise e

    @staticmethod
    def _classify_outputs(output_files, output_dir):
        """Map separator output names to (vocals_path, instrumental_path).

        Checks "Vocals" BEFORE any "Inst" substring: audio-separator embeds
        the model name in its output files (e.g.
        "song_(Vocals)_UVR-MDX-NET-Inst_HQ_3.wav"), so matching "Inst" first
        would misclassify the vocal stem as instrumental.
        os.path.join leaves already-absolute entries untouched.
        """
        vocals_path = None
        instrumental_path = None
        for filename in output_files:
            full_path = os.path.join(output_dir, filename)
            if "Vocals" in filename:
                vocals_path = full_path
            elif "Instrumental" in filename or "Inst" in filename:
                instrumental_path = full_path
        return vocals_path, instrumental_path

    def process_audio(self, input_path: str, task_id: str) -> Tuple[str, str]:
        """
        Separate audio into Vocals and Instrumental (BGM).

        Args:
            input_path: local path of the audio file to split.
            task_id: identifier used only for log correlation.

        Returns: (vocals_path, instrumental_path); vocals_path may be None
        when the vocal stem could not be identified by name.

        Raises:
            Exception: when no instrumental track can be identified.
        """
        if not self.separator:
            self._initialize_separator()

        try:
            logger.info(f"Starting UVR5 separation on {input_path}")

            # Run separation; returns output filenames (or paths).
            output_files = self.separator.separate(input_path)

            vocals_path, instrumental_path = self._classify_outputs(
                output_files, self.output_dir
            )

            if not instrumental_path:
                raise Exception("Could not identify Instrumental track from UVR5 output.")

            logger.info(f"UVR5 Complete. BGM: {instrumental_path}")
            return vocals_path, instrumental_path

        except Exception as e:
            logger.error(f"UVR5 Processing failed: {e}")
            raise e
uv.lock CHANGED
The diff for this file is too large to render. See raw diff
 
uvr5/download_models.py ADDED
@@ -0,0 +1,329 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ UVR 模型下载脚本
4
+ 支持下载多个 UVR 模型到指定目录
5
+ """
6
+
7
+ import os
8
+ import sys
9
+ import argparse
10
+ import requests
11
+ from pathlib import Path
12
+ try:
13
+ from tqdm import tqdm
14
+ HAS_TQDM = True
15
+ except ImportError:
16
+ HAS_TQDM = False
17
+ print("提示: 安装 tqdm 可以显示下载进度: pip install tqdm")
18
+
19
+ try:
20
+ from audio_separator.separator import Separator
21
+ HAS_AUDIO_SEPARATOR = True
22
+ except ImportError:
23
+ HAS_AUDIO_SEPARATOR = False
24
+ print("警告: audio-separator 未安装,将使用手动下载模式")
25
+
26
+ # 默认配置
27
+ DEFAULT_MODEL_DIR = './models/uvr5'
28
+ DEFAULT_MODEL_NAME = 'UVR-MDX-NET-Inst_HQ_3'
29
+
30
+ # 可用的模型列表
31
+ AVAILABLE_MODELS = {
32
+ 'UVR-MDX-NET-Inst_HQ_4': {
33
+ 'url': 'https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/UVR-MDX-NET-Inst_HQ_4.onnx',
34
+ 'size': '200 MB',
35
+ 'description': '高质量乐器分离模型(推荐)'
36
+ },
37
+ 'UVR-MDX-NET-Inst_HQ_3': {
38
+ 'url': 'https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/UVR-MDX-NET-Inst_HQ_3.onnx',
39
+ 'size': '200 MB',
40
+ 'description': '高质量乐器分离模型 v3'
41
+ },
42
+ 'UVR_MDXNET_KARA_2': {
43
+ 'url': 'https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/UVR_MDXNET_KARA_2.onnx',
44
+ 'size': '200 MB',
45
+ 'description': 'Karaoke 人声分离模型'
46
+ },
47
+ 'Kim_Vocal_2': {
48
+ 'url': 'https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/Kim_Vocal_2.onnx',
49
+ 'size': '200 MB',
50
+ 'description': 'Kim 人声分离模型'
51
+ },
52
+ }
53
+
54
+
55
def download_file(url: str, output_path: Path, model_name: str):
    """Download *url* into *output_path*, with a tqdm progress bar when available.

    Args:
        url: source URL of the model file.
        output_path: destination path on disk.
        model_name: model name, used only in progress/log messages.

    Returns:
        bool: True on success; on any failure the partially written file is
        removed and False is returned.
    """
    try:
        print(f"\n📥 开始下载 {model_name}...")
        print(f" URL: {url}")
        print(f" 保存到: {output_path}")

        response = requests.get(url, stream=True, timeout=30)
        response.raise_for_status()

        # Content-Length may be missing; 0 disables percentage display.
        total_size = int(response.headers.get('content-length', 0))
        chunks = response.iter_content(chunk_size=8192)

        if not HAS_TQDM:
            # Plain-text fallback progress display.
            with open(output_path, 'wb') as f:
                downloaded = 0
                for chunk in chunks:
                    f.write(chunk)
                    downloaded += len(chunk)
                    if total_size > 0:
                        percent = (downloaded / total_size) * 100
                        print(f"\r 下载进度: {percent:.1f}%", end='', flush=True)
            print()  # newline after the carriage-return progress line
        else:
            with open(output_path, 'wb') as f, tqdm(
                desc=model_name,
                total=total_size,
                unit='iB',
                unit_scale=True,
                unit_divisor=1024,
            ) as pbar:
                for chunk in chunks:
                    pbar.update(f.write(chunk))

        print(f"✅ {model_name} 下载完成")
        return True

    except requests.exceptions.RequestException as e:
        print(f"❌ 下载失败: {str(e)}")
        # Remove the incomplete file so it cannot be mistaken for a model.
        if output_path.exists():
            output_path.unlink()
        return False
    except Exception as e:
        print(f"❌ 发生错误: {str(e)}")
        if output_path.exists():
            output_path.unlink()
        return False
113
+
114
+
115
def download_with_audio_separator(model_name: str, model_dir: Path):
    """Fetch *model_name* via audio-separator's built-in downloader.

    Loading a model through Separator triggers an automatic download when
    the file is not already present in *model_dir*.

    Args:
        model_name: model name, with or without the '.onnx' suffix.
        model_dir: directory to store/load the model from.

    Returns:
        bool: True when the model was loaded (downloading if needed),
        False on any failure.
    """
    try:
        print(f"\n📥 使用 audio-separator 下载 {model_name}...")

        # WARNING-level logging keeps separator output quiet during download.
        sep = Separator(
            log_level=30,  # WARNING level
            model_file_dir=str(model_dir),
            output_dir=str(model_dir)
        )

        onnx_name = model_name if model_name.endswith('.onnx') else f"{model_name}.onnx"
        sep.load_model(onnx_name)

        print(f"✅ {model_name} 下载/加载完成")
        return True

    except Exception as e:
        print(f"❌ audio-separator 下载失败: {str(e)}")
        return False
143
+
144
+
145
def list_models():
    """Print a formatted catalogue of every downloadable UVR model."""
    print("\n📋 可用的 UVR 模型:")
    print("=" * 70)
    for name, meta in AVAILABLE_MODELS.items():
        print(f"\n模型名称: {name}")
        print(f" 大小: {meta['size']}")
        print(f" 说明: {meta['description']}")
    print("\n" + "=" * 70)
154
+
155
+
156
def check_model_exists(model_name: str, model_dir: Path) -> bool:
    """Report whether '<model_name>.onnx' is already present in *model_dir*.

    Prints the on-disk size when the file is found.
    """
    candidate = model_dir / f"{model_name}.onnx"
    if not candidate.exists():
        return False
    size_mb = candidate.stat().st_size / (1024 * 1024)
    print(f"✓ {model_name} 已存在 ({size_mb:.1f} MB)")
    return True
165
+
166
+
167
def download_model(model_name: str, model_dir: Path, force: bool = False):
    """
    Download one model into *model_dir*.

    Strategy: skip when the file is already present (unless *force*),
    then try audio-separator's built-in downloader, then fall back to a
    direct HTTP download for models listed in AVAILABLE_MODELS.

    Args:
        model_name: model name, with or without the '.onnx' suffix.
        model_dir: target directory.
        force: re-download even when the file already exists.

    Returns:
        bool: True on success, False otherwise.
    """
    # Normalize exactly once so a name that already carries '.onnx'
    # (e.g. the service config default) does not produce '<name>.onnx.onnx'
    # — which made the existence check below never match.
    base_name = model_name[:-len('.onnx')] if model_name.endswith('.onnx') else model_name
    model_path = model_dir / f"{base_name}.onnx"

    # Skip when the model file is already present (unless forced).
    if model_path.exists() and not force:
        print(f"\n✓ {model_name} 已存在")
        print(f" 路径: {model_path}")
        print(f" 使用 --force 强制重新下载")
        return True

    # Option 1: prefer audio-separator's built-in downloader
    # (it supports more models than the manual list).
    if HAS_AUDIO_SEPARATOR:
        if download_with_audio_separator(model_name, model_dir):
            return True
        print(" 尝试手动下载...")

    # Option 2: manual download (only models in AVAILABLE_MODELS).
    if base_name not in AVAILABLE_MODELS:
        print(f"❌ 模型 {model_name} 无法通过手动下载")
        print(f" 请安装 audio-separator 或使用以下模型之一:")
        for name in AVAILABLE_MODELS.keys():
            print(f" - {name}")
        return False

    # Fetch the model over HTTP.
    model_info = AVAILABLE_MODELS[base_name]
    return download_file(model_info['url'], model_path, model_name)
201
+
202
+
203
def download_all_models(model_dir: Path, force: bool = False):
    """Download every model in AVAILABLE_MODELS.

    Args:
        model_dir: target directory for all models.
        force: re-download models that already exist.

    Returns:
        bool: True only when every model downloaded successfully.
    """
    print(f"\n📦 准备下载所有模型到: {model_dir}")

    total_count = len(AVAILABLE_MODELS)
    success_count = sum(
        1 for model_name in AVAILABLE_MODELS.keys()
        if download_model(model_name, model_dir, force)
    )

    print(f"\n{'=' * 70}")
    print(f"下载完成: {success_count}/{total_count} 个模型成功")
    print(f"{'=' * 70}")

    return success_count == total_count
219
+
220
+
221
def main():
    """CLI entry point: parse arguments and dispatch to the download helpers.

    Returns:
        int: process exit code (0 on success, 1 on failure).
    """
    parser = argparse.ArgumentParser(
        description='UVR 模型下载工具',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        # Built with an f-string so the help text always names the real
        # default model — it previously hard-coded UVR-MDX-NET-Inst_HQ_4
        # while DEFAULT_MODEL_NAME is UVR-MDX-NET-Inst_HQ_3.
        epilog=f"""
示例:
  # 列出所有可用模型
  python download_models.py --list

  # 下载默认模型 ({DEFAULT_MODEL_NAME})
  python download_models.py

  # 下载指定模型
  python download_models.py --model UVR_MDXNET_KARA_2

  # 下载所有模型
  python download_models.py --all

  # 指定模型目录
  python download_models.py --dir /path/to/models

  # 强制重新下载
  python download_models.py --force
"""
    )

    parser.add_argument(
        '--list', '-l',
        action='store_true',
        help='列出所有可用的模型'
    )

    parser.add_argument(
        '--model', '-m',
        type=str,
        help='要下载的模型名称'
    )

    parser.add_argument(
        '--all', '-a',
        action='store_true',
        help='下载所有可用的模型'
    )

    parser.add_argument(
        '--dir', '-d',
        type=str,
        default=DEFAULT_MODEL_DIR,
        help=f'模型保存目录 (默认: {DEFAULT_MODEL_DIR})'
    )

    parser.add_argument(
        '--force', '-f',
        action='store_true',
        help='强制重新下载已存在的模型'
    )

    args = parser.parse_args()

    # --list only prints the catalogue; nothing is downloaded.
    if args.list:
        list_models()
        return 0

    # Create the model directory up front.
    model_dir = Path(args.dir)
    model_dir.mkdir(parents=True, exist_ok=True)

    print(f"\n🎵 UVR 模型下载工具")
    print(f"模型目录: {model_dir.absolute()}")

    # --all: download the full catalogue.
    if args.all:
        success = download_all_models(model_dir, args.force)
        return 0 if success else 1

    # --model: download exactly one named model.
    if args.model:
        success = download_model(args.model, model_dir, args.force)
        return 0 if success else 1

    # No selection: fall back to the configured default model.
    default_model = DEFAULT_MODEL_NAME
    print(f"\n使用默认模型: {default_model}")
    print("提示: 使用 --list 查看所有可用模型")

    success = download_model(default_model, model_dir, args.force)

    if success:
        print(f"\n✅ 模型已准备就绪")
        print(f" 模型: {default_model}")
        print(f" 路径: {model_dir / f'{default_model}.onnx'}")
        print(f"\n💡 现在可以启动服务:")
        print(f" ./start_local.sh # 本地运行")
        print(f" ./start.sh # Docker 运行")
        return 0
    else:
        return 1
319
+
320
+
321
# Script entry point: run main() and translate an interrupt or any
# unexpected error into a non-zero exit status.
if __name__ == '__main__':
    try:
        sys.exit(main())
    except KeyboardInterrupt:
        # User aborted (Ctrl+C) mid-download.
        print("\n\n⚠️ 下载已取消")
        sys.exit(1)
    except Exception as e:
        print(f"\n❌ 发生错误: {str(e)}")
        sys.exit(1)
uvr5/processor.py ADDED
@@ -0,0 +1,340 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ UVR Audio Processor Consumer
4
+ Consumes tasks from Redis priority queue, performs audio separation, and sends results
5
+ """
6
+
7
+ import os
8
+ import json
9
+ import logging
10
+ import time
11
+ import signal
12
+ import sys
13
+ import requests
14
+ import tempfile
15
+ from audio_separator.separator import Separator
16
+ import config
17
+ from redis_queue import create_redis_client, RedisPriorityQueue
18
+
19
# Configure logging
# Root-logger setup for the consumer process: INFO level with
# timestamp / logger-name / level prefixes on every line.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
25
+
26
class UVRProcessor:
    """UVR audio separation processor with graceful shutdown.

    Consumes separation tasks from a Redis priority queue, splits each
    audio file into vocal/instrumental stems with audio-separator, and
    pushes a success/failure result onto the result queue at the same
    priority. SIGINT/SIGTERM set a flag that ends the consume loop.
    """

    def __init__(self):
        """Initialize UVR processor with model loaded once.

        Loads the separation model and connects to Redis eagerly so a
        misconfiguration fails fast at startup rather than on the first task.
        """
        self.separator = None
        self.redis_client = None
        self.task_queue = None
        self.result_queue = None
        self.shutdown_flag = False

        # Register signal handlers for graceful shutdown.
        signal.signal(signal.SIGINT, self._signal_handler)
        signal.signal(signal.SIGTERM, self._signal_handler)

        self._load_model()
        self._connect_redis()

    def _signal_handler(self, signum, frame):
        """Handle shutdown signals gracefully.

        Only flips shutdown_flag; the consume loop in start() observes it
        between tasks, so an in-flight task is allowed to finish.
        """
        sig_name = 'SIGTERM' if signum == signal.SIGTERM else 'SIGINT'
        logger.info(f"Received {sig_name}, initiating graceful shutdown...")
        self.shutdown_flag = True

    def _check_gpu_support(self):
        """Check if GPU/CUDA is available.

        Returns:
            tuple: (use_gpu, gpu_info) where use_gpu is True only on Linux
            with a CUDA or TensorRT ONNX execution provider available, and
            gpu_info is a human-readable description for logging.
        """
        import platform

        is_linux = platform.system() == 'Linux'
        cuda_available = False
        gpu_info = "CPU only"

        try:
            import onnxruntime as ort
            providers = ort.get_available_providers()

            if 'CUDAExecutionProvider' in providers:
                cuda_available = True
                gpu_info = "CUDA available"
                logger.info("✓ CUDA Execution Provider detected")
            elif 'TensorrtExecutionProvider' in providers:
                cuda_available = True
                gpu_info = "TensorRT available"
                logger.info("✓ TensorRT Execution Provider detected")
            else:
                logger.info("GPU providers not available, using CPU")
        except Exception as e:
            # onnxruntime missing or probing failed — fall back to CPU.
            logger.warning(f"Failed to check GPU support: {e}")

        return is_linux and cuda_available, gpu_info

    def _load_model(self):
        """Load UVR model - called once on startup (with GPU optimization if available).

        Raises:
            Exception: re-raised when the separator or model fails to load.
        """
        try:
            logger.info(f"Loading UVR model: {config.MODEL_NAME}")
            logger.info(f"Model directory: {config.MODEL_FILE_DIR}")

            # Check GPU support
            use_gpu, gpu_info = self._check_gpu_support()
            logger.info(f"Hardware acceleration: {gpu_info}")

            # Configure Separator
            # Note: audio-separator automatically uses GPU if onnxruntime-gpu is installed
            # and CUDAExecutionProvider is available. No explicit parameter needed.
            separator_kwargs = {
                'log_level': logging.INFO,
                'model_file_dir': config.MODEL_FILE_DIR,
                'output_dir': config.OUTPUT_DIR
            }

            if use_gpu:
                logger.info("🚀 GPU acceleration will be used automatically (onnxruntime-gpu detected)")
            else:
                logger.info("Running on CPU mode")

            self.separator = Separator(**separator_kwargs)

            # Ensure the model name carries the .onnx extension.
            model_filename = config.MODEL_NAME
            if not model_filename.endswith('.onnx'):
                model_filename = f"{model_filename}.onnx"

            # Check whether the model file exists locally.
            model_path = os.path.join(config.MODEL_FILE_DIR, model_filename)
            if not os.path.exists(model_path):
                logger.warning(f"Model file not found: {model_path}")
                logger.info("Attempting to download model automatically...")
                # audio-separator downloads missing models automatically.

            # Load the specific model
            self.separator.load_model(model_filename)

            if use_gpu:
                logger.info("✅ UVR model loaded successfully with GPU acceleration")
            else:
                logger.info("✅ UVR model loaded successfully (CPU mode)")

        except Exception as e:
            logger.error(f"Failed to load UVR model: {str(e)}")
            logger.error(f"Please ensure model exists at: {config.MODEL_FILE_DIR}/{config.MODEL_NAME}.onnx")
            logger.error(f"You can download it using: python3 download_models.py")
            raise

    def _connect_redis(self):
        """Initialize Redis client and priority queues.

        Raises:
            Exception: re-raised when the Redis connection cannot be created.
        """
        try:
            # Create Redis client
            self.redis_client = create_redis_client(
                host=config.REDIS_HOST,
                port=config.REDIS_PORT,
                db=config.REDIS_DB,
                password=config.REDIS_PASSWORD
            )

            # Initialize priority queues
            self.task_queue = RedisPriorityQueue(self.redis_client, config.REDIS_TASK_QUEUE)
            self.result_queue = RedisPriorityQueue(self.redis_client, config.REDIS_RESULT_QUEUE)

            logger.info("Redis connections and queues initialized")
        except Exception as e:
            logger.error(f"Failed to connect to Redis: {str(e)}")
            raise

    # Download handling moved to the API layer; this method is no longer needed.
    # def _download_audio(self, audio_url, task_uuid):
    #     moved to download_audio() in app.py

    def _separate_audio(self, input_path, task_uuid):
        """Perform audio separation using UVR.

        Args:
            input_path: local path of the audio file to split.
            task_uuid: task identifier, used only for log correlation.

        Returns:
            tuple: (vocals_path, instrumental_path) — both verified to exist.

        Raises:
            Exception: when separation fails or either stem cannot be found.
        """
        try:
            logger.info(f"[{task_uuid}] Starting audio separation")

            # Perform separation
            output_files = self.separator.separate(input_path)

            logger.info(f"[{task_uuid}] Separation complete. Output files: {output_files}")

            # Find vocals and instrumental files
            vocals_path = None
            instrumental_path = None

            for file_path in output_files:
                # Make sure we end up with a usable full path:
                # audio-separator may return relative paths or bare filenames.
                if not os.path.isabs(file_path):
                    # Relative: resolve against the configured output dir.
                    full_path = os.path.join(config.OUTPUT_DIR, file_path)
                    if not os.path.exists(full_path):
                        # Fall back to the path exactly as returned.
                        full_path = file_path
                else:
                    full_path = file_path

                # Classify the stem by filename.
                filename = os.path.basename(full_path).lower()
                if 'vocals' in filename or 'voice' in filename:
                    vocals_path = full_path
                elif 'instrumental' in filename or 'inst' in filename:
                    instrumental_path = full_path

            # If not found by name, use order
            if not vocals_path and len(output_files) > 0:
                vocals_path = output_files[0]
                if not os.path.isabs(vocals_path):
                    vocals_path = os.path.join(config.OUTPUT_DIR, vocals_path)

            if not instrumental_path and len(output_files) > 1:
                instrumental_path = output_files[1]
                if not os.path.isabs(instrumental_path):
                    instrumental_path = os.path.join(config.OUTPUT_DIR, instrumental_path)

            # Verify both stem files actually exist on disk.
            if not vocals_path or not os.path.exists(vocals_path):
                raise Exception(f"Vocals file not found: {vocals_path}")
            if not instrumental_path or not os.path.exists(instrumental_path):
                raise Exception(f"Instrumental file not found: {instrumental_path}")

            logger.info(f"[{task_uuid}] Vocals: {vocals_path}")
            logger.info(f"[{task_uuid}] Instrumental: {instrumental_path}")

            return vocals_path, instrumental_path

        except Exception as e:
            logger.error(f"[{task_uuid}] Separation failed: {str(e)}")
            raise

    def _process_task(self, task_data):
        """Process a single task.

        Runs separation on the already-downloaded local file and enqueues
        a success or failure result (at the task's priority) for the
        callback worker. The input file is always deleted afterwards.

        Args:
            task_data: dict with 'task_uuid', 'audio_path', 'hook_url' and
                optional 'priority'.
        """
        task_uuid = task_data['task_uuid']
        audio_path = task_data['audio_path']  # local path is used directly now (download moved to the API layer)
        hook_url = task_data['hook_url']
        priority = task_data.get('priority', config.DEFAULT_PRIORITY)

        vocals_path = None
        instrumental_path = None

        try:
            logger.info(f"[{task_uuid}] Processing task with priority {priority}")
            logger.info(f"[{task_uuid}] Audio file: {audio_path}")

            # Verify the audio file exists before attempting separation.
            if not os.path.exists(audio_path):
                raise Exception(f"Audio file not found: {audio_path}")

            # Separate audio (the file was already downloaded by the API layer).
            vocals_path, instrumental_path = self._separate_audio(audio_path, task_uuid)

            # Send success result to result queue with same priority
            result = {
                'task_uuid': task_uuid,
                'success': True,
                'vocals_path': vocals_path,
                'instrumental_path': instrumental_path,
                'hook_url': hook_url,
                'priority': priority
            }

            self.result_queue.enqueue(result, priority=priority)
            logger.info(f"[{task_uuid}] Success result sent to result queue with priority {priority}")

        except Exception as e:
            logger.error(f"[{task_uuid}] Task processing failed: {str(e)}")

            # Send failure result with same priority
            result = {
                'task_uuid': task_uuid,
                'success': False,
                'error_message': str(e),
                'hook_url': hook_url,
                'priority': priority
            }

            self.result_queue.enqueue(result, priority=priority)
            logger.info(f"[{task_uuid}] Failure result sent to result queue with priority {priority}")

        finally:
            # Clean up input file
            if audio_path and os.path.exists(audio_path):
                try:
                    os.remove(audio_path)
                    logger.info(f"[{task_uuid}] Cleaned up input file")
                except Exception as e:
                    logger.warning(f"[{task_uuid}] Failed to cleanup input file: {e}")

    def start(self):
        """Start consuming and processing tasks.

        Blocking loop: dequeue with a 5-second timeout, process, repeat
        until shutdown_flag is set or too many consecutive Redis errors
        occur. Backoff between errors grows with the error count (capped
        at 30 seconds).
        """
        logger.info("UVR Processor started, waiting for tasks from Redis priority queue...")

        consecutive_errors = 0
        max_consecutive_errors = 10

        try:
            while not self.shutdown_flag:
                try:
                    # Blocking dequeue with 5 second timeout
                    task_data = self.task_queue.dequeue(timeout=5)

                    if task_data is None:
                        consecutive_errors = 0  # reset the error counter
                        continue  # No task available, continue polling

                    task_uuid = task_data.get('task_uuid', 'unknown')
                    priority = task_data.get('priority', config.DEFAULT_PRIORITY)
                    logger.info(f"Received task: {task_uuid} with priority {priority}")

                    try:
                        # Process the task.
                        self._process_task(task_data)
                        logger.info(f"[{task_uuid}] Task completed successfully")
                        consecutive_errors = 0  # reset the error counter

                    except Exception as e:
                        logger.error(f"Error processing task {task_uuid}: {str(e)}")
                        # The task is already off the queue; failure results
                        # were reported inside _process_task.

                except Exception as e:
                    consecutive_errors += 1
                    logger.error(f"Redis processor error ({consecutive_errors}/{max_consecutive_errors}): {e}")

                    if consecutive_errors >= max_consecutive_errors:
                        logger.critical(f"Too many consecutive errors ({consecutive_errors}), stopping processor")
                        break

                    # Back off longer the more errors pile up.
                    sleep_time = min(consecutive_errors * 2, 30)  # sleep at most 30 seconds
                    time.sleep(sleep_time)

        except KeyboardInterrupt:
            logger.info("Received keyboard interrupt")
        except Exception as e:
            if not self.shutdown_flag:
                logger.error(f"Processor error: {str(e)}")
                raise
        finally:
            logger.info("Processor shutting down...")
            self.close()

    def close(self):
        """Close connections gracefully.

        Best-effort: errors while closing are logged, never raised.
        """
        logger.info("Closing Redis connections...")

        # Close the Redis client if it was created.
        if self.redis_client:
            try:
                logger.info("Closing Redis client...")
                self.redis_client.close()
                logger.info("Redis client closed")
            except Exception as e:
                logger.error(f"Error closing Redis client: {e}")

        logger.info("Processor shutdown complete")
337
+
338
# Script entry point: construct the processor (loads the model and connects
# to Redis eagerly) and run the blocking consume loop until shutdown.
if __name__ == '__main__':
    processor = UVRProcessor()
    processor.start()