1+ from openai import OpenAI
2+ from pathlib import Path
3+ import base64
4+ import os
5+ import sys
6+ sys .path .append (os .path .join (os .path .dirname (__file__ ), ".." , ".." ))
7+ from core .config_utils import load_key
8+
def wav_to_base64(wav_file_path):
    """Return the contents of the file at ``wav_file_path`` as base64 text.

    The file is read in binary mode in one go, base64-encoded, and the
    resulting ASCII bytes are decoded to ``str`` so the value can be embedded
    in a text payload.
    """
    with open(wav_file_path, mode='rb') as wav_handle:
        encoded_bytes = base64.b64encode(wav_handle.read())
    return encoded_bytes.decode('utf-8')
14+
def cosyvoice_tts_for_videolingo(text, save_as, number, task_df):
    """Run CosyVoice TTS on ``text`` using a reference audio clip.

    The reference clip is looked up under ``output/audio/refers`` relative to
    the current working directory: first ``<number>.wav``, then ``1.wav`` as a
    fallback. If neither exists, ``extract_refer_audio_main()`` is invoked to
    generate reference audio before continuing with the fallback path.

    Args:
        text: Text to synthesize; sent as the request ``input``.
        save_as: Destination path for the generated WAV. Missing parent
            directories are created.
        number: Value matched against the ``'number'`` column of ``task_df``
            and used as the stem of the preferred reference file.
        task_df: Table with ``'number'`` and ``'origin'`` columns, indexed via
            ``.loc`` (presumably a pandas DataFrame -- confirm against the
            caller).

    Returns:
        ``True`` once the response has been streamed to ``save_as``.

    Raises:
        IndexError: If no row of ``task_df`` matches ``number`` (the
            ``.values[0]`` lookup on an empty selection).
        Exception: Anything raised while extracting reference audio is logged
            and re-raised unchanged; errors from reading the reference file or
            from the API call propagate as well.
    """
    # The 'origin' text of this row is sent as the transcript of the reference.
    # NOTE(review): when the 1.wav fallback is used, this text still belongs to
    # row `number`, not row 1 -- confirm whether that mismatch is acceptable.
    prompt_text = task_df.loc[task_df['number'] == number, 'origin'].values[0]
    api_key = load_key("sf_cosyvoice2.api_key")

    # Locate the reference audio. The file name must not contain stray
    # whitespace, otherwise the per-line reference can never be found.
    current_dir = Path.cwd()
    ref_audio_path = current_dir / f"output/audio/refers/{number}.wav"

    # Fall back to the first reference clip when the per-line one is missing.
    if not ref_audio_path.exists():
        ref_audio_path = current_dir / "output/audio/refers/1.wav"
        if not ref_audio_path.exists():
            try:
                # Imported lazily: only needed on this recovery path.
                from core.step9_extract_refer_audio import extract_refer_audio_main
                print(f"参考音频文件不存在,尝试提取: {ref_audio_path} ")
                extract_refer_audio_main()
            except Exception as e:
                print(f"提取参考音频失败: {e} ")
                raise

    # Encode the reference audio as base64 for embedding in the request body.
    reference_base64 = wav_to_base64(ref_audio_path)

    client = OpenAI(
        api_key=api_key,
        base_url="https://api.siliconflow.cn/v1",
    )

    save_path = Path(save_as)
    save_path.parent.mkdir(parents=True, exist_ok=True)

    with client.audio.speech.with_streaming_response.create(
        model="FunAudioLLM/CosyVoice2-0.5B",
        voice="",  # left empty; the voice is supplied via `references` below
        input=text,
        response_format="wav",
        extra_body={
            "references": [
                {
                    # Data URI must end exactly at the base64 payload -- no
                    # trailing whitespace.
                    "audio": f"data:audio/wav;base64,{reference_base64}",
                    "text": prompt_text,
                }
            ]
        },
    ) as response:
        response.stream_to_file(save_path)

    print(f"音频已成功保存至: {save_path} ")
    return True
0 commit comments