import os import yaml import gradio as gr import asyncio import requests from inferenceapi import setup_args, inference from thirdparty.edgetts.azuretts import textToSpeechAsync, SUPPORTED_VOICES as AZURE_SUPPORTED_VOICES from thirdparty.aliyun.aliyuntts import aliyun_text_to_speech, SUPPORTED_VOICES as ALIYUN_SUPPORTED_VOICES from thirdparty.ui.gradiotheme import EnhancedSeafoam # 安装 PyYAML 库 # pip install pyyaml # 加载Avatar配置文件 def load_avatar_configs(config_file="avatar_configs.yaml"): with open(config_file, "r", encoding="utf-8") as f: data = yaml.safe_load(f) return data avatar_data = load_avatar_configs() avatars = avatar_data["avatars"] # 定义全局模型缓存字典 models = {} # 获取所有音频文件的唯一列表,用于下拉菜单和上传组件 all_audios = list(set(aud for avatar in avatars for aud in avatar["config"]["audios"])) def run_inference(path, workspace, seed, test, test_train, aud): args_dict = { "path": path, "workspace": workspace, "seed": seed, "test": test, "test_train": test_train, "aud": aud } opt = setup_args(None, args_dict) assert os.path.exists(opt.workspace), "Workspace directory does not exist." assert os.path.exists(opt.path), "Dataset path does not exist." video_path = inference(opt) return video_path def update_params(avatar_index): config = avatars[avatar_index]["config"] default_audio = config['audios'][0] if config['audios'] else all_audios[0] return config["path"], config["workspace"], config["seed"], default_audio def find_examples(folder_path, extensions): full_path = os.path.abspath(folder_path) examples = [] for f in os.listdir(full_path): if f.endswith(extensions): file_path = os.path.join(full_path, f) if os.path.exists(file_path): examples.append(file_path) else: print(f"Warning: File not found: {file_path}") return examples base_dir = os.path.dirname(os.path.abspath(__file__)) audio_folder = os.path.join(base_dir, "assets", "") audio_examples = find_examples(audio_folder, ('.mp3', '.wav', '.ogg')) def save_uploaded_audio(file): if not os.path.exists("assets"): os.makedirs("assets") file_path = os.path.join("assets", "temp_input.wav") with open(file_path, "wb") as f: f.write(file.read()) return file_path def textToSpeechUnifiedAzure(text, voice, rate, volume): return asyncio.run(textToSpeechAsync(text, voice, rate, volume)) def textToSpeechUnifiedAliyun(text, voice, rate, volume): return asyncio.run(aliyun_text_to_speech(text, voice, rate, volume))[1] def textToSpeechUnifiedCosy(text, sft_name="康辉", speaker_name="", prompt_text="", prompt_speech=""): url = "http://localhost:8000/inference/tts" headers = {"accept": "application/json", "Content-Type": "application/json"} data = {"query": text, "speaker_name": speaker_name, "sft_name": sft_name, "prompt_text": prompt_text, "prompt_speech": prompt_speech} response = requests.post(url, headers=headers, json=data) if response.status_code == 200: audio_content = response.content file_path = f"assets/cosy_tmp.wav" with open(file_path, "wb") as f: f.write(audio_content) return file_path else: raise Exception(f"CosyVoice TTS request failed with status {response.status_code}") enhanced_seafoam = EnhancedSeafoam() logo_path = os.path.join(base_dir, "assets", "company_logo_small_1.png") def get_version_details(): with open("version_history.md", "r", encoding="utf-8") as markdown_file: version_details = markdown_file.read() return version_details def gradio_interface(): with gr.Blocks(css="""footer {display: none !important;} #audio_button {color: black !important;} #output_video {color: blue !important;} """, theme=enhanced_seafoam) as app: logo = gr.Image(logo_path, show_label=False, container=False) gr.Markdown('
Choose an avatar and configure parameters for generating animated models based on speech.
', elem_id="subtitle") with gr.Accordion("版本更新说明", open=False): gr.Markdown(get_version_details()) gr.Markdown('本应用仅供测试体验,非法版权人像传播引起的一切后果请自行承担!
', elem_id="subtitle") with gr.Row(): with gr.Column(scale=1): gr.Markdown("### Choose Avatar") avatar_gallery = gr.Gallery(label="Avatar Preview", value=[(avatar['photo'], avatar['name']) for avatar in avatars]) with gr.Column(scale=1): output_video = gr.Video(label="Inference Result", elem_id="output_video") gr.Markdown("### Input Audio驱动用的音频,以下三种方式三选一") with gr.Row(): audio_output = gr.Audio(label="Upload Audio上传或者麦克风录制语音", type="filepath") with gr.Column(scale=1): gr.Examples(examples=[[audio] for audio in audio_examples], inputs=[audio_output], label="Audio Examples", elem_id="audio_button") with gr.Column(scale=2): with gr.Tabs(): with gr.Tab("CosyVoice TTS(声音复刻,本地部署)"): with gr.Column(): cosy_input_text = gr.Textbox(label="Input Text for CosyVoice TTS") cosy_speaker_name = gr.Dropdown(choices=["康辉", "rayray","其他可用说话人"], label="Speaker Name") cosy_generate = gr.Button("点击语音合成") with gr.Tab("Aliyun TTS(推荐)"): with gr.Column(): aliyun_input_text = gr.Textbox(label="Input Text for Aliyun TTS") aliyun_voice = gr.Dropdown(choices=list(ALIYUN_SUPPORTED_VOICES.keys()), label="Aliyun TTS Voice") aliyun_generate = gr.Button("点击语音合成") with gr.Tab("Azure TTS (海外不稳定)"): with gr.Column(): azure_input_text = gr.Textbox(label="Input Text for Azure TTS") azure_voice = gr.Dropdown(choices=list(AZURE_SUPPORTED_VOICES.keys()), label="Azure TTS Voice") azure_generate = gr.Button("点击语音合成") cosy_generate.click(textToSpeechUnifiedCosy, inputs=[cosy_input_text, cosy_speaker_name], outputs=audio_output) def handle_uploaded_audio(file): if isinstance(file, str): return file else: return save_uploaded_audio(file) audio_output.change(fn=handle_uploaded_audio, inputs=[audio_output], outputs=[audio_output]) with gr.Row(): with gr.Accordion("Basic Parameters", open=False): path_input = gr.Textbox(label="Data Path", value=avatars[0]["config"]["path"]) workspace_input = gr.Textbox(label="Workspace Directory", value=avatars[0]["config"]["workspace"]) seed_input = gr.Number(label="Random Seed", value=avatars[0]["config"]["seed"], precision=0) test_input = gr.Checkbox(label="Test Mode", value=True) test_train_input = gr.Checkbox(label="Test Train Mode", value=True) with gr.Accordion("TTS Advanced Settings语音TTS高级设置", open=False): tts_rate = gr.Slider(-100, 100, value=0, label="Speech Rate") tts_volume = gr.Slider(-100, 100, value=0, label="Volume") run_button = gr.Button("生成") def select_avatar(evt: gr.SelectData): return update_params(evt.index) avatar_gallery.select(fn=select_avatar, inputs=[], outputs=[path_input, workspace_input, seed_input, audio_output]) azure_generate.click( textToSpeechUnifiedAzure, inputs=[azure_input_text, azure_voice, tts_rate, tts_volume], outputs=audio_output ) aliyun_generate.click( textToSpeechUnifiedAliyun, inputs=[aliyun_input_text, aliyun_voice, tts_rate, tts_volume], outputs=audio_output ) # 事件绑定:运行推理 run_button.click( fn=run_inference, inputs=[path_input, workspace_input, seed_input, test_input, test_train_input, audio_output], outputs=output_video ) gr.HTML("""© 2024 未来式智能
autoagents.ai