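# NOTE(review): the three lines below are page residue captured along with this
# file (a hosting status banner), not program text — confirm and drop if unneeded.
# Spaces:
# Runtime error
# Runtime error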
import os
import shutil

from huggingface_hub import snapshot_download
import gradio as gr

# Work from the directory that contains this script so the relative
# "pretrained_models/..." paths below resolve the same way no matter where
# the interpreter was started.
os.chdir(os.path.dirname(os.path.abspath(__file__)))

# Fetch the three model repositories into ./pretrained_models. Each call
# returns the local folder path of the downloaded snapshot.
hallo_dir = snapshot_download(
    repo_id="fudan-generative-ai/hallo",
    local_dir="pretrained_models",
)
joyhallo_dir = snapshot_download(
    repo_id="jdh-algo/JoyHallo-v1",
    local_dir="pretrained_models/joyhallo",
)
wav_dir = snapshot_download(
    repo_id="TencentGameMate/chinese-wav2vec2-base",
    local_dir="pretrained_models/chinese-wav2vec2-base",
)

# Startup diagnostics: show where the weights landed and what the base folder holds.
print(hallo_dir, joyhallo_dir)
print(os.listdir(hallo_dir))

# Deliberately imported after the downloads rather than at the top of the file.
# NOTE(review): presumably scripts.inference expects the weights to exist on
# disk when it is imported — confirm before moving this line.
from scripts.inference import predict
def run_inference(source_image, driving_audio, progress=gr.Progress(track_tqdm=True)):
    """Generate a talking-head video from one face image and one audio clip.

    Args:
        source_image: Path of the face image (the UI image input is declared
            with type="filepath").
        driving_audio: Path of the driving audio (the UI audio input is
            declared with type="filepath").
        progress: Not read in the body. Declaring a parameter that defaults to
            gr.Progress(track_tqdm=True) is how Gradio is asked to surface
            tqdm progress bars from the underlying call in the UI.

    Returns:
        Whatever predict() returns; it is wired to the video output component.

    Raises:
        gr.Error: If the image or the audio is missing, so the user sees a
            clear message instead of a failure from inside predict().
    """
    # A click with an empty input component hands the handler None.
    if not source_image:
        raise gr.Error("Please upload a face image before generating.")
    if not driving_audio:
        raise gr.Error("Please provide a driving audio clip before generating.")

    # The four trailing numbers are fixed positional settings forwarded as-is;
    # their meaning is defined by scripts.inference.predict (not visible here).
    return predict(source_image, driving_audio, 1.0, 1.0, 1.0, 1.2)
# Custom stylesheet passed to gr.Blocks(css=...). It styles two notice panels
# (#warning-ready and #warning-duplicate) plus a dark-mode variant.
# NOTE(review): no component in the layout visible in this file sets either of
# those element ids, so these rules may currently match nothing — confirm.
css = '''
div#warning-ready {
    background-color: #ecfdf5;
    padding: 0 16px 16px;
    margin: 20px 0;
    color: #030303!important;
}
div#warning-ready > .gr-prose > h2, div#warning-ready > .gr-prose > p {
    color: #057857!important;
}
div#warning-duplicate {
    background-color: #ebf5ff;
    padding: 0 16px 16px;
    margin: 20px 0;
    color: #030303!important;
}
div#warning-duplicate > .gr-prose > h2, div#warning-duplicate > .gr-prose > p {
    color: #0f4592!important;
}
div#warning-duplicate strong {
    color: #0f4592;
}
p.actions {
    display: flex;
    align-items: center;
    margin: 20px 0;
}
div#warning-duplicate .actions a {
    display: inline-block;
    margin-right: 10px;
}
.dark #warning-duplicate {
    background-color: #0c0c0c !important;
    border: 1px solid white !important;
}
'''
# Single-page UI: a title, usage notes, an input column (face image, driving
# audio, Generate button) and an output column showing the resulting video.
# NOTE(review): the nesting below was reconstructed from context because the
# original indentation was not available — confirm the button's column.
with gr.Blocks(css=css) as demo:
    gr.Markdown("# JoyHallo: Digital human model for Mandarin")
    gr.Markdown("Generate talking head avatars driven by Mandarin speech. Data requirements:")
    # Written as adjacent literals so the Markdown text carries no leading
    # indentation from the surrounding code.
    gr.Markdown(
        "\n"
        "Image:\n"
        "1. Cropped to square shape.\n"
        "2. Face should be facing forward and occupy 50%-70% of the image.\n"
        "Audio:\n"
        "1. Use wav format.\n"
        "2. Mandarin, English or mixed, with clear audio and suitable background music.\n"
        "! Important: Too long audio will cause a very long processing time, please keep the audio length within 5s.\n"
    )
    with gr.Row():
        with gr.Column():
            # Both inputs use type="filepath", so the handler receives paths on disk.
            avatar_face = gr.Image(type="filepath", label="Face")
            driving_audio = gr.Audio(type="filepath", label="Driving audio")
            generate = gr.Button("Generate")
        with gr.Column():
            output_video = gr.Video(label="Your talking head")
    # Event wiring must happen inside the Blocks context.
    generate.click(
        fn=run_inference,
        inputs=[avatar_face, driving_audio],
        outputs=output_video,
    )

# Start the web server; this call blocks when the file is run as a script.
demo.launch()