#######################################################################################
#
# MIT License
#
# Copyright (c) [2025] [leonelhs@gmail.com]
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
#######################################################################################

# This file implements an API endpoint for the Mandarin Chinese Kokoro Text-to-Speech (TTS) system.
# It provides functionality to generate TTS audio from input Mandarin Chinese text using the Kokoro voice model.


# Source code is based on or inspired by several projects.
# For more details and proper attribution, please refer to the following resources:
#
# - [Kokoro] - [https://github.com/hexgrad/kokoro]
# - [Misaki] - [https://github.com/hexgrad/misaki]
# - [Kokoro-82M] - [https://huggingface.co/hexgrad/Kokoro-82M]
# - [Kokoro-onnx] - [https://github.com/thewh1teagle/kokoro-onnx]

import os

import gradio as gr
from huggingface_hub import snapshot_download
from kokoro_onnx import Kokoro
from misaki import zh

# Hugging Face Hub repository that is downloaded at startup to obtain the
# Kokoro ONNX model and voice files.
KOKORO_REPO_ID = "leonelhs/kokoro-thewh1teagle"

# Numeric IDs of the available voices, listed in the order in which they are
# shown to the user: female ("zf_") voices first, then male ("zm_") voices.
_FEMALE_VOICE_IDS = (
    1, 2, 3, 4, 5, 6, 7, 8, 17, 18, 19, 21, 22, 23, 24, 26, 27, 28, 32, 36,
    38, 39, 40, 42, 43, 44, 46, 47, 48, 49, 51, 59, 60, 67, 70, 71, 72, 73,
    74, 75, 76, 77, 78, 79, 83, 84, 85, 86, 87, 88, 90, 92, 93, 94, 99,
)
_MALE_VOICE_IDS = (
    9, 10, 11, 12, 13, 14, 15, 16, 20, 25, 29, 30, 31, 33, 34, 35, 37, 41,
    45, 50, 52, 53, 54, 55, 56, 57, 58, 61, 62, 63, 64, 65, 66, 68, 69, 80,
    81, 82, 89, 91, 95, 96, 97, 98, 100,
)

# Maps the label displayed in the UI (gender icon + zero-padded number) to the
# voice ID handed to the model, e.g. '🚺 001' -> 'zf_001'.
VOICES = {
    f'{icon} {number:03d}': f'{prefix}_{number:03d}'
    for icon, prefix, numbers in (
        ('🚺', 'zf', _FEMALE_VOICE_IDS),
        ('🚹', 'zm', _MALE_VOICE_IDS),
    )
    for number in numbers
}

# Fetch the model repository from the Hugging Face Hub. The returned value is
# used below as the local directory that contains the downloaded files.
snapshot = snapshot_download(repo_id=KOKORO_REPO_ID)

# Misaki grapheme-to-phoneme (G2P) converter for Mandarin Chinese, version "1.1".
# NOTE(review): this comment previously mentioned an "espeak-ng fallback", but
# no fallback is configured in this call — confirm whether one is expected.
g2p = zh.ZHG2P(version="1.1")

# Kokoro ONNX model and voice bank, both loaded from the downloaded snapshot.
model_path = os.path.join(snapshot, "kokoro-v1.1-zh.onnx")
voices_path = os.path.join(snapshot, "voices-v1.1-zh.bin")
# NOTE(review): "config.json" is a relative path, unlike the two paths above;
# presumably it is resolved against the current working directory rather than
# the snapshot directory — confirm the file is shipped next to this script.
kokoro = Kokoro(model_path, voices_path, vocab_config="config.json")

def predict(text: str, voice: str = 'zf_001', speed: float = 1):
    """
    Generate speech audio from Mandarin Chinese text input.

    A ValueError is raised when the text is empty or contains only whitespace.

    Parameters:
        text (string): The text to be converted into speech.
        voice (string): The selected male or female voice profile (specific voice ID, e.g. 'zf_001').
        speed (float): The speaking rate multiplier (e.g., 1.0 = normal speed, 0.8 = slower, 1.2 = faster).

    Returns:
        tuple: The pair (sample_rate, samples) holding the generated speech audio.
    """
    # Fail fast with a readable message instead of pushing empty input through
    # the phonemizer and the model.
    if not text or not text.strip():
        raise ValueError("Input text must not be empty.")

    # Convert the text to phonemes; the second value returned by the G2P is
    # not needed here.
    phonemes, _ = g2p(text)

    # The input is already phonemized, hence is_phonemes=True.
    samples, sample_rate = kokoro.create(phonemes, voice, speed, is_phonemes=True)

    # kokoro.create yields (samples, sample_rate); the result is returned with
    # the sample rate first.
    return sample_rate, samples

# Gradio interface: text, voice and speed inputs are passed to predict(), and
# its result is rendered by the audio output component.
app = gr.Interface(
    predict,
    [
        gr.Textbox(label='Input Text'),
        # Choices are (label, voice ID) pairs taken from VOICES; the default
        # 'zf_001' is a voice ID, matching predict()'s default voice.
        gr.Dropdown(list(VOICES.items()), value='zf_001', label='Voice'),
        gr.Slider(minimum=0.5, maximum=2, value=1, step=0.1, label='Speed')
    ],
    gr.Audio(label='Output Audio', interactive=False, streaming=False, autoplay=True),
    description="Kokoro TTS 🇨🇳 API Endpoint",
)

# Configure the request queue BEFORE launching. launch() with debug=True keeps
# the main thread busy while the server runs, so a queue() call placed after
# it would only execute once the server has already stopped.
app.queue()
app.launch(share=False, debug=True, show_error=True, mcp_server=True)