{ "model": "xtts", "run_name": "xttsv2_finetune_es", "output_path": "/tmp/output_model", "datasets": [ { "formatter": "json", "meta_file_train": "/tmp/voxpopuli_es_500/metadata.json", "path": "/tmp/voxpopuli_es_500/wav_data", "language": "es" } ], "batch_size": 4, "eval_batch_size": 4, "num_loader_workers": 2, "num_eval_loader_workers": 1, "run_eval": true, "test_delay_epochs": 1, "epochs": 5, "save_step": 100, "print_step": 25, "eval_step": 100, "mixed_precision": true, "cudnn_benchmark": true, "lr": 1e-4, "grad_clip": 5.0, "loss_masking": true, "use_speaker_embedding": true, "compute_input_seq_cache": true, "eval_split_size": 0.05, "text_cleaner": "multilingual_cleaners", "use_phonemes": false, "phoneme_language": "es", "min_text_len": 1, "max_text_len": 200, "min_audio_len": 1000, "max_audio_len": 160000, "output_step": 25, "logger": "tensorboard", "seed": 1234 }