{
  "data_config": {
    "n_samples": 48000,
    "hop_size": 240,
    "sampling_rate": 24000,
    "batch_size": 4,
    "num_workers": 8,
    "pin_memory": false
  },
  "model_config": {
    "factors": [2, 3, 4, 4, 5],
    "upsampling_preconv_out_channels": 768,
    "upsampling_out_channels": [384, 192, 96, 48, 24],
    "upsampling_dilations": [
      [1, 3, 9, 27],
      [1, 3, 9, 27],
      [1, 3, 9, 27],
      [1, 3, 9, 27],
      [1, 3, 9, 27]
    ],
    "downsampling_preconv_out_channels": 24,
    "downsampling_out_channels": [48, 96, 192, 384],
    "downsampling_dilations": [
      [1, 2, 4],
      [1, 2, 4],
      [1, 2, 4],
      [1, 2, 4]
    ],
    "use_raw": true,
    "num_harmonic": 2,
    "harmonic_type": 2,
    "nhv_inchannels": 1025,
    "nhv_cat_type": "PLS",
    "nhv_noise_std": 0.03
  },
  "optimizer_config": {
    "lr": 5e-4,
    "scheduler_step_size": 100000,
    "scheduler_gamma": 0.5
  },
  "loss_config": {
    "stft": {
      "fft_sizes": [2048, 1024, 512, 256, 128, 64],
      "hop_sizes": [512, 256, 128, 64, 32, 16],
      "win_lengths": [2048, 1024, 512, 256, 128, 64]
    },
    "lambda_aux": 1.0,
    "lambda_adv": 2.5,
    "generator_adv_loss_params": {
      "average_by_discriminators": true,
      "loss_type": "mse"
    },
    "discriminator_adv_loss_params": {
      "average_by_discriminators": true,
      "loss_type": "mse"
    }
  },
  "training_config": {
    "train_max_steps": 3000000,
    "discriminator_train_start_steps": 100000,
    "generator_grad_norm": 1,
    "discriminator_grad_norm": 1,
    "distributed": true,
    "rank": 0
  },
  "interval_config": {
    "out_dir": "",
    "num_save_intermediate_results": 5,
    "save_interval_steps": 25000,
    "eval_interval_steps": 25000,
    "log_interval_steps": 1000
  }
}