Spaces:
Runtime error
Runtime error
Update finetune_xtts_hf.py
Browse files- finetune_xtts_hf.py +35 -37
finetune_xtts_hf.py
CHANGED
|
@@ -25,7 +25,7 @@ subprocess.run([sys.executable, "-m", "pip", "uninstall", "-y", "hf_transfer"])
|
|
| 25 |
HF_MODEL_ID = "sob111/xtts-v2-finetuned" # <--- cambia con tu repo en HF
|
| 26 |
HF_TOKEN = os.environ.get("HF_TOKEN") # Debe estar definido en tu Space/entorno
|
| 27 |
DATASET_PATH = "/tmp/dataset" # Ruta a tu dataset
|
| 28 |
-
VOXPOPULI_PATH = "/tmp/dataset/
|
| 29 |
OUTPUT_PATH = "/tmp/output_model"
|
| 30 |
BASE_MODEL = "coqui/XTTS-v2"
|
| 31 |
|
|
@@ -88,7 +88,7 @@ def extract_zip(zip_file_path, destination_path):
|
|
| 88 |
print(f"❌ An unexpected error occurred: {e}")
|
| 89 |
|
| 90 |
# Example usage:
|
| 91 |
-
zip_file = "/home/user/app/
|
| 92 |
|
| 93 |
# To protect against security vulnerabilities, it is important to sanitize the destination path.
|
| 94 |
# This prevents an attacker from using a malicious ZIP file to write outside the destination folder.
|
|
@@ -103,29 +103,29 @@ print(f"safe destination {safe_destination}")
|
|
| 103 |
import json
|
| 104 |
|
| 105 |
# === Convertir metadata.json → metadata.csv ===
|
| 106 |
-
json_path = os.path.join(VOXPOPULI_PATH, "metadata.json")
|
| 107 |
-
print(f"ruta de json {json_path}")
|
| 108 |
-
csv_path = os.path.join(VOXPOPULI_PATH, "metadata.csv")
|
| 109 |
|
| 110 |
-
if os.path.exists(json_path):
|
| 111 |
-
print("🔄 Convirtiendo metadata.json → metadata.csv...")
|
| 112 |
-
with open(json_path, "r", encoding="utf-8") as f:
|
| 113 |
-
data = json.load(f)
|
| 114 |
|
| 115 |
-
with open(csv_path, "w", encoding="utf-8", newline="") as f:
|
| 116 |
-
writer = csv.writer(f, delimiter=",", quoting=csv.QUOTE_MINIMAL)
|
| 117 |
|
| 118 |
-
for entry in data:
|
| 119 |
-
path = entry["audio_filepath"]
|
| 120 |
# Quitar prefijo "voxpopuli_es_500/" si existe
|
| 121 |
-
if path.startswith("voxpopuli_es_500/"):
|
| 122 |
-
path = path.replace("voxpopuli_es_500/", "", 1)
|
| 123 |
-
text = entry["text"].replace("\n", " ").strip()
|
| 124 |
-
speaker = entry.get("speaker", "spk1")
|
| 125 |
-
writer.writerow([path, text, speaker])
|
| 126 |
-
print(f"✅ metadata.csv generado en {csv_path}")
|
| 127 |
-
else:
|
| 128 |
-
raise FileNotFoundError(f"❌ No se encontró {json_path}. Verifica el zip.")
|
| 129 |
|
| 130 |
from TTS.tts.datasets import load_tts_samples
|
| 131 |
from TTS.config.shared_configs import BaseDatasetConfig
|
|
@@ -134,30 +134,28 @@ from TTS.config.shared_configs import BaseDatasetConfig
|
|
| 134 |
config_dataset = BaseDatasetConfig(
|
| 135 |
formatter="vctk",
|
| 136 |
dataset_name="voxpopuli",
|
| 137 |
-
path="/tmp/dataset/
|
| 138 |
-
meta_file_train="metadata.csv",
|
| 139 |
-
meta_file_val="metadata.csv",
|
| 140 |
language="es",
|
| 141 |
)
|
| 142 |
|
| 143 |
# Construimos rutas completas
|
| 144 |
-
root_path = config_dataset.path
|
| 145 |
-
meta_file_train = config_dataset.meta_file_train
|
| 146 |
|
| 147 |
-
meta_path = os.path.join(root_path, meta_file_train)
|
| 148 |
-
print(f"Verificando archivo CSV: {meta_path}")
|
| 149 |
-
print(f"Existe?: {os.path.exists(meta_path)}")
|
| 150 |
|
| 151 |
# Intentamos cargar los samples
|
| 152 |
-
try:
|
| 153 |
-
train_samples, eval_samples = load_tts_samples(config_dataset)
|
| 154 |
|
| 155 |
-
print(f"Samples detectados: {len(train_samples)} training, {len(eval_samples)} eval")
|
| 156 |
-
print("Primeros 3 samples:")
|
| 157 |
-
for s in train_samples[:3]:
|
| 158 |
-
print(s)
|
| 159 |
-
except AssertionError as e:
|
| 160 |
-
print("❌ Error cargando samples:", e)
|
| 161 |
|
| 162 |
print("=== Editando configuración para fine-tuning con VoxPopuli ===")
|
| 163 |
|
|
|
|
| 25 |
HF_MODEL_ID = "sob111/xtts-v2-finetuned" # <--- cambia con tu repo en HF
|
| 26 |
HF_TOKEN = os.environ.get("HF_TOKEN") # Debe estar definido en tu Space/entorno
|
| 27 |
DATASET_PATH = "/tmp/dataset" # Ruta a tu dataset
|
| 28 |
+
VOXPOPULI_PATH = "/tmp/dataset/voxpopuli_es_500_vctk" # Ruta a tu dataset
|
| 29 |
OUTPUT_PATH = "/tmp/output_model"
|
| 30 |
BASE_MODEL = "coqui/XTTS-v2"
|
| 31 |
|
|
|
|
| 88 |
print(f"❌ An unexpected error occurred: {e}")
|
| 89 |
|
| 90 |
# Example usage:
|
| 91 |
+
zip_file = "/home/user/app/voxpopuli_es_500_vctk.zip"
|
| 92 |
|
| 93 |
# To protect against security vulnerabilities, it is important to sanitize the destination path.
|
| 94 |
# This prevents an attacker from using a malicious ZIP file to write outside the destination folder.
|
|
|
|
| 103 |
import json
|
| 104 |
|
| 105 |
# === Convertir metadata.json → metadata.csv ===
|
| 106 |
+
#json_path = os.path.join(VOXPOPULI_PATH, "metadata.json")
|
| 107 |
+
#print(f"ruta de json {json_path}")
|
| 108 |
+
#csv_path = os.path.join(VOXPOPULI_PATH, "metadata.csv")
|
| 109 |
|
| 110 |
+
#if os.path.exists(json_path):
|
| 111 |
+
# print("🔄 Convirtiendo metadata.json → metadata.csv...")
|
| 112 |
+
# with open(json_path, "r", encoding="utf-8") as f:
|
| 113 |
+
# data = json.load(f)
|
| 114 |
|
| 115 |
+
# with open(csv_path, "w", encoding="utf-8", newline="") as f:
|
| 116 |
+
# writer = csv.writer(f, delimiter=",", quoting=csv.QUOTE_MINIMAL)
|
| 117 |
|
| 118 |
+
# for entry in data:
|
| 119 |
+
# path = entry["audio_filepath"]
|
| 120 |
# Quitar prefijo "voxpopuli_es_500/" si existe
|
| 121 |
+
# if path.startswith("voxpopuli_es_500/"):
|
| 122 |
+
# path = path.replace("voxpopuli_es_500/", "", 1)
|
| 123 |
+
# text = entry["text"].replace("\n", " ").strip()
|
| 124 |
+
# speaker = entry.get("speaker", "spk1")
|
| 125 |
+
# writer.writerow([path, text, speaker])
|
| 126 |
+
# print(f"✅ metadata.csv generado en {csv_path}")
|
| 127 |
+
#else:
|
| 128 |
+
# raise FileNotFoundError(f"❌ No se encontró {json_path}. Verifica el zip.")
|
| 129 |
|
| 130 |
from TTS.tts.datasets import load_tts_samples
|
| 131 |
from TTS.config.shared_configs import BaseDatasetConfig
|
|
|
|
| 134 |
config_dataset = BaseDatasetConfig(
|
| 135 |
formatter="vctk",
|
| 136 |
dataset_name="voxpopuli",
|
| 137 |
+
path="/tmp/dataset/voxpopuli_es_500_vctk",
|
|
|
|
|
|
|
| 138 |
language="es",
|
| 139 |
)
|
| 140 |
|
| 141 |
# Construimos rutas completas
|
| 142 |
+
#root_path = config_dataset.path
|
| 143 |
+
#meta_file_train = config_dataset.meta_file_train
|
| 144 |
|
| 145 |
+
#meta_path = os.path.join(root_path, meta_file_train)
|
| 146 |
+
#print(f"Verificando archivo CSV: {meta_path}")
|
| 147 |
+
#print(f"Existe?: {os.path.exists(meta_path)}")
|
| 148 |
|
| 149 |
# Intentamos cargar los samples
|
| 150 |
+
#try:
|
| 151 |
+
# train_samples, eval_samples = load_tts_samples(config_dataset)
|
| 152 |
|
| 153 |
+
# print(f"Samples detectados: {len(train_samples)} training, {len(eval_samples)} eval")
|
| 154 |
+
# print("Primeros 3 samples:")
|
| 155 |
+
# for s in train_samples[:3]:
|
| 156 |
+
# print(s)
|
| 157 |
+
#except AssertionError as e:
|
| 158 |
+
# print("❌ Error cargando samples:", e)
|
| 159 |
|
| 160 |
print("=== Editando configuración para fine-tuning con VoxPopuli ===")
|
| 161 |
|