File size: 695 Bytes
5704ce5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
import json, csv, os

# Convert the dataset's JSON manifest (metadata.json) into a pipe-delimited
# metadata.csv with one row per entry: relative audio path | text | speaker.
root = "/tmp/dataset/voxpopuli_es_500"

# Directory-name prefix that may appear at the front of "audio_filepath";
# it is removed so the paths written to the CSV are relative to `root`.
dataset_prefix = "voxpopuli_es_500/"

with open(os.path.join(root, "metadata.json"), "r", encoding="utf-8") as f:
    # Expected to be a list of dicts with the keys "audio_filepath", "text"
    # and "speaker" (a missing key raises KeyError below).
    data = json.load(f)

# newline="" is required when handing a file to csv.writer: the writer emits
# its own "\r\n" row terminator, and without it platforms that translate
# newlines would turn that into "\r\r\n".
with open(
    os.path.join(root, "metadata.csv"), "w", encoding="utf-8", newline=""
) as f:
    # QUOTE_NONE keeps the fields unquoted; any "|" or "\" inside a field is
    # escaped with a backslash instead.
    writer = csv.writer(f, delimiter="|", quoting=csv.QUOTE_NONE, escapechar="\\")
    for entry in data:
        # Strip the leading "voxpopuli_es_500/" prefix if present.
        path = entry["audio_filepath"]
        if path.startswith(dataset_prefix):
            path = path[len(dataset_prefix):]

        # Flatten every kind of line break ("\r\n", "\r", "\n") to a space so
        # a transcript can never spill over into a second physical line.
        text = (
            entry["text"]
            .replace("\r\n", " ")
            .replace("\r", " ")
            .replace("\n", " ")
            .strip()
        )
        speaker = entry["speaker"]

        writer.writerow([path, text, speaker])