{
"best_global_step": 180,
"best_metric": 50.112359550561806,
"best_model_checkpoint": "./whisper-large-v3-is-raddromur-lora-wandb/checkpoint-180",
"epoch": 2.9856262833675564,
"eval_steps": 30,
"global_step": 180,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.049281314168377825,
"grad_norm": 0.22914348542690277,
"learning_rate": 1.111111111111111e-06,
"loss": 1.391,
"step": 3
},
{
"epoch": 0.09856262833675565,
"grad_norm": 0.24495559930801392,
"learning_rate": 2.7777777777777783e-06,
"loss": 1.417,
"step": 6
},
{
"epoch": 0.14784394250513347,
"grad_norm": 0.2494819313287735,
"learning_rate": 4.444444444444444e-06,
"loss": 1.4382,
"step": 9
},
{
"epoch": 0.1971252566735113,
"grad_norm": 0.23504748940467834,
"learning_rate": 6.111111111111112e-06,
"loss": 1.3623,
"step": 12
},
{
"epoch": 0.2464065708418891,
"grad_norm": 0.25508585572242737,
"learning_rate": 7.77777777777778e-06,
"loss": 1.4247,
"step": 15
},
{
"epoch": 0.29568788501026694,
"grad_norm": 0.24351638555526733,
"learning_rate": 9.444444444444445e-06,
"loss": 1.4221,
"step": 18
},
{
"epoch": 0.34496919917864477,
"grad_norm": 0.2543489933013916,
"learning_rate": 9.876543209876543e-06,
"loss": 1.4149,
"step": 21
},
{
"epoch": 0.3942505133470226,
"grad_norm": 0.250897079706192,
"learning_rate": 9.691358024691358e-06,
"loss": 1.4158,
"step": 24
},
{
"epoch": 0.44353182751540043,
"grad_norm": 0.23567010462284088,
"learning_rate": 9.506172839506174e-06,
"loss": 1.3949,
"step": 27
},
{
"epoch": 0.4928131416837782,
"grad_norm": 0.24683956801891327,
"learning_rate": 9.320987654320989e-06,
"loss": 1.3688,
"step": 30
},
{
"epoch": 0.4928131416837782,
"eval_runtime": 745.8583,
"eval_samples_per_second": 1.735,
"eval_steps_per_second": 0.036,
"eval_wer": 53.24536190227332,
"step": 30
},
{
"epoch": 0.5420944558521561,
"grad_norm": 0.22976352274417877,
"learning_rate": 9.135802469135803e-06,
"loss": 1.3591,
"step": 33
},
{
"epoch": 0.5913757700205339,
"grad_norm": 0.24124480783939362,
"learning_rate": 8.950617283950618e-06,
"loss": 1.3709,
"step": 36
},
{
"epoch": 0.6406570841889117,
"grad_norm": 0.22739216685295105,
"learning_rate": 8.765432098765432e-06,
"loss": 1.4126,
"step": 39
},
{
"epoch": 0.6899383983572895,
"grad_norm": 0.2386259138584137,
"learning_rate": 8.580246913580249e-06,
"loss": 1.3458,
"step": 42
},
{
"epoch": 0.7392197125256673,
"grad_norm": 0.23364992439746857,
"learning_rate": 8.395061728395062e-06,
"loss": 1.3779,
"step": 45
},
{
"epoch": 0.7885010266940452,
"grad_norm": 0.23184379935264587,
"learning_rate": 8.209876543209876e-06,
"loss": 1.338,
"step": 48
},
{
"epoch": 0.837782340862423,
"grad_norm": 0.23423455655574799,
"learning_rate": 8.024691358024692e-06,
"loss": 1.3115,
"step": 51
},
{
"epoch": 0.8870636550308009,
"grad_norm": 0.23327411711215973,
"learning_rate": 7.839506172839507e-06,
"loss": 1.2838,
"step": 54
},
{
"epoch": 0.9363449691991786,
"grad_norm": 0.24564896523952484,
"learning_rate": 7.654320987654322e-06,
"loss": 1.3335,
"step": 57
},
{
"epoch": 0.9856262833675564,
"grad_norm": 0.21617886424064636,
"learning_rate": 7.469135802469136e-06,
"loss": 1.3044,
"step": 60
},
{
"epoch": 0.9856262833675564,
"eval_runtime": 755.2022,
"eval_samples_per_second": 1.713,
"eval_steps_per_second": 0.036,
"eval_wer": 53.106872223673896,
"step": 60
},
{
"epoch": 1.0492813141683779,
"grad_norm": 0.2329856902360916,
"learning_rate": 7.283950617283952e-06,
"loss": 1.403,
"step": 63
},
{
"epoch": 1.0985626283367556,
"grad_norm": 0.2415734827518463,
"learning_rate": 7.098765432098766e-06,
"loss": 1.2926,
"step": 66
},
{
"epoch": 1.1478439425051334,
"grad_norm": 0.22719435393810272,
"learning_rate": 6.913580246913581e-06,
"loss": 1.3266,
"step": 69
},
{
"epoch": 1.1971252566735113,
"grad_norm": 0.22385141253471375,
"learning_rate": 6.728395061728395e-06,
"loss": 1.3099,
"step": 72
},
{
"epoch": 1.2464065708418892,
"grad_norm": 0.22575075924396515,
"learning_rate": 6.543209876543211e-06,
"loss": 1.2993,
"step": 75
},
{
"epoch": 1.2956878850102669,
"grad_norm": 0.2280450165271759,
"learning_rate": 6.358024691358025e-06,
"loss": 1.2516,
"step": 78
},
{
"epoch": 1.3449691991786448,
"grad_norm": 0.21805013716220856,
"learning_rate": 6.17283950617284e-06,
"loss": 1.2796,
"step": 81
},
{
"epoch": 1.3942505133470227,
"grad_norm": 0.2454097718000412,
"learning_rate": 5.9876543209876546e-06,
"loss": 1.2567,
"step": 84
},
{
"epoch": 1.4435318275154003,
"grad_norm": 0.23440390825271606,
"learning_rate": 5.80246913580247e-06,
"loss": 1.2578,
"step": 87
},
{
"epoch": 1.4928131416837782,
"grad_norm": 0.21233566105365753,
"learning_rate": 5.617283950617285e-06,
"loss": 1.226,
"step": 90
},
{
"epoch": 1.4928131416837782,
"eval_runtime": 757.9185,
"eval_samples_per_second": 1.707,
"eval_steps_per_second": 0.036,
"eval_wer": 51.94408152599947,
"step": 90
},
{
"epoch": 1.542094455852156,
"grad_norm": 0.23111841082572937,
"learning_rate": 5.432098765432099e-06,
"loss": 1.2835,
"step": 93
},
{
"epoch": 1.5913757700205338,
"grad_norm": 0.22747503221035004,
"learning_rate": 5.246913580246914e-06,
"loss": 1.1713,
"step": 96
},
{
"epoch": 1.6406570841889117,
"grad_norm": 0.24629150331020355,
"learning_rate": 5.061728395061729e-06,
"loss": 1.2652,
"step": 99
},
{
"epoch": 1.6899383983572895,
"grad_norm": 0.20970605313777924,
"learning_rate": 4.876543209876544e-06,
"loss": 1.2063,
"step": 102
},
{
"epoch": 1.7392197125256672,
"grad_norm": 0.2347603589296341,
"learning_rate": 4.691358024691358e-06,
"loss": 1.1642,
"step": 105
},
{
"epoch": 1.7885010266940453,
"grad_norm": 0.22151677310466766,
"learning_rate": 4.506172839506173e-06,
"loss": 1.2559,
"step": 108
},
{
"epoch": 1.837782340862423,
"grad_norm": 0.21644067764282227,
"learning_rate": 4.3209876543209875e-06,
"loss": 1.2654,
"step": 111
},
{
"epoch": 1.8870636550308009,
"grad_norm": 0.2234969586133957,
"learning_rate": 4.135802469135803e-06,
"loss": 1.1653,
"step": 114
},
{
"epoch": 1.9363449691991788,
"grad_norm": 0.2156331092119217,
"learning_rate": 3.9506172839506175e-06,
"loss": 1.172,
"step": 117
},
{
"epoch": 1.9856262833675564,
"grad_norm": 0.21376466751098633,
"learning_rate": 3.7654320987654325e-06,
"loss": 1.2796,
"step": 120
},
{
"epoch": 1.9856262833675564,
"eval_runtime": 760.4652,
"eval_samples_per_second": 1.702,
"eval_steps_per_second": 0.036,
"eval_wer": 51.795139796185,
"step": 120
},
{
"epoch": 2.0492813141683777,
"grad_norm": 0.2266222983598709,
"learning_rate": 3.580246913580247e-06,
"loss": 1.3315,
"step": 123
},
{
"epoch": 2.0985626283367558,
"grad_norm": 0.22814051806926727,
"learning_rate": 3.395061728395062e-06,
"loss": 1.1759,
"step": 126
},
{
"epoch": 2.1478439425051334,
"grad_norm": 0.22590585052967072,
"learning_rate": 3.2098765432098767e-06,
"loss": 1.2064,
"step": 129
},
{
"epoch": 2.197125256673511,
"grad_norm": 0.22349856793880463,
"learning_rate": 3.0246913580246917e-06,
"loss": 1.1868,
"step": 132
},
{
"epoch": 2.246406570841889,
"grad_norm": 0.21798408031463623,
"learning_rate": 2.8395061728395062e-06,
"loss": 1.1485,
"step": 135
},
{
"epoch": 2.295687885010267,
"grad_norm": 0.23827993869781494,
"learning_rate": 2.6543209876543212e-06,
"loss": 1.1347,
"step": 138
},
{
"epoch": 2.344969199178645,
"grad_norm": 0.21975603699684143,
"learning_rate": 2.469135802469136e-06,
"loss": 1.152,
"step": 141
},
{
"epoch": 2.3942505133470227,
"grad_norm": 0.2301456183195114,
"learning_rate": 2.283950617283951e-06,
"loss": 1.212,
"step": 144
},
{
"epoch": 2.4435318275154003,
"grad_norm": 0.2236107736825943,
"learning_rate": 2.0987654320987654e-06,
"loss": 1.2156,
"step": 147
},
{
"epoch": 2.4928131416837784,
"grad_norm": 0.22880277037620544,
"learning_rate": 1.9135802469135804e-06,
"loss": 1.1885,
"step": 150
},
{
"epoch": 2.4928131416837784,
"eval_runtime": 758.1625,
"eval_samples_per_second": 1.707,
"eval_steps_per_second": 0.036,
"eval_wer": 50.802194930755164,
"step": 150
},
{
"epoch": 2.542094455852156,
"grad_norm": 0.23217734694480896,
"learning_rate": 1.7283950617283952e-06,
"loss": 1.2508,
"step": 153
},
{
"epoch": 2.5913757700205338,
"grad_norm": 0.21702837944030762,
"learning_rate": 1.54320987654321e-06,
"loss": 1.1574,
"step": 156
},
{
"epoch": 2.640657084188912,
"grad_norm": 0.22827443480491638,
"learning_rate": 1.3580246913580248e-06,
"loss": 1.1662,
"step": 159
},
{
"epoch": 2.6899383983572895,
"grad_norm": 0.22730480134487152,
"learning_rate": 1.1728395061728396e-06,
"loss": 1.1829,
"step": 162
},
{
"epoch": 2.739219712525667,
"grad_norm": 0.24221959710121155,
"learning_rate": 9.876543209876544e-07,
"loss": 1.2032,
"step": 165
},
{
"epoch": 2.7885010266940453,
"grad_norm": 0.22492796182632446,
"learning_rate": 8.024691358024692e-07,
"loss": 1.1646,
"step": 168
},
{
"epoch": 2.837782340862423,
"grad_norm": 0.23047611117362976,
"learning_rate": 6.17283950617284e-07,
"loss": 1.1689,
"step": 171
},
{
"epoch": 2.8870636550308006,
"grad_norm": 0.22853408753871918,
"learning_rate": 4.320987654320988e-07,
"loss": 1.1771,
"step": 174
},
{
"epoch": 2.9363449691991788,
"grad_norm": 0.21958370506763458,
"learning_rate": 2.469135802469136e-07,
"loss": 1.1692,
"step": 177
},
{
"epoch": 2.9856262833675564,
"grad_norm": 0.22913524508476257,
"learning_rate": 6.17283950617284e-08,
"loss": 1.1844,
"step": 180
},
{
"epoch": 2.9856262833675564,
"eval_runtime": 756.6055,
"eval_samples_per_second": 1.71,
"eval_steps_per_second": 0.036,
"eval_wer": 50.112359550561806,
"step": 180
}
],
"logging_steps": 3,
"max_steps": 180,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 30,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.1899959344350469e+20,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}