{ "best_global_step": 550, "best_metric": 0.9458296266430395, "best_model_checkpoint": "models/ner_model/checkpoint-550", "epoch": 5.0, "eval_steps": 500, "global_step": 550, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09090909090909091, "grad_norm": 6.7610626220703125, "learning_rate": 2.7e-06, "loss": 3.1763, "step": 10 }, { "epoch": 0.18181818181818182, "grad_norm": 8.019376754760742, "learning_rate": 5.7000000000000005e-06, "loss": 3.0538, "step": 20 }, { "epoch": 0.2727272727272727, "grad_norm": 8.582132339477539, "learning_rate": 8.7e-06, "loss": 2.7102, "step": 30 }, { "epoch": 0.36363636363636365, "grad_norm": 7.020857810974121, "learning_rate": 1.1700000000000001e-05, "loss": 1.9306, "step": 40 }, { "epoch": 0.45454545454545453, "grad_norm": 6.591127872467041, "learning_rate": 1.47e-05, "loss": 1.5462, "step": 50 }, { "epoch": 0.5454545454545454, "grad_norm": 6.085566520690918, "learning_rate": 1.77e-05, "loss": 1.158, "step": 60 }, { "epoch": 0.6363636363636364, "grad_norm": 10.360540390014648, "learning_rate": 2.07e-05, "loss": 0.88, "step": 70 }, { "epoch": 0.7272727272727273, "grad_norm": 4.375982761383057, "learning_rate": 2.37e-05, "loss": 0.6575, "step": 80 }, { "epoch": 0.8181818181818182, "grad_norm": 10.760860443115234, "learning_rate": 2.6700000000000002e-05, "loss": 0.6622, "step": 90 }, { "epoch": 0.9090909090909091, "grad_norm": 8.98079776763916, "learning_rate": 2.97e-05, "loss": 0.6058, "step": 100 }, { "epoch": 1.0, "grad_norm": 7.706189155578613, "learning_rate": 2.94e-05, "loss": 0.509, "step": 110 }, { "epoch": 1.0, "eval_accuracy": 0.8916666666666667, "eval_entity_accuracy": 0.8543662742950999, "eval_f1": 0.8670505036976651, "eval_loss": 0.38678058981895447, "eval_precision": 0.861471336118019, "eval_recall": 0.8916666666666667, "eval_runtime": 2.6176, "eval_samples_per_second": 166.949, "eval_steps_per_second": 10.697, "step": 110 }, { "epoch": 1.0909090909090908, "grad_norm": 4.655617713928223, "learning_rate": 2.873333333333333e-05, "loss": 0.3582, "step": 120 }, { "epoch": 1.1818181818181819, "grad_norm": 6.469715118408203, "learning_rate": 2.8066666666666667e-05, "loss": 0.4039, "step": 130 }, { "epoch": 1.2727272727272727, "grad_norm": 6.508236408233643, "learning_rate": 2.7400000000000002e-05, "loss": 0.3456, "step": 140 }, { "epoch": 1.3636363636363638, "grad_norm": 7.3737874031066895, "learning_rate": 2.6733333333333334e-05, "loss": 0.4672, "step": 150 }, { "epoch": 1.4545454545454546, "grad_norm": 6.929003715515137, "learning_rate": 2.606666666666667e-05, "loss": 0.3472, "step": 160 }, { "epoch": 1.5454545454545454, "grad_norm": 5.1204986572265625, "learning_rate": 2.54e-05, "loss": 0.3808, "step": 170 }, { "epoch": 1.6363636363636362, "grad_norm": 3.2450709342956543, "learning_rate": 2.4733333333333333e-05, "loss": 0.2377, "step": 180 }, { "epoch": 1.7272727272727273, "grad_norm": 3.943157196044922, "learning_rate": 2.4066666666666664e-05, "loss": 0.34, "step": 190 }, { "epoch": 1.8181818181818183, "grad_norm": 4.091476917266846, "learning_rate": 2.3400000000000003e-05, "loss": 0.3028, "step": 200 }, { "epoch": 1.9090909090909092, "grad_norm": 7.115076541900635, "learning_rate": 2.2733333333333335e-05, "loss": 0.4381, "step": 210 }, { "epoch": 2.0, "grad_norm": 17.910232543945312, "learning_rate": 2.2066666666666667e-05, "loss": 0.2634, "step": 220 }, { "epoch": 2.0, "eval_accuracy": 0.9202830188679245, "eval_entity_accuracy": 0.8863947440459896, "eval_f1": 0.9039013364488941, "eval_loss": 0.2899700701236725, "eval_precision": 0.9016844893810626, "eval_recall": 0.9202830188679245, "eval_runtime": 2.6518, "eval_samples_per_second": 164.792, "eval_steps_per_second": 10.559, "step": 220 }, { "epoch": 2.090909090909091, "grad_norm": 11.105402946472168, "learning_rate": 2.1400000000000002e-05, "loss": 0.2834, "step": 230 }, { "epoch": 2.1818181818181817, "grad_norm": 6.341246604919434, "learning_rate": 2.0733333333333334e-05, "loss": 0.2477, "step": 240 }, { "epoch": 2.2727272727272725, "grad_norm": 4.475722312927246, "learning_rate": 2.0066666666666665e-05, "loss": 0.2042, "step": 250 }, { "epoch": 2.3636363636363638, "grad_norm": 3.8562307357788086, "learning_rate": 1.9399999999999997e-05, "loss": 0.2935, "step": 260 }, { "epoch": 2.4545454545454546, "grad_norm": 3.3338725566864014, "learning_rate": 1.8733333333333336e-05, "loss": 0.2232, "step": 270 }, { "epoch": 2.5454545454545454, "grad_norm": 3.610917568206787, "learning_rate": 1.8066666666666668e-05, "loss": 0.2578, "step": 280 }, { "epoch": 2.6363636363636362, "grad_norm": 6.411995887756348, "learning_rate": 1.74e-05, "loss": 0.2617, "step": 290 }, { "epoch": 2.7272727272727275, "grad_norm": 18.01243782043457, "learning_rate": 1.6733333333333335e-05, "loss": 0.2323, "step": 300 }, { "epoch": 2.8181818181818183, "grad_norm": 6.243415832519531, "learning_rate": 1.6066666666666666e-05, "loss": 0.2226, "step": 310 }, { "epoch": 2.909090909090909, "grad_norm": 3.8026018142700195, "learning_rate": 1.5399999999999998e-05, "loss": 0.1841, "step": 320 }, { "epoch": 3.0, "grad_norm": 4.6072797775268555, "learning_rate": 1.4733333333333333e-05, "loss": 0.2261, "step": 330 }, { "epoch": 3.0, "eval_accuracy": 0.9429245283018868, "eval_entity_accuracy": 0.9280043799616753, "eval_f1": 0.9367311510359461, "eval_loss": 0.22553826868534088, "eval_precision": 0.9404566874612201, "eval_recall": 0.9429245283018868, "eval_runtime": 2.6044, "eval_samples_per_second": 167.791, "eval_steps_per_second": 10.751, "step": 330 }, { "epoch": 3.090909090909091, "grad_norm": 3.598348617553711, "learning_rate": 1.4066666666666667e-05, "loss": 0.2008, "step": 340 }, { "epoch": 3.1818181818181817, "grad_norm": 3.3810553550720215, "learning_rate": 1.34e-05, "loss": 0.1801, "step": 350 }, { "epoch": 3.2727272727272725, "grad_norm": 2.5113697052001953, "learning_rate": 1.2733333333333334e-05, "loss": 0.1917, "step": 360 }, { "epoch": 3.3636363636363638, "grad_norm": 5.911487102508545, "learning_rate": 1.2066666666666666e-05, "loss": 0.1861, "step": 370 }, { "epoch": 3.4545454545454546, "grad_norm": 4.358926296234131, "learning_rate": 1.1400000000000001e-05, "loss": 0.1568, "step": 380 }, { "epoch": 3.5454545454545454, "grad_norm": 3.0283701419830322, "learning_rate": 1.0733333333333333e-05, "loss": 0.2127, "step": 390 }, { "epoch": 3.6363636363636362, "grad_norm": 8.457649230957031, "learning_rate": 1.0066666666666666e-05, "loss": 0.1752, "step": 400 }, { "epoch": 3.7272727272727275, "grad_norm": 2.7831077575683594, "learning_rate": 9.400000000000001e-06, "loss": 0.2328, "step": 410 }, { "epoch": 3.8181818181818183, "grad_norm": 2.8275463581085205, "learning_rate": 8.733333333333333e-06, "loss": 0.1345, "step": 420 }, { "epoch": 3.909090909090909, "grad_norm": 2.621518850326538, "learning_rate": 8.066666666666667e-06, "loss": 0.1343, "step": 430 }, { "epoch": 4.0, "grad_norm": 12.00428295135498, "learning_rate": 7.4e-06, "loss": 0.1415, "step": 440 }, { "epoch": 4.0, "eval_accuracy": 0.9476415094339623, "eval_entity_accuracy": 0.9277306323569668, "eval_f1": 0.940876969962022, "eval_loss": 0.20769040286540985, "eval_precision": 0.9463580459003759, "eval_recall": 0.9476415094339623, "eval_runtime": 2.6645, "eval_samples_per_second": 164.008, "eval_steps_per_second": 10.509, "step": 440 }, { "epoch": 4.090909090909091, "grad_norm": 4.293393611907959, "learning_rate": 6.733333333333334e-06, "loss": 0.1635, "step": 450 }, { "epoch": 4.181818181818182, "grad_norm": 6.984195709228516, "learning_rate": 6.0666666666666665e-06, "loss": 0.1462, "step": 460 }, { "epoch": 4.2727272727272725, "grad_norm": 2.344493865966797, "learning_rate": 5.4e-06, "loss": 0.1356, "step": 470 }, { "epoch": 4.363636363636363, "grad_norm": 2.468249797821045, "learning_rate": 4.7333333333333335e-06, "loss": 0.1248, "step": 480 }, { "epoch": 4.454545454545454, "grad_norm": 1.7062819004058838, "learning_rate": 4.066666666666667e-06, "loss": 0.0904, "step": 490 }, { "epoch": 4.545454545454545, "grad_norm": 1.412717580795288, "learning_rate": 3.4e-06, "loss": 0.1255, "step": 500 }, { "epoch": 4.636363636363637, "grad_norm": 2.9533627033233643, "learning_rate": 2.7333333333333336e-06, "loss": 0.1249, "step": 510 }, { "epoch": 4.7272727272727275, "grad_norm": 11.376725196838379, "learning_rate": 2.0666666666666666e-06, "loss": 0.1459, "step": 520 }, { "epoch": 4.818181818181818, "grad_norm": 24.830320358276367, "learning_rate": 1.4000000000000001e-06, "loss": 0.1765, "step": 530 }, { "epoch": 4.909090909090909, "grad_norm": 3.52287220954895, "learning_rate": 7.333333333333334e-07, "loss": 0.192, "step": 540 }, { "epoch": 5.0, "grad_norm": 28.654787063598633, "learning_rate": 6.666666666666667e-08, "loss": 0.1539, "step": 550 }, { "epoch": 5.0, "eval_accuracy": 0.9511006289308176, "eval_entity_accuracy": 0.9370380509170545, "eval_f1": 0.9458296266430395, "eval_loss": 0.18345355987548828, "eval_precision": 0.9486871211363513, "eval_recall": 0.9511006289308176, "eval_runtime": 2.7757, "eval_samples_per_second": 157.438, "eval_steps_per_second": 10.088, "step": 550 } ], "logging_steps": 10, "max_steps": 550, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 571021163627520.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }