{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.20118343195266272, "eval_steps": 17, "global_step": 34, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.005917159763313609, "grad_norm": 0.6224875040916811, "learning_rate": 3.75e-07, "loss": 2.2194, "step": 1 }, { "epoch": 0.005917159763313609, "eval_loss": 2.1284265518188477, "eval_runtime": 197.2728, "eval_samples_per_second": 0.938, "eval_steps_per_second": 0.081, "step": 1 }, { "epoch": 0.011834319526627219, "grad_norm": 0.699774954026372, "learning_rate": 7.5e-07, "loss": 2.2098, "step": 2 }, { "epoch": 0.01775147928994083, "grad_norm": 0.8978069601988599, "learning_rate": 1.125e-06, "loss": 2.0771, "step": 3 }, { "epoch": 0.023668639053254437, "grad_norm": 0.603433372430734, "learning_rate": 1.5e-06, "loss": 2.1207, "step": 4 }, { "epoch": 0.029585798816568046, "grad_norm": 0.6705451670098455, "learning_rate": 1.875e-06, "loss": 2.0443, "step": 5 }, { "epoch": 0.03550295857988166, "grad_norm": 0.5685871807834867, "learning_rate": 2.25e-06, "loss": 2.0938, "step": 6 }, { "epoch": 0.04142011834319527, "grad_norm": 0.6813185340138558, "learning_rate": 2.6250000000000003e-06, "loss": 2.0113, "step": 7 }, { "epoch": 0.047337278106508875, "grad_norm": 1.461724410712979, "learning_rate": 3e-06, "loss": 1.9878, "step": 8 }, { "epoch": 0.05325443786982249, "grad_norm": 0.4806729570541462, "learning_rate": 2.999785830935332e-06, "loss": 2.1648, "step": 9 }, { "epoch": 0.05917159763313609, "grad_norm": 0.6450195095319927, "learning_rate": 2.999143405285129e-06, "loss": 2.1168, "step": 10 }, { "epoch": 0.0650887573964497, "grad_norm": 0.5199179785432192, "learning_rate": 2.998072967649747e-06, "loss": 2.1624, "step": 11 }, { "epoch": 0.07100591715976332, "grad_norm": 0.5888842243430149, "learning_rate": 2.9965749255929686e-06, "loss": 2.0793, "step": 12 }, { "epoch": 0.07692307692307693, "grad_norm": 0.5762628204395337, "learning_rate": 2.9946498494868224e-06, "loss": 2.1745, "step": 13 }, { "epoch": 0.08284023668639054, "grad_norm": 0.4611886576496398, "learning_rate": 2.9922984722944177e-06, "loss": 2.1977, "step": 14 }, { "epoch": 0.08875739644970414, "grad_norm": 0.4176129104372226, "learning_rate": 2.9895216892908717e-06, "loss": 2.1568, "step": 15 }, { "epoch": 0.09467455621301775, "grad_norm": 0.9682295199118589, "learning_rate": 2.9863205577224416e-06, "loss": 2.181, "step": 16 }, { "epoch": 0.10059171597633136, "grad_norm": 0.4087372617870282, "learning_rate": 2.982696296403978e-06, "loss": 2.2208, "step": 17 }, { "epoch": 0.10059171597633136, "eval_loss": 2.090203285217285, "eval_runtime": 197.1425, "eval_samples_per_second": 0.938, "eval_steps_per_second": 0.081, "step": 17 }, { "epoch": 0.10650887573964497, "grad_norm": 0.6944983731430622, "learning_rate": 2.9786502852548696e-06, "loss": 1.9687, "step": 18 }, { "epoch": 0.11242603550295859, "grad_norm": 0.42170107190665923, "learning_rate": 2.9741840647736478e-06, "loss": 2.1707, "step": 19 }, { "epoch": 0.11834319526627218, "grad_norm": 0.5031933608002007, "learning_rate": 2.9692993354514477e-06, "loss": 1.838, "step": 20 }, { "epoch": 0.1242603550295858, "grad_norm": 0.401561752549318, "learning_rate": 2.9639979571245524e-06, "loss": 2.1108, "step": 21 }, { "epoch": 0.1301775147928994, "grad_norm": 0.4745388099037245, "learning_rate": 2.958281948266274e-06, "loss": 2.0944, "step": 22 }, { "epoch": 0.13609467455621302, "grad_norm": 0.3966815146465992, "learning_rate": 2.952153485218428e-06, "loss": 2.1954, "step": 23 }, { "epoch": 0.14201183431952663, "grad_norm": 0.38324183695928576, "learning_rate": 2.9456149013627003e-06, "loss": 2.0669, "step": 24 }, { "epoch": 0.14792899408284024, "grad_norm": 0.45238869305755414, "learning_rate": 2.9386686862322264e-06, "loss": 2.0082, "step": 25 }, { "epoch": 0.15384615384615385, "grad_norm": 0.43587746677118455, "learning_rate": 2.9313174845637132e-06, "loss": 2.0851, "step": 26 }, { "epoch": 0.15976331360946747, "grad_norm": 0.3280269395122322, "learning_rate": 2.9235640952904685e-06, "loss": 2.1769, "step": 27 }, { "epoch": 0.16568047337278108, "grad_norm": 0.3364191479742792, "learning_rate": 2.91541147047672e-06, "loss": 2.1024, "step": 28 }, { "epoch": 0.17159763313609466, "grad_norm": 0.36098339761423204, "learning_rate": 2.9068627141936344e-06, "loss": 1.9672, "step": 29 }, { "epoch": 0.17751479289940827, "grad_norm": 0.39425457725953256, "learning_rate": 2.897921081337456e-06, "loss": 2.0966, "step": 30 }, { "epoch": 0.1834319526627219, "grad_norm": 0.6235048469037876, "learning_rate": 2.8885899763902215e-06, "loss": 2.0379, "step": 31 }, { "epoch": 0.1893491124260355, "grad_norm": 0.5041187119001387, "learning_rate": 2.878872952123519e-06, "loss": 1.969, "step": 32 }, { "epoch": 0.1952662721893491, "grad_norm": 0.5682354341756487, "learning_rate": 2.8687737082457906e-06, "loss": 1.9563, "step": 33 }, { "epoch": 0.20118343195266272, "grad_norm": 0.45352123274250616, "learning_rate": 2.8582960899936856e-06, "loss": 1.9577, "step": 34 }, { "epoch": 0.20118343195266272, "eval_loss": 2.0715060234069824, "eval_runtime": 197.0, "eval_samples_per_second": 0.939, "eval_steps_per_second": 0.081, "step": 34 } ], "logging_steps": 1, "max_steps": 169, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 34, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 8316801515520.0, "train_batch_size": 3, "trial_name": null, "trial_params": null }