{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.975609756097561, "eval_steps": 500, "global_step": 15, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06504065040650407, "grad_norm": 0.5584165453910828, "learning_rate": 4.9453690018345144e-05, "loss": 0.7344, "num_input_tokens_seen": 2097152, "step": 1 }, { "epoch": 0.13008130081300814, "grad_norm": 0.4928242862224579, "learning_rate": 4.783863644106502e-05, "loss": 0.7113, "num_input_tokens_seen": 4194304, "step": 2 }, { "epoch": 0.1951219512195122, "grad_norm": 0.4566553831100464, "learning_rate": 4.522542485937369e-05, "loss": 0.7009, "num_input_tokens_seen": 6291456, "step": 3 }, { "epoch": 0.2601626016260163, "grad_norm": 0.38513678312301636, "learning_rate": 4.172826515897146e-05, "loss": 0.6704, "num_input_tokens_seen": 8388608, "step": 4 }, { "epoch": 0.3252032520325203, "grad_norm": 0.36934641003608704, "learning_rate": 3.7500000000000003e-05, "loss": 0.6504, "num_input_tokens_seen": 10485760, "step": 5 }, { "epoch": 0.3902439024390244, "grad_norm": 0.33891424536705017, "learning_rate": 3.272542485937369e-05, "loss": 0.6217, "num_input_tokens_seen": 12582912, "step": 6 }, { "epoch": 0.45528455284552843, "grad_norm": 0.30929532647132874, "learning_rate": 2.761321158169134e-05, "loss": 0.6052, "num_input_tokens_seen": 14680064, "step": 7 }, { "epoch": 0.5203252032520326, "grad_norm": 0.28951436281204224, "learning_rate": 2.238678841830867e-05, "loss": 0.6302, "num_input_tokens_seen": 16777216, "step": 8 }, { "epoch": 0.5853658536585366, "grad_norm": 0.29053160548210144, "learning_rate": 1.7274575140626318e-05, "loss": 0.5964, "num_input_tokens_seen": 18874368, "step": 9 }, { "epoch": 0.6504065040650406, "grad_norm": 0.2903811037540436, "learning_rate": 1.2500000000000006e-05, "loss": 0.6233, "num_input_tokens_seen": 20971520, "step": 10 }, { "epoch": 0.7154471544715447, "grad_norm": 0.28426122665405273, "learning_rate": 8.271734841028553e-06, "loss": 0.5964, "num_input_tokens_seen": 23068672, "step": 11 }, { "epoch": 0.7804878048780488, "grad_norm": 0.25174424052238464, "learning_rate": 4.7745751406263165e-06, "loss": 0.6106, "num_input_tokens_seen": 25165824, "step": 12 }, { "epoch": 0.8455284552845529, "grad_norm": 0.24576599895954132, "learning_rate": 2.1613635589349756e-06, "loss": 0.5916, "num_input_tokens_seen": 27262976, "step": 13 }, { "epoch": 0.9105691056910569, "grad_norm": 0.24116040766239166, "learning_rate": 5.463099816548579e-07, "loss": 0.5893, "num_input_tokens_seen": 29360128, "step": 14 }, { "epoch": 0.975609756097561, "grad_norm": 0.23365961015224457, "learning_rate": 0.0, "loss": 0.6298, "num_input_tokens_seen": 31457280, "step": 15 }, { "epoch": 0.975609756097561, "num_input_tokens_seen": 31457280, "step": 15, "total_flos": 1.2251230144089293e+18, "train_loss": 0.6374592224756876, "train_runtime": 1520.8651, "train_samples_per_second": 5.175, "train_steps_per_second": 0.01 } ], "logging_steps": 1, "max_steps": 15, "num_input_tokens_seen": 31457280, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.2251230144089293e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }