{ "best_global_step": 100, "best_metric": 0.3854297995567322, "best_model_checkpoint": "./lora_out/efu0wyi4/checkpoint-100", "epoch": 1.0, "eval_steps": 10, "global_step": 107, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "entropy": 1.1153396010398864, "epoch": 0.09411764705882353, "grad_norm": 1.2214514017105103, "learning_rate": 0.00016003636363636365, "loss": 1.2524, "mean_token_accuracy": 0.7594971776008606, "num_tokens": 115799.0, "step": 10 }, { "epoch": 0.09411764705882353, "eval_entropy": 0.9907166957855225, "eval_loss": 0.9035767912864685, "eval_mean_token_accuracy": 0.8175230169296265, "eval_num_tokens": 115799.0, "eval_runtime": 26.0099, "eval_samples_per_second": 5.767, "eval_steps_per_second": 5.767, "step": 10 }, { "entropy": 0.6309416361153126, "epoch": 0.18823529411764706, "grad_norm": 0.6099486351013184, "learning_rate": 0.0001793, "loss": 0.5626, "mean_token_accuracy": 0.8832408726215363, "num_tokens": 184486.0, "step": 20 }, { "epoch": 0.18823529411764706, "eval_entropy": 0.4709713250398636, "eval_loss": 0.5018168687820435, "eval_mean_token_accuracy": 0.8997142084439596, "eval_num_tokens": 184486.0, "eval_runtime": 26.0226, "eval_samples_per_second": 5.764, "eval_steps_per_second": 5.764, "step": 20 }, { "entropy": 0.5151705276221037, "epoch": 0.2823529411764706, "grad_norm": 0.5180553793907166, "learning_rate": 0.000158925, "loss": 0.5382, "mean_token_accuracy": 0.8940704673528671, "num_tokens": 275845.0, "step": 30 }, { "epoch": 0.2823529411764706, "eval_entropy": 0.4590779893596967, "eval_loss": 0.4454800486564636, "eval_mean_token_accuracy": 0.9063887639840444, "eval_num_tokens": 275845.0, "eval_runtime": 26.0809, "eval_samples_per_second": 5.751, "eval_steps_per_second": 5.751, "step": 30 }, { "entropy": 0.4594229131937027, "epoch": 0.3764705882352941, "grad_norm": 0.2898012399673462, "learning_rate": 0.00013855, "loss": 0.45, "mean_token_accuracy": 0.91114012748003, "num_tokens": 368424.0, "step": 40 }, { "epoch": 0.3764705882352941, "eval_entropy": 0.4673765400548776, "eval_loss": 0.43020305037498474, "eval_mean_token_accuracy": 0.9115767550468444, "eval_num_tokens": 368424.0, "eval_runtime": 26.0231, "eval_samples_per_second": 5.764, "eval_steps_per_second": 5.764, "step": 40 }, { "entropy": 0.3577877376228571, "epoch": 0.47058823529411764, "grad_norm": 0.25978532433509827, "learning_rate": 0.000118175, "loss": 0.343, "mean_token_accuracy": 0.9222506016492844, "num_tokens": 429589.0, "step": 50 }, { "epoch": 0.47058823529411764, "eval_entropy": 0.39317929953336717, "eval_loss": 0.42108139395713806, "eval_mean_token_accuracy": 0.9134278730551402, "eval_num_tokens": 429589.0, "eval_runtime": 26.002, "eval_samples_per_second": 5.769, "eval_steps_per_second": 5.769, "step": 50 }, { "entropy": 0.54765380397439, "epoch": 0.5647058823529412, "grad_norm": 0.2597196698188782, "learning_rate": 9.78e-05, "loss": 0.5644, "mean_token_accuracy": 0.8865802466869355, "num_tokens": 544259.0, "step": 60 }, { "epoch": 0.5647058823529412, "eval_entropy": 0.41075391257802646, "eval_loss": 0.4073421359062195, "eval_mean_token_accuracy": 0.9149504574139913, "eval_num_tokens": 544259.0, "eval_runtime": 26.2517, "eval_samples_per_second": 5.714, "eval_steps_per_second": 5.714, "step": 60 }, { "entropy": 0.3822147287428379, "epoch": 0.6588235294117647, "grad_norm": 0.3055135905742645, "learning_rate": 7.7425e-05, "loss": 0.3693, "mean_token_accuracy": 0.922209607064724, "num_tokens": 623365.0, "step": 70 }, { "epoch": 0.6588235294117647, "eval_entropy": 0.4195191798110803, "eval_loss": 0.3989087641239166, "eval_mean_token_accuracy": 0.915905403693517, "eval_num_tokens": 623365.0, "eval_runtime": 26.2348, "eval_samples_per_second": 5.718, "eval_steps_per_second": 5.718, "step": 70 }, { "entropy": 0.38976135551929475, "epoch": 0.7529411764705882, "grad_norm": 0.34105750918388367, "learning_rate": 5.7050000000000004e-05, "loss": 0.3761, "mean_token_accuracy": 0.9197401210665703, "num_tokens": 699013.0, "step": 80 }, { "epoch": 0.7529411764705882, "eval_entropy": 0.39603232031067215, "eval_loss": 0.3924122452735901, "eval_mean_token_accuracy": 0.917570983171463, "eval_num_tokens": 699013.0, "eval_runtime": 26.1772, "eval_samples_per_second": 5.73, "eval_steps_per_second": 5.73, "step": 80 }, { "entropy": 0.47345383167266847, "epoch": 0.8470588235294118, "grad_norm": 0.3139539957046509, "learning_rate": 3.6675000000000004e-05, "loss": 0.4697, "mean_token_accuracy": 0.9068517610430717, "num_tokens": 800061.0, "step": 90 }, { "epoch": 0.8470588235294118, "eval_entropy": 0.38997318550944327, "eval_loss": 0.39237019419670105, "eval_mean_token_accuracy": 0.917557996114095, "eval_num_tokens": 800061.0, "eval_runtime": 26.0084, "eval_samples_per_second": 5.767, "eval_steps_per_second": 5.767, "step": 90 }, { "entropy": 0.2872362457215786, "epoch": 0.9411764705882353, "grad_norm": 0.29094481468200684, "learning_rate": 1.63e-05, "loss": 0.2916, "mean_token_accuracy": 0.9335257709026337, "num_tokens": 865712.0, "step": 100 }, { "epoch": 0.9411764705882353, "eval_entropy": 0.3926775233944257, "eval_loss": 0.3854297995567322, "eval_mean_token_accuracy": 0.9184372560183207, "eval_num_tokens": 865712.0, "eval_runtime": 26.027, "eval_samples_per_second": 5.763, "eval_steps_per_second": 5.763, "step": 100 } ], "logging_steps": 10, "max_steps": 107, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2041922721504768.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }