{ "best_metric": 0.7391304347826086, "best_model_checkpoint": "swiftformer-xs-DMAE\\checkpoint-87", "epoch": 34.285714285714285, "eval_steps": 500, "global_step": 120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.86, "eval_accuracy": 0.32608695652173914, "eval_loss": 1.3860292434692383, "eval_runtime": 0.6541, "eval_samples_per_second": 70.321, "eval_steps_per_second": 4.586, "step": 3 }, { "epoch": 2.0, "eval_accuracy": 0.5, "eval_loss": 1.383039951324463, "eval_runtime": 0.6141, "eval_samples_per_second": 74.902, "eval_steps_per_second": 4.885, "step": 7 }, { "epoch": 2.86, "learning_rate": 0.00016666666666666666, "loss": 1.3842, "step": 10 }, { "epoch": 2.86, "eval_accuracy": 0.5869565217391305, "eval_loss": 1.3756428956985474, "eval_runtime": 0.6407, "eval_samples_per_second": 71.802, "eval_steps_per_second": 4.683, "step": 10 }, { "epoch": 4.0, "eval_accuracy": 0.6086956521739131, "eval_loss": 1.3493651151657104, "eval_runtime": 0.5981, "eval_samples_per_second": 76.904, "eval_steps_per_second": 5.015, "step": 14 }, { "epoch": 4.86, "eval_accuracy": 0.6521739130434783, "eval_loss": 1.303892731666565, "eval_runtime": 0.6221, "eval_samples_per_second": 73.938, "eval_steps_per_second": 4.822, "step": 17 }, { "epoch": 5.71, "learning_rate": 0.0002941176470588235, "loss": 1.3427, "step": 20 }, { "epoch": 6.0, "eval_accuracy": 0.6521739130434783, "eval_loss": 1.1961779594421387, "eval_runtime": 0.6102, "eval_samples_per_second": 75.391, "eval_steps_per_second": 4.917, "step": 21 }, { "epoch": 6.86, "eval_accuracy": 0.6739130434782609, "eval_loss": 1.1310076713562012, "eval_runtime": 0.6321, "eval_samples_per_second": 72.769, "eval_steps_per_second": 4.746, "step": 24 }, { "epoch": 8.0, "eval_accuracy": 0.6521739130434783, "eval_loss": 1.07782781124115, "eval_runtime": 0.5902, "eval_samples_per_second": 77.946, "eval_steps_per_second": 5.083, "step": 28 }, { "epoch": 8.57, "learning_rate": 0.00026470588235294115, "loss": 1.1837, "step": 30 }, { "epoch": 8.86, "eval_accuracy": 0.6956521739130435, "eval_loss": 1.027555227279663, "eval_runtime": 0.6977, "eval_samples_per_second": 65.934, "eval_steps_per_second": 4.3, "step": 31 }, { "epoch": 10.0, "eval_accuracy": 0.717391304347826, "eval_loss": 0.9927520751953125, "eval_runtime": 0.6266, "eval_samples_per_second": 73.407, "eval_steps_per_second": 4.787, "step": 35 }, { "epoch": 10.86, "eval_accuracy": 0.717391304347826, "eval_loss": 0.9709724187850952, "eval_runtime": 0.6331, "eval_samples_per_second": 72.653, "eval_steps_per_second": 4.738, "step": 38 }, { "epoch": 11.43, "learning_rate": 0.0002352941176470588, "loss": 1.0531, "step": 40 }, { "epoch": 12.0, "eval_accuracy": 0.6739130434782609, "eval_loss": 0.9734796285629272, "eval_runtime": 0.6324, "eval_samples_per_second": 72.735, "eval_steps_per_second": 4.744, "step": 42 }, { "epoch": 12.86, "eval_accuracy": 0.6521739130434783, "eval_loss": 0.9384742379188538, "eval_runtime": 0.6201, "eval_samples_per_second": 74.177, "eval_steps_per_second": 4.838, "step": 45 }, { "epoch": 14.0, "eval_accuracy": 0.6739130434782609, "eval_loss": 0.9601091146469116, "eval_runtime": 0.6196, "eval_samples_per_second": 74.236, "eval_steps_per_second": 4.841, "step": 49 }, { "epoch": 14.29, "learning_rate": 0.00020588235294117645, "loss": 0.9883, "step": 50 }, { "epoch": 14.86, "eval_accuracy": 0.6739130434782609, "eval_loss": 0.9368714094161987, "eval_runtime": 0.5906, "eval_samples_per_second": 77.882, "eval_steps_per_second": 5.079, "step": 52 }, { "epoch": 16.0, "eval_accuracy": 0.6739130434782609, "eval_loss": 0.8928157687187195, "eval_runtime": 0.6201, "eval_samples_per_second": 74.177, "eval_steps_per_second": 4.838, "step": 56 }, { "epoch": 16.86, "eval_accuracy": 0.6739130434782609, "eval_loss": 0.8825035691261292, "eval_runtime": 0.6252, "eval_samples_per_second": 73.581, "eval_steps_per_second": 4.799, "step": 59 }, { "epoch": 17.14, "learning_rate": 0.0001764705882352941, "loss": 0.8632, "step": 60 }, { "epoch": 18.0, "eval_accuracy": 0.6739130434782609, "eval_loss": 0.8742826581001282, "eval_runtime": 0.6216, "eval_samples_per_second": 73.997, "eval_steps_per_second": 4.826, "step": 63 }, { "epoch": 18.86, "eval_accuracy": 0.6739130434782609, "eval_loss": 0.8478592038154602, "eval_runtime": 0.6091, "eval_samples_per_second": 75.517, "eval_steps_per_second": 4.925, "step": 66 }, { "epoch": 20.0, "learning_rate": 0.00014705882352941175, "loss": 0.8732, "step": 70 }, { "epoch": 20.0, "eval_accuracy": 0.6739130434782609, "eval_loss": 0.834061324596405, "eval_runtime": 0.6332, "eval_samples_per_second": 72.651, "eval_steps_per_second": 4.738, "step": 70 }, { "epoch": 20.86, "eval_accuracy": 0.717391304347826, "eval_loss": 0.8133650422096252, "eval_runtime": 0.7277, "eval_samples_per_second": 63.215, "eval_steps_per_second": 4.123, "step": 73 }, { "epoch": 22.0, "eval_accuracy": 0.6739130434782609, "eval_loss": 0.8844852447509766, "eval_runtime": 0.6501, "eval_samples_per_second": 70.753, "eval_steps_per_second": 4.614, "step": 77 }, { "epoch": 22.86, "learning_rate": 0.0001176470588235294, "loss": 0.7524, "step": 80 }, { "epoch": 22.86, "eval_accuracy": 0.6521739130434783, "eval_loss": 0.9037047624588013, "eval_runtime": 0.6262, "eval_samples_per_second": 73.464, "eval_steps_per_second": 4.791, "step": 80 }, { "epoch": 24.0, "eval_accuracy": 0.6304347826086957, "eval_loss": 0.8921377062797546, "eval_runtime": 0.6157, "eval_samples_per_second": 74.718, "eval_steps_per_second": 4.873, "step": 84 }, { "epoch": 24.86, "eval_accuracy": 0.7391304347826086, "eval_loss": 0.8765698075294495, "eval_runtime": 0.6211, "eval_samples_per_second": 74.058, "eval_steps_per_second": 4.83, "step": 87 }, { "epoch": 25.71, "learning_rate": 8.823529411764705e-05, "loss": 0.6843, "step": 90 }, { "epoch": 26.0, "eval_accuracy": 0.7391304347826086, "eval_loss": 0.8706851005554199, "eval_runtime": 0.6322, "eval_samples_per_second": 72.766, "eval_steps_per_second": 4.746, "step": 91 }, { "epoch": 26.86, "eval_accuracy": 0.7391304347826086, "eval_loss": 0.8922207951545715, "eval_runtime": 0.6461, "eval_samples_per_second": 71.192, "eval_steps_per_second": 4.643, "step": 94 }, { "epoch": 28.0, "eval_accuracy": 0.6956521739130435, "eval_loss": 0.8861702084541321, "eval_runtime": 0.6096, "eval_samples_per_second": 75.454, "eval_steps_per_second": 4.921, "step": 98 }, { "epoch": 28.57, "learning_rate": 5.88235294117647e-05, "loss": 0.7112, "step": 100 }, { "epoch": 28.86, "eval_accuracy": 0.7391304347826086, "eval_loss": 0.8954079747200012, "eval_runtime": 0.6282, "eval_samples_per_second": 73.23, "eval_steps_per_second": 4.776, "step": 101 }, { "epoch": 30.0, "eval_accuracy": 0.6956521739130435, "eval_loss": 0.9228313565254211, "eval_runtime": 0.6091, "eval_samples_per_second": 75.517, "eval_steps_per_second": 4.925, "step": 105 }, { "epoch": 30.86, "eval_accuracy": 0.6956521739130435, "eval_loss": 0.9172708988189697, "eval_runtime": 0.6367, "eval_samples_per_second": 72.253, "eval_steps_per_second": 4.712, "step": 108 }, { "epoch": 31.43, "learning_rate": 2.941176470588235e-05, "loss": 0.6885, "step": 110 }, { "epoch": 32.0, "eval_accuracy": 0.6956521739130435, "eval_loss": 0.9317991137504578, "eval_runtime": 0.6256, "eval_samples_per_second": 73.524, "eval_steps_per_second": 4.795, "step": 112 }, { "epoch": 32.86, "eval_accuracy": 0.7391304347826086, "eval_loss": 0.9049164056777954, "eval_runtime": 0.6101, "eval_samples_per_second": 75.393, "eval_steps_per_second": 4.917, "step": 115 }, { "epoch": 34.0, "eval_accuracy": 0.717391304347826, "eval_loss": 0.915714681148529, "eval_runtime": 0.7362, "eval_samples_per_second": 62.485, "eval_steps_per_second": 4.075, "step": 119 }, { "epoch": 34.29, "learning_rate": 0.0, "loss": 0.6734, "step": 120 }, { "epoch": 34.29, "eval_accuracy": 0.7391304347826086, "eval_loss": 0.9050928354263306, "eval_runtime": 0.7057, "eval_samples_per_second": 65.186, "eval_steps_per_second": 4.251, "step": 120 }, { "epoch": 34.29, "step": 120, "total_flos": 2.0027429927092224e+16, "train_loss": 0.9331819852193196, "train_runtime": 134.062, "train_samples_per_second": 63.553, "train_steps_per_second": 0.895 } ], "logging_steps": 10, "max_steps": 120, "num_input_tokens_seen": 0, "num_train_epochs": 40, "save_steps": 500, "total_flos": 2.0027429927092224e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }