{ "best_metric": 0.7608695652173914, "best_model_checkpoint": "swiftformer-xs-DMAE\\checkpoint-98", "epoch": 34.285714285714285, "eval_steps": 500, "global_step": 120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.86, "eval_accuracy": 0.5434782608695652, "eval_loss": 1.3706265687942505, "eval_runtime": 0.7927, "eval_samples_per_second": 58.03, "eval_steps_per_second": 3.785, "step": 3 }, { "epoch": 2.0, "eval_accuracy": 0.6304347826086957, "eval_loss": 1.1672464609146118, "eval_runtime": 0.7267, "eval_samples_per_second": 63.3, "eval_steps_per_second": 4.128, "step": 7 }, { "epoch": 2.86, "learning_rate": 0.004166666666666667, "loss": 1.2937, "step": 10 }, { "epoch": 2.86, "eval_accuracy": 0.6086956521739131, "eval_loss": 1.2248375415802002, "eval_runtime": 0.6077, "eval_samples_per_second": 75.7, "eval_steps_per_second": 4.937, "step": 10 }, { "epoch": 4.0, "eval_accuracy": 0.34782608695652173, "eval_loss": 2.698575973510742, "eval_runtime": 0.6071, "eval_samples_per_second": 75.764, "eval_steps_per_second": 4.941, "step": 14 }, { "epoch": 4.86, "eval_accuracy": 0.45652173913043476, "eval_loss": 2.600794792175293, "eval_runtime": 0.6206, "eval_samples_per_second": 74.116, "eval_steps_per_second": 4.834, "step": 17 }, { "epoch": 5.71, "learning_rate": 0.004629629629629629, "loss": 1.2355, "step": 20 }, { "epoch": 6.0, "eval_accuracy": 0.41304347826086957, "eval_loss": 1.8899880647659302, "eval_runtime": 0.6442, "eval_samples_per_second": 71.411, "eval_steps_per_second": 4.657, "step": 21 }, { "epoch": 6.86, "eval_accuracy": 0.5, "eval_loss": 1.595796823501587, "eval_runtime": 0.6131, "eval_samples_per_second": 75.024, "eval_steps_per_second": 4.893, "step": 24 }, { "epoch": 8.0, "eval_accuracy": 0.5652173913043478, "eval_loss": 1.3846951723098755, "eval_runtime": 0.8307, "eval_samples_per_second": 55.373, "eval_steps_per_second": 3.611, "step": 28 }, { "epoch": 8.57, "learning_rate": 0.004166666666666667, "loss": 1.1548, "step": 30 }, { "epoch": 8.86, "eval_accuracy": 0.717391304347826, "eval_loss": 0.8761585354804993, "eval_runtime": 0.6322, "eval_samples_per_second": 72.767, "eval_steps_per_second": 4.746, "step": 31 }, { "epoch": 10.0, "eval_accuracy": 0.5434782608695652, "eval_loss": 1.0870667695999146, "eval_runtime": 0.6561, "eval_samples_per_second": 70.106, "eval_steps_per_second": 4.572, "step": 35 }, { "epoch": 10.86, "eval_accuracy": 0.5434782608695652, "eval_loss": 0.9779139757156372, "eval_runtime": 0.5857, "eval_samples_per_second": 78.544, "eval_steps_per_second": 5.122, "step": 38 }, { "epoch": 11.43, "learning_rate": 0.0037037037037037034, "loss": 1.0561, "step": 40 }, { "epoch": 12.0, "eval_accuracy": 0.5434782608695652, "eval_loss": 1.2454442977905273, "eval_runtime": 0.6291, "eval_samples_per_second": 73.116, "eval_steps_per_second": 4.768, "step": 42 }, { "epoch": 12.86, "eval_accuracy": 0.5652173913043478, "eval_loss": 1.227651834487915, "eval_runtime": 0.6021, "eval_samples_per_second": 76.395, "eval_steps_per_second": 4.982, "step": 45 }, { "epoch": 14.0, "eval_accuracy": 0.6086956521739131, "eval_loss": 0.9526649713516235, "eval_runtime": 0.6019, "eval_samples_per_second": 76.422, "eval_steps_per_second": 4.984, "step": 49 }, { "epoch": 14.29, "learning_rate": 0.0032407407407407406, "loss": 1.0546, "step": 50 }, { "epoch": 14.86, "eval_accuracy": 0.6304347826086957, "eval_loss": 0.8797388672828674, "eval_runtime": 0.6531, "eval_samples_per_second": 70.428, "eval_steps_per_second": 4.593, "step": 52 }, { "epoch": 16.0, "eval_accuracy": 0.6304347826086957, "eval_loss": 0.9479212760925293, "eval_runtime": 0.5991, "eval_samples_per_second": 76.776, "eval_steps_per_second": 5.007, "step": 56 }, { "epoch": 16.86, "eval_accuracy": 0.6739130434782609, "eval_loss": 0.869612991809845, "eval_runtime": 0.7437, "eval_samples_per_second": 61.854, "eval_steps_per_second": 4.034, "step": 59 }, { "epoch": 17.14, "learning_rate": 0.002777777777777778, "loss": 0.9493, "step": 60 }, { "epoch": 18.0, "eval_accuracy": 0.6521739130434783, "eval_loss": 0.9347758293151855, "eval_runtime": 0.6096, "eval_samples_per_second": 75.454, "eval_steps_per_second": 4.921, "step": 63 }, { "epoch": 18.86, "eval_accuracy": 0.5434782608695652, "eval_loss": 0.9889930486679077, "eval_runtime": 0.6462, "eval_samples_per_second": 71.19, "eval_steps_per_second": 4.643, "step": 66 }, { "epoch": 20.0, "learning_rate": 0.0023148148148148147, "loss": 0.9354, "step": 70 }, { "epoch": 20.0, "eval_accuracy": 0.5869565217391305, "eval_loss": 0.9073047041893005, "eval_runtime": 0.6136, "eval_samples_per_second": 74.962, "eval_steps_per_second": 4.889, "step": 70 }, { "epoch": 20.86, "eval_accuracy": 0.6304347826086957, "eval_loss": 0.8763116598129272, "eval_runtime": 0.6322, "eval_samples_per_second": 72.766, "eval_steps_per_second": 4.746, "step": 73 }, { "epoch": 22.0, "eval_accuracy": 0.6521739130434783, "eval_loss": 0.9592300653457642, "eval_runtime": 0.6382, "eval_samples_per_second": 72.082, "eval_steps_per_second": 4.701, "step": 77 }, { "epoch": 22.86, "learning_rate": 0.0018518518518518517, "loss": 0.8791, "step": 80 }, { "epoch": 22.86, "eval_accuracy": 0.6521739130434783, "eval_loss": 0.8940117955207825, "eval_runtime": 0.7087, "eval_samples_per_second": 64.911, "eval_steps_per_second": 4.233, "step": 80 }, { "epoch": 24.0, "eval_accuracy": 0.6956521739130435, "eval_loss": 0.8165251016616821, "eval_runtime": 0.6012, "eval_samples_per_second": 76.52, "eval_steps_per_second": 4.99, "step": 84 }, { "epoch": 24.86, "eval_accuracy": 0.6956521739130435, "eval_loss": 0.8249245285987854, "eval_runtime": 0.6311, "eval_samples_per_second": 72.884, "eval_steps_per_second": 4.753, "step": 87 }, { "epoch": 25.71, "learning_rate": 0.001388888888888889, "loss": 0.8017, "step": 90 }, { "epoch": 26.0, "eval_accuracy": 0.6739130434782609, "eval_loss": 0.8946475386619568, "eval_runtime": 0.7282, "eval_samples_per_second": 63.173, "eval_steps_per_second": 4.12, "step": 91 }, { "epoch": 26.86, "eval_accuracy": 0.6956521739130435, "eval_loss": 0.8210282325744629, "eval_runtime": 0.6431, "eval_samples_per_second": 71.524, "eval_steps_per_second": 4.665, "step": 94 }, { "epoch": 28.0, "eval_accuracy": 0.7608695652173914, "eval_loss": 0.7653754949569702, "eval_runtime": 0.9337, "eval_samples_per_second": 49.265, "eval_steps_per_second": 3.213, "step": 98 }, { "epoch": 28.57, "learning_rate": 0.0009259259259259259, "loss": 0.8532, "step": 100 }, { "epoch": 28.86, "eval_accuracy": 0.7391304347826086, "eval_loss": 0.7491888999938965, "eval_runtime": 0.6161, "eval_samples_per_second": 74.659, "eval_steps_per_second": 4.869, "step": 101 }, { "epoch": 30.0, "eval_accuracy": 0.6956521739130435, "eval_loss": 0.7725396156311035, "eval_runtime": 0.6542, "eval_samples_per_second": 70.319, "eval_steps_per_second": 4.586, "step": 105 }, { "epoch": 30.86, "eval_accuracy": 0.717391304347826, "eval_loss": 0.7931932210922241, "eval_runtime": 0.6467, "eval_samples_per_second": 71.135, "eval_steps_per_second": 4.639, "step": 108 }, { "epoch": 31.43, "learning_rate": 0.0004629629629629629, "loss": 0.8205, "step": 110 }, { "epoch": 32.0, "eval_accuracy": 0.7391304347826086, "eval_loss": 0.8129280209541321, "eval_runtime": 0.6256, "eval_samples_per_second": 73.524, "eval_steps_per_second": 4.795, "step": 112 }, { "epoch": 32.86, "eval_accuracy": 0.6956521739130435, "eval_loss": 0.8089176416397095, "eval_runtime": 0.6011, "eval_samples_per_second": 76.522, "eval_steps_per_second": 4.991, "step": 115 }, { "epoch": 34.0, "eval_accuracy": 0.6956521739130435, "eval_loss": 0.8150961399078369, "eval_runtime": 0.8342, "eval_samples_per_second": 55.142, "eval_steps_per_second": 3.596, "step": 119 }, { "epoch": 34.29, "learning_rate": 0.0, "loss": 0.8112, "step": 120 }, { "epoch": 34.29, "eval_accuracy": 0.6956521739130435, "eval_loss": 0.8074415326118469, "eval_runtime": 0.6487, "eval_samples_per_second": 70.916, "eval_steps_per_second": 4.625, "step": 120 }, { "epoch": 34.29, "step": 120, "total_flos": 2.0027429927092224e+16, "train_loss": 0.9870913426081339, "train_runtime": 135.9591, "train_samples_per_second": 62.666, "train_steps_per_second": 0.883 } ], "logging_steps": 10, "max_steps": 120, "num_input_tokens_seen": 0, "num_train_epochs": 40, "save_steps": 500, "total_flos": 2.0027429927092224e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }