| { | |
| "best_metric": 0.10869565217391304, | |
| "best_model_checkpoint": "swiftformer-xs-DMAE\\checkpoint-3", | |
| "epoch": 34.285714285714285, | |
| "eval_steps": 500, | |
| "global_step": 120, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.86, | |
| "eval_accuracy": 0.10869565217391304, | |
| "eval_loss": 113.81844329833984, | |
| "eval_runtime": 0.9277, | |
| "eval_samples_per_second": 49.584, | |
| "eval_steps_per_second": 3.234, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.10869565217391304, | |
| "eval_loss": 113.80943298339844, | |
| "eval_runtime": 0.5649, | |
| "eval_samples_per_second": 81.43, | |
| "eval_steps_per_second": 5.311, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 0.00025, | |
| "loss": 114.0867, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "eval_accuracy": 0.10869565217391304, | |
| "eval_loss": 113.79438018798828, | |
| "eval_runtime": 0.6062, | |
| "eval_samples_per_second": 75.888, | |
| "eval_steps_per_second": 4.949, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.10869565217391304, | |
| "eval_loss": 113.78809356689453, | |
| "eval_runtime": 0.6076, | |
| "eval_samples_per_second": 75.702, | |
| "eval_steps_per_second": 4.937, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "eval_accuracy": 0.10869565217391304, | |
| "eval_loss": 113.70995330810547, | |
| "eval_runtime": 0.6162, | |
| "eval_samples_per_second": 74.657, | |
| "eval_steps_per_second": 4.869, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 5.71, | |
| "learning_rate": 0.0002777777777777778, | |
| "loss": 113.3425, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.10869565217391304, | |
| "eval_loss": 113.58837890625, | |
| "eval_runtime": 0.6537, | |
| "eval_samples_per_second": 70.373, | |
| "eval_steps_per_second": 4.59, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 6.86, | |
| "eval_accuracy": 0.10869565217391304, | |
| "eval_loss": 113.49983978271484, | |
| "eval_runtime": 0.5761, | |
| "eval_samples_per_second": 79.843, | |
| "eval_steps_per_second": 5.207, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.10869565217391304, | |
| "eval_loss": 113.05781555175781, | |
| "eval_runtime": 2.1702, | |
| "eval_samples_per_second": 21.196, | |
| "eval_steps_per_second": 1.382, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 8.57, | |
| "learning_rate": 0.00025, | |
| "loss": 111.228, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 8.86, | |
| "eval_accuracy": 0.10869565217391304, | |
| "eval_loss": 112.80531311035156, | |
| "eval_runtime": 0.5921, | |
| "eval_samples_per_second": 77.685, | |
| "eval_steps_per_second": 5.066, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.10869565217391304, | |
| "eval_loss": 112.52015686035156, | |
| "eval_runtime": 0.6021, | |
| "eval_samples_per_second": 76.395, | |
| "eval_steps_per_second": 4.982, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 10.86, | |
| "eval_accuracy": 0.10869565217391304, | |
| "eval_loss": 112.58113861083984, | |
| "eval_runtime": 0.5977, | |
| "eval_samples_per_second": 76.964, | |
| "eval_steps_per_second": 5.019, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 11.43, | |
| "learning_rate": 0.00022222222222222218, | |
| "loss": 114.9647, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.10869565217391304, | |
| "eval_loss": 112.60899353027344, | |
| "eval_runtime": 0.5971, | |
| "eval_samples_per_second": 77.035, | |
| "eval_steps_per_second": 5.024, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 12.86, | |
| "eval_accuracy": 0.10869565217391304, | |
| "eval_loss": 112.49734497070312, | |
| "eval_runtime": 0.5906, | |
| "eval_samples_per_second": 77.882, | |
| "eval_steps_per_second": 5.079, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.10869565217391304, | |
| "eval_loss": 111.97610473632812, | |
| "eval_runtime": 0.5781, | |
| "eval_samples_per_second": 79.567, | |
| "eval_steps_per_second": 5.189, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 14.29, | |
| "learning_rate": 0.00019444444444444443, | |
| "loss": 110.7738, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 14.86, | |
| "eval_accuracy": 0.10869565217391304, | |
| "eval_loss": 111.81171417236328, | |
| "eval_runtime": 0.5761, | |
| "eval_samples_per_second": 79.841, | |
| "eval_steps_per_second": 5.207, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.10869565217391304, | |
| "eval_loss": 111.65890502929688, | |
| "eval_runtime": 0.5781, | |
| "eval_samples_per_second": 79.567, | |
| "eval_steps_per_second": 5.189, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 16.86, | |
| "eval_accuracy": 0.10869565217391304, | |
| "eval_loss": 111.53672790527344, | |
| "eval_runtime": 0.5771, | |
| "eval_samples_per_second": 79.703, | |
| "eval_steps_per_second": 5.198, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 17.14, | |
| "learning_rate": 0.00016666666666666666, | |
| "loss": 111.0505, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.10869565217391304, | |
| "eval_loss": 111.7016372680664, | |
| "eval_runtime": 0.6091, | |
| "eval_samples_per_second": 75.517, | |
| "eval_steps_per_second": 4.925, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 18.86, | |
| "eval_accuracy": 0.10869565217391304, | |
| "eval_loss": 111.90676879882812, | |
| "eval_runtime": 0.6357, | |
| "eval_samples_per_second": 72.366, | |
| "eval_steps_per_second": 4.72, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "learning_rate": 0.0001388888888888889, | |
| "loss": 111.4545, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.10869565217391304, | |
| "eval_loss": 111.62030792236328, | |
| "eval_runtime": 0.6126, | |
| "eval_samples_per_second": 75.084, | |
| "eval_steps_per_second": 4.897, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 20.86, | |
| "eval_accuracy": 0.10869565217391304, | |
| "eval_loss": 111.12662506103516, | |
| "eval_runtime": 0.6051, | |
| "eval_samples_per_second": 76.016, | |
| "eval_steps_per_second": 4.958, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_accuracy": 0.10869565217391304, | |
| "eval_loss": 110.28787231445312, | |
| "eval_runtime": 0.6352, | |
| "eval_samples_per_second": 72.422, | |
| "eval_steps_per_second": 4.723, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 22.86, | |
| "learning_rate": 0.00011111111111111109, | |
| "loss": 111.2779, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 22.86, | |
| "eval_accuracy": 0.10869565217391304, | |
| "eval_loss": 109.85228729248047, | |
| "eval_runtime": 0.5951, | |
| "eval_samples_per_second": 77.292, | |
| "eval_steps_per_second": 5.041, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.10869565217391304, | |
| "eval_loss": 109.52828979492188, | |
| "eval_runtime": 0.5951, | |
| "eval_samples_per_second": 77.292, | |
| "eval_steps_per_second": 5.041, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 24.86, | |
| "eval_accuracy": 0.10869565217391304, | |
| "eval_loss": 109.9590072631836, | |
| "eval_runtime": 0.5836, | |
| "eval_samples_per_second": 78.816, | |
| "eval_steps_per_second": 5.14, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 25.71, | |
| "learning_rate": 8.333333333333333e-05, | |
| "loss": 110.5166, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_accuracy": 0.10869565217391304, | |
| "eval_loss": 109.97518157958984, | |
| "eval_runtime": 0.6056, | |
| "eval_samples_per_second": 75.952, | |
| "eval_steps_per_second": 4.953, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 26.86, | |
| "eval_accuracy": 0.10869565217391304, | |
| "eval_loss": 109.54348754882812, | |
| "eval_runtime": 0.6151, | |
| "eval_samples_per_second": 74.78, | |
| "eval_steps_per_second": 4.877, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 0.10869565217391304, | |
| "eval_loss": 109.57117462158203, | |
| "eval_runtime": 0.6156, | |
| "eval_samples_per_second": 74.718, | |
| "eval_steps_per_second": 4.873, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 28.57, | |
| "learning_rate": 5.5555555555555545e-05, | |
| "loss": 108.66, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 28.86, | |
| "eval_accuracy": 0.10869565217391304, | |
| "eval_loss": 108.89238739013672, | |
| "eval_runtime": 0.6327, | |
| "eval_samples_per_second": 72.71, | |
| "eval_steps_per_second": 4.742, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_accuracy": 0.10869565217391304, | |
| "eval_loss": 108.39895629882812, | |
| "eval_runtime": 0.6281, | |
| "eval_samples_per_second": 73.232, | |
| "eval_steps_per_second": 4.776, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 30.86, | |
| "eval_accuracy": 0.10869565217391304, | |
| "eval_loss": 108.70501708984375, | |
| "eval_runtime": 0.6417, | |
| "eval_samples_per_second": 71.69, | |
| "eval_steps_per_second": 4.675, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 31.43, | |
| "learning_rate": 2.7777777777777772e-05, | |
| "loss": 109.688, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_accuracy": 0.10869565217391304, | |
| "eval_loss": 108.7237319946289, | |
| "eval_runtime": 0.6112, | |
| "eval_samples_per_second": 75.267, | |
| "eval_steps_per_second": 4.909, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 32.86, | |
| "eval_accuracy": 0.10869565217391304, | |
| "eval_loss": 109.0678939819336, | |
| "eval_runtime": 0.6072, | |
| "eval_samples_per_second": 75.763, | |
| "eval_steps_per_second": 4.941, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "eval_accuracy": 0.10869565217391304, | |
| "eval_loss": 108.574951171875, | |
| "eval_runtime": 0.6091, | |
| "eval_samples_per_second": 75.516, | |
| "eval_steps_per_second": 4.925, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 34.29, | |
| "learning_rate": 0.0, | |
| "loss": 109.4549, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 34.29, | |
| "eval_accuracy": 0.10869565217391304, | |
| "eval_loss": 108.5167465209961, | |
| "eval_runtime": 0.6016, | |
| "eval_samples_per_second": 76.457, | |
| "eval_steps_per_second": 4.986, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 34.29, | |
| "step": 120, | |
| "total_flos": 2.0027429927092224e+16, | |
| "train_loss": 111.37485249837239, | |
| "train_runtime": 136.0598, | |
| "train_samples_per_second": 62.62, | |
| "train_steps_per_second": 0.882 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 120, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 40, | |
| "save_steps": 500, | |
| "total_flos": 2.0027429927092224e+16, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |