| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 15.0, | |
| "eval_steps": 500, | |
| "global_step": 360, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.8181818181818183e-06, | |
| "loss": 8.9259, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 3.6363636363636366e-06, | |
| "loss": 8.9458, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 5.4545454545454545e-06, | |
| "loss": 8.1374, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 7.272727272727273e-06, | |
| "loss": 5.8659, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 9.090909090909091e-06, | |
| "loss": 2.6947, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 1.0909090909090909e-05, | |
| "loss": 1.8902, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 1.2727272727272728e-05, | |
| "loss": 1.907, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 1.4545454545454546e-05, | |
| "loss": 1.6722, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 1.6363636363636366e-05, | |
| "loss": 1.6213, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 1.8181818181818182e-05, | |
| "loss": 1.5757, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2e-05, | |
| "loss": 1.3937, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 1.9999594849888083e-05, | |
| "loss": 0.9765, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 1.999837943238166e-05, | |
| "loss": 1.2983, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 1.9996353845966033e-05, | |
| "loss": 1.0346, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.9993518254774517e-05, | |
| "loss": 1.0499, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.998987288857513e-05, | |
| "loss": 1.071, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.9985418042751975e-05, | |
| "loss": 1.2056, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 1.998015407828131e-05, | |
| "loss": 0.8992, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 1.9974081421702296e-05, | |
| "loss": 1.1998, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.9967200565082426e-05, | |
| "loss": 0.8691, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 1.9959512065977673e-05, | |
| "loss": 0.8735, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 1.9951016547387286e-05, | |
| "loss": 0.7922, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.9941714697703333e-05, | |
| "loss": 0.8494, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 1.993160727065489e-05, | |
| "loss": 0.8835, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 1.9920695085247012e-05, | |
| "loss": 0.5345, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 1.9908979025694312e-05, | |
| "loss": 0.4796, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 1.989646004134937e-05, | |
| "loss": 0.5371, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 1.9883139146625763e-05, | |
| "loss": 0.4286, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 1.9869017420915888e-05, | |
| "loss": 0.5679, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 1.9854096008503495e-05, | |
| "loss": 0.5667, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 1.9838376118470965e-05, | |
| "loss": 0.4878, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 1.9821859024601345e-05, | |
| "loss": 0.6535, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 1.9804546065275116e-05, | |
| "loss": 0.566, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 1.978643864336176e-05, | |
| "loss": 0.4362, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 1.9767538226106078e-05, | |
| "loss": 0.6098, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 1.9747846345009306e-05, | |
| "loss": 0.3936, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 1.9727364595705012e-05, | |
| "loss": 0.5268, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 1.9706094637829797e-05, | |
| "loss": 0.5022, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 1.9684038194888827e-05, | |
| "loss": 0.486, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 1.9661197054116165e-05, | |
| "loss": 0.5562, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 1.963757306632996e-05, | |
| "loss": 0.3986, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 1.9613168145782468e-05, | |
| "loss": 0.4909, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 1.958798427000495e-05, | |
| "loss": 0.4447, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 1.956202347964743e-05, | |
| "loss": 0.4243, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 1.9535287878313315e-05, | |
| "loss": 0.4518, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 1.9507779632388997e-05, | |
| "loss": 0.5424, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 1.947950097086825e-05, | |
| "loss": 0.472, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 1.945045418517165e-05, | |
| "loss": 0.4739, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 1.9420641628960897e-05, | |
| "loss": 0.3188, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 1.9390065717948084e-05, | |
| "loss": 0.3718, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 1.9358728929699966e-05, | |
| "loss": 0.2013, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 1.9326633803437197e-05, | |
| "loss": 0.2719, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 1.929378293982857e-05, | |
| "loss": 0.2863, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 1.926017900078031e-05, | |
| "loss": 0.2934, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 1.922582470922034e-05, | |
| "loss": 0.2446, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 1.9190722848877683e-05, | |
| "loss": 0.2367, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 1.9154876264056863e-05, | |
| "loss": 0.2255, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 1.911828785940745e-05, | |
| "loss": 0.2965, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 1.908096059968869e-05, | |
| "loss": 0.2464, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 1.904289750952928e-05, | |
| "loss": 0.2446, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 1.900410167318226e-05, | |
| "loss": 0.2948, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 1.8964576234275123e-05, | |
| "loss": 0.2947, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 1.8924324395555066e-05, | |
| "loss": 0.2137, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 1.8883349418629487e-05, | |
| "loss": 0.2963, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 1.8841654623701673e-05, | |
| "loss": 0.319, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 1.8799243389301796e-05, | |
| "loss": 0.2211, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 1.8756119152013134e-05, | |
| "loss": 0.2289, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 1.8712285406193585e-05, | |
| "loss": 0.2706, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 1.866774570369257e-05, | |
| "loss": 0.2301, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 1.8622503653563173e-05, | |
| "loss": 0.2522, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 1.8576562921769727e-05, | |
| "loss": 0.2784, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 1.8529927230890757e-05, | |
| "loss": 0.3307, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "learning_rate": 1.8482600359817344e-05, | |
| "loss": 0.1112, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "learning_rate": 1.843458614344691e-05, | |
| "loss": 0.1366, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "learning_rate": 1.8385888472372474e-05, | |
| "loss": 0.1771, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 3.17, | |
| "learning_rate": 1.833651129256742e-05, | |
| "loss": 0.1372, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 3.21, | |
| "learning_rate": 1.828645860506573e-05, | |
| "loss": 0.1824, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 3.25, | |
| "learning_rate": 1.8235734465637794e-05, | |
| "loss": 0.1933, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "learning_rate": 1.8184342984461766e-05, | |
| "loss": 0.2282, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 3.33, | |
| "learning_rate": 1.8132288325790518e-05, | |
| "loss": 0.1289, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "learning_rate": 1.8079574707614202e-05, | |
| "loss": 0.1726, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 3.42, | |
| "learning_rate": 1.802620640131848e-05, | |
| "loss": 0.1802, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 3.46, | |
| "learning_rate": 1.797218773133841e-05, | |
| "loss": 0.1267, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "learning_rate": 1.7917523074808024e-05, | |
| "loss": 0.1633, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 3.54, | |
| "learning_rate": 1.786221686120567e-05, | |
| "loss": 0.1657, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 3.58, | |
| "learning_rate": 1.7806273571995066e-05, | |
| "loss": 0.1596, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 3.62, | |
| "learning_rate": 1.7749697740262197e-05, | |
| "loss": 0.1457, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 3.67, | |
| "learning_rate": 1.769249395034797e-05, | |
| "loss": 0.1333, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 3.71, | |
| "learning_rate": 1.7634666837476765e-05, | |
| "loss": 0.2186, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 3.75, | |
| "learning_rate": 1.757622108738083e-05, | |
| "loss": 0.1399, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "learning_rate": 1.7517161435920606e-05, | |
| "loss": 0.1465, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 3.83, | |
| "learning_rate": 1.7457492668700967e-05, | |
| "loss": 0.1402, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "learning_rate": 1.7397219620683465e-05, | |
| "loss": 0.2108, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "learning_rate": 1.7336347175794523e-05, | |
| "loss": 0.1197, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 3.96, | |
| "learning_rate": 1.7274880266529716e-05, | |
| "loss": 0.1889, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "learning_rate": 1.721282387355408e-05, | |
| "loss": 0.1857, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "learning_rate": 1.715018302529852e-05, | |
| "loss": 0.0949, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "learning_rate": 1.7086962797552376e-05, | |
| "loss": 0.102, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 4.12, | |
| "learning_rate": 1.7023168313052118e-05, | |
| "loss": 0.07, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 4.17, | |
| "learning_rate": 1.6958804741066254e-05, | |
| "loss": 0.0904, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 4.21, | |
| "learning_rate": 1.689387729697646e-05, | |
| "loss": 0.1163, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 4.25, | |
| "learning_rate": 1.6828391241854983e-05, | |
| "loss": 0.0992, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 4.29, | |
| "learning_rate": 1.6762351882038342e-05, | |
| "loss": 0.1115, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 4.33, | |
| "learning_rate": 1.669576456869733e-05, | |
| "loss": 0.0796, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 4.38, | |
| "learning_rate": 1.6628634697403447e-05, | |
| "loss": 0.0835, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 4.42, | |
| "learning_rate": 1.6560967707691663e-05, | |
| "loss": 0.1216, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 4.46, | |
| "learning_rate": 1.649276908261967e-05, | |
| "loss": 0.0637, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 4.5, | |
| "learning_rate": 1.642404434832358e-05, | |
| "loss": 0.138, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 4.54, | |
| "learning_rate": 1.635479907357016e-05, | |
| "loss": 0.1041, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 4.58, | |
| "learning_rate": 1.6285038869305565e-05, | |
| "loss": 0.0871, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 4.62, | |
| "learning_rate": 1.621476938820071e-05, | |
| "loss": 0.0819, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 4.67, | |
| "learning_rate": 1.6143996324193227e-05, | |
| "loss": 0.034, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 4.71, | |
| "learning_rate": 1.6072725412026066e-05, | |
| "loss": 0.0366, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 4.75, | |
| "learning_rate": 1.6000962426782844e-05, | |
| "loss": 0.0967, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 4.79, | |
| "learning_rate": 1.592871318341986e-05, | |
| "loss": 0.1434, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 4.83, | |
| "learning_rate": 1.585598353629492e-05, | |
| "loss": 0.1013, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 4.88, | |
| "learning_rate": 1.5782779378692957e-05, | |
| "loss": 0.1169, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 4.92, | |
| "learning_rate": 1.57091066423485e-05, | |
| "loss": 0.0837, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 4.96, | |
| "learning_rate": 1.5634971296965027e-05, | |
| "loss": 0.1023, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 1.5560379349731234e-05, | |
| "loss": 0.1044, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 5.04, | |
| "learning_rate": 1.5485336844834274e-05, | |
| "loss": 0.0449, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 5.08, | |
| "learning_rate": 1.5409849862969994e-05, | |
| "loss": 0.0338, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 5.12, | |
| "learning_rate": 1.5333924520850227e-05, | |
| "loss": 0.0379, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 5.17, | |
| "learning_rate": 1.5257566970707147e-05, | |
| "loss": 0.0553, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 5.21, | |
| "learning_rate": 1.5180783399794749e-05, | |
| "loss": 0.0408, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 5.25, | |
| "learning_rate": 1.5103580029887504e-05, | |
| "loss": 0.0688, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 5.29, | |
| "learning_rate": 1.5025963116776203e-05, | |
| "loss": 0.0781, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 5.33, | |
| "learning_rate": 1.4947938949761054e-05, | |
| "loss": 0.0799, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 5.38, | |
| "learning_rate": 1.4869513851142051e-05, | |
| "loss": 0.0328, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 5.42, | |
| "learning_rate": 1.4790694175706698e-05, | |
| "loss": 0.0869, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 5.46, | |
| "learning_rate": 1.4711486310215053e-05, | |
| "loss": 0.0185, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 5.5, | |
| "learning_rate": 1.4631896672882235e-05, | |
| "loss": 0.0751, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 5.54, | |
| "learning_rate": 1.4551931712858334e-05, | |
| "loss": 0.0842, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 5.58, | |
| "learning_rate": 1.4471597909705858e-05, | |
| "loss": 0.0718, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 5.62, | |
| "learning_rate": 1.4390901772874668e-05, | |
| "loss": 0.0854, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 5.67, | |
| "learning_rate": 1.4309849841174538e-05, | |
| "loss": 0.0508, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 5.71, | |
| "learning_rate": 1.422844868224531e-05, | |
| "loss": 0.0722, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 5.75, | |
| "learning_rate": 1.4146704892024714e-05, | |
| "loss": 0.0577, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 5.79, | |
| "learning_rate": 1.40646250942139e-05, | |
| "loss": 0.0654, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 5.83, | |
| "learning_rate": 1.3982215939740726e-05, | |
| "loss": 0.0452, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 5.88, | |
| "learning_rate": 1.3899484106220816e-05, | |
| "loss": 0.0549, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 5.92, | |
| "learning_rate": 1.3816436297416496e-05, | |
| "loss": 0.0385, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 5.96, | |
| "learning_rate": 1.3733079242693572e-05, | |
| "loss": 0.0729, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "learning_rate": 1.3649419696476057e-05, | |
| "loss": 0.041, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 6.04, | |
| "learning_rate": 1.356546443769885e-05, | |
| "loss": 0.02, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 6.08, | |
| "learning_rate": 1.3481220269258449e-05, | |
| "loss": 0.0611, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 6.12, | |
| "learning_rate": 1.3396694017461708e-05, | |
| "loss": 0.0374, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 6.17, | |
| "learning_rate": 1.3311892531472705e-05, | |
| "loss": 0.0294, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 6.21, | |
| "learning_rate": 1.3226822682757745e-05, | |
| "loss": 0.0483, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 6.25, | |
| "learning_rate": 1.3141491364528576e-05, | |
| "loss": 0.0242, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 6.29, | |
| "learning_rate": 1.3055905491183822e-05, | |
| "loss": 0.0307, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 6.33, | |
| "learning_rate": 1.2970071997748712e-05, | |
| "loss": 0.0529, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 6.38, | |
| "learning_rate": 1.288399783931315e-05, | |
| "loss": 0.0542, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 6.42, | |
| "learning_rate": 1.2797689990468113e-05, | |
| "loss": 0.0297, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 6.46, | |
| "learning_rate": 1.2711155444740529e-05, | |
| "loss": 0.0292, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 6.5, | |
| "learning_rate": 1.2624401214026574e-05, | |
| "loss": 0.0176, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 6.54, | |
| "learning_rate": 1.2537434328023501e-05, | |
| "loss": 0.0154, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 6.58, | |
| "learning_rate": 1.2450261833660033e-05, | |
| "loss": 0.02, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 6.62, | |
| "learning_rate": 1.2362890794525342e-05, | |
| "loss": 0.0589, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 6.67, | |
| "learning_rate": 1.2275328290296677e-05, | |
| "loss": 0.0283, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 6.71, | |
| "learning_rate": 1.2187581416165721e-05, | |
| "loss": 0.0307, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 6.75, | |
| "learning_rate": 1.2099657282263651e-05, | |
| "loss": 0.0356, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 6.79, | |
| "learning_rate": 1.2011563013084996e-05, | |
| "loss": 0.0466, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 6.83, | |
| "learning_rate": 1.1923305746910372e-05, | |
| "loss": 0.0354, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 6.88, | |
| "learning_rate": 1.1834892635228024e-05, | |
| "loss": 0.0432, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 6.92, | |
| "learning_rate": 1.1746330842154371e-05, | |
| "loss": 0.0324, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 6.96, | |
| "learning_rate": 1.1657627543853491e-05, | |
| "loss": 0.0406, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "learning_rate": 1.156878992795563e-05, | |
| "loss": 0.0394, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 7.04, | |
| "learning_rate": 1.1479825192974791e-05, | |
| "loss": 0.0234, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 7.08, | |
| "learning_rate": 1.1390740547725443e-05, | |
| "loss": 0.0135, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 7.12, | |
| "learning_rate": 1.1301543210738383e-05, | |
| "loss": 0.0438, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 7.17, | |
| "learning_rate": 1.1212240409675825e-05, | |
| "loss": 0.015, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 7.21, | |
| "learning_rate": 1.1122839380745738e-05, | |
| "loss": 0.0083, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 7.25, | |
| "learning_rate": 1.1033347368115494e-05, | |
| "loss": 0.0212, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 7.29, | |
| "learning_rate": 1.0943771623324884e-05, | |
| "loss": 0.0245, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 7.33, | |
| "learning_rate": 1.085411940469851e-05, | |
| "loss": 0.0083, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 7.38, | |
| "learning_rate": 1.0764397976757658e-05, | |
| "loss": 0.0187, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 7.42, | |
| "learning_rate": 1.0674614609631634e-05, | |
| "loss": 0.0212, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 7.46, | |
| "learning_rate": 1.0584776578468698e-05, | |
| "loss": 0.022, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 7.5, | |
| "learning_rate": 1.0494891162846515e-05, | |
| "loss": 0.0374, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 7.54, | |
| "learning_rate": 1.040496564618233e-05, | |
| "loss": 0.014, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 7.58, | |
| "learning_rate": 1.0315007315142772e-05, | |
| "loss": 0.0146, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 7.62, | |
| "learning_rate": 1.0225023459053416e-05, | |
| "loss": 0.0226, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 7.67, | |
| "learning_rate": 1.0135021369308138e-05, | |
| "loss": 0.0176, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 7.71, | |
| "learning_rate": 1.004500833877828e-05, | |
| "loss": 0.0101, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 7.75, | |
| "learning_rate": 9.954991661221724e-06, | |
| "loss": 0.0093, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 7.79, | |
| "learning_rate": 9.864978630691865e-06, | |
| "loss": 0.0228, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 7.83, | |
| "learning_rate": 9.774976540946589e-06, | |
| "loss": 0.0194, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 7.88, | |
| "learning_rate": 9.684992684857232e-06, | |
| "loss": 0.0445, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 7.92, | |
| "learning_rate": 9.595034353817673e-06, | |
| "loss": 0.0078, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 7.96, | |
| "learning_rate": 9.505108837153489e-06, | |
| "loss": 0.0227, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "learning_rate": 9.415223421531308e-06, | |
| "loss": 0.0188, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 8.04, | |
| "learning_rate": 9.325385390368367e-06, | |
| "loss": 0.0114, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 8.08, | |
| "learning_rate": 9.23560202324235e-06, | |
| "loss": 0.0084, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 8.12, | |
| "learning_rate": 9.145880595301495e-06, | |
| "loss": 0.0092, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 8.17, | |
| "learning_rate": 9.056228376675118e-06, | |
| "loss": 0.0158, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 8.21, | |
| "learning_rate": 8.966652631884506e-06, | |
| "loss": 0.0045, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 8.25, | |
| "learning_rate": 8.877160619254264e-06, | |
| "loss": 0.0148, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 8.29, | |
| "learning_rate": 8.787759590324177e-06, | |
| "loss": 0.0076, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 8.33, | |
| "learning_rate": 8.698456789261617e-06, | |
| "loss": 0.0086, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 8.38, | |
| "learning_rate": 8.609259452274559e-06, | |
| "loss": 0.0121, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 8.42, | |
| "learning_rate": 8.52017480702521e-06, | |
| "loss": 0.01, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 8.46, | |
| "learning_rate": 8.431210072044371e-06, | |
| "loss": 0.0094, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 8.5, | |
| "learning_rate": 8.342372456146512e-06, | |
| "loss": 0.0036, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 8.54, | |
| "learning_rate": 8.253669157845632e-06, | |
| "loss": 0.0123, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 8.58, | |
| "learning_rate": 8.165107364771979e-06, | |
| "loss": 0.0262, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 8.62, | |
| "learning_rate": 8.076694253089632e-06, | |
| "loss": 0.0409, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 8.67, | |
| "learning_rate": 7.988436986915005e-06, | |
| "loss": 0.0219, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 8.71, | |
| "learning_rate": 7.900342717736354e-06, | |
| "loss": 0.0114, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 8.75, | |
| "learning_rate": 7.812418583834282e-06, | |
| "loss": 0.0184, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 8.79, | |
| "learning_rate": 7.724671709703328e-06, | |
| "loss": 0.0151, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 8.83, | |
| "learning_rate": 7.637109205474665e-06, | |
| "loss": 0.0371, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 8.88, | |
| "learning_rate": 7.5497381663399716e-06, | |
| "loss": 0.0111, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 8.92, | |
| "learning_rate": 7.462565671976504e-06, | |
| "loss": 0.0236, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 8.96, | |
| "learning_rate": 7.375598785973429e-06, | |
| "loss": 0.0131, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "learning_rate": 7.288844555259471e-06, | |
| "loss": 0.0104, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 9.04, | |
| "learning_rate": 7.202310009531886e-06, | |
| "loss": 0.0021, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 9.08, | |
| "learning_rate": 7.116002160686851e-06, | |
| "loss": 0.0202, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 9.12, | |
| "learning_rate": 7.0299280022512875e-06, | |
| "loss": 0.0089, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 9.17, | |
| "learning_rate": 6.944094508816182e-06, | |
| "loss": 0.0027, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 9.21, | |
| "learning_rate": 6.858508635471428e-06, | |
| "loss": 0.0009, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 9.25, | |
| "learning_rate": 6.773177317242257e-06, | |
| "loss": 0.0061, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 9.29, | |
| "learning_rate": 6.688107468527297e-06, | |
| "loss": 0.0023, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 9.33, | |
| "learning_rate": 6.603305982538295e-06, | |
| "loss": 0.0144, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 9.38, | |
| "learning_rate": 6.518779730741555e-06, | |
| "loss": 0.0145, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 9.42, | |
| "learning_rate": 6.434535562301153e-06, | |
| "loss": 0.01, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 9.46, | |
| "learning_rate": 6.350580303523947e-06, | |
| "loss": 0.007, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 9.5, | |
| "learning_rate": 6.266920757306429e-06, | |
| "loss": 0.0065, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 9.54, | |
| "learning_rate": 6.183563702583506e-06, | |
| "loss": 0.0218, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 9.58, | |
| "learning_rate": 6.100515893779188e-06, | |
| "loss": 0.0089, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 9.62, | |
| "learning_rate": 6.01778406025928e-06, | |
| "loss": 0.0153, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 9.67, | |
| "learning_rate": 5.935374905786102e-06, | |
| "loss": 0.0106, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 9.71, | |
| "learning_rate": 5.8532951079752895e-06, | |
| "loss": 0.0111, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 9.75, | |
| "learning_rate": 5.771551317754691e-06, | |
| "loss": 0.0289, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 9.79, | |
| "learning_rate": 5.690150158825462e-06, | |
| "loss": 0.0136, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 9.83, | |
| "learning_rate": 5.609098227125334e-06, | |
| "loss": 0.0087, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 9.88, | |
| "learning_rate": 5.528402090294142e-06, | |
| "loss": 0.0147, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 9.92, | |
| "learning_rate": 5.448068287141663e-06, | |
| "loss": 0.0122, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 9.96, | |
| "learning_rate": 5.368103327117768e-06, | |
| "loss": 0.0181, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "learning_rate": 5.288513689784951e-06, | |
| "loss": 0.0142, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 10.04, | |
| "learning_rate": 5.209305824293307e-06, | |
| "loss": 0.0038, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 10.08, | |
| "learning_rate": 5.130486148857952e-06, | |
| "loss": 0.0144, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 10.12, | |
| "learning_rate": 5.05206105023895e-06, | |
| "loss": 0.0034, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 10.17, | |
| "learning_rate": 4.974036883223798e-06, | |
| "loss": 0.0013, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 10.21, | |
| "learning_rate": 4.896419970112499e-06, | |
| "loss": 0.0083, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 10.25, | |
| "learning_rate": 4.819216600205254e-06, | |
| "loss": 0.0194, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 10.29, | |
| "learning_rate": 4.742433029292856e-06, | |
| "loss": 0.0019, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 10.33, | |
| "learning_rate": 4.6660754791497755e-06, | |
| "loss": 0.0074, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 10.38, | |
| "learning_rate": 4.590150137030009e-06, | |
| "loss": 0.0013, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 10.42, | |
| "learning_rate": 4.514663155165731e-06, | |
| "loss": 0.0098, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 10.46, | |
| "learning_rate": 4.439620650268771e-06, | |
| "loss": 0.0016, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 10.5, | |
| "learning_rate": 4.365028703034976e-06, | |
| "loss": 0.0037, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 10.54, | |
| "learning_rate": 4.290893357651502e-06, | |
| "loss": 0.0056, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 10.58, | |
| "learning_rate": 4.217220621307043e-06, | |
| "loss": 0.0148, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 10.62, | |
| "learning_rate": 4.144016463705081e-06, | |
| "loss": 0.0028, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 10.67, | |
| "learning_rate": 4.071286816580142e-06, | |
| "loss": 0.0033, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 10.71, | |
| "learning_rate": 3.999037573217157e-06, | |
| "loss": 0.0117, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 10.75, | |
| "learning_rate": 3.927274587973935e-06, | |
| "loss": 0.0108, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 10.79, | |
| "learning_rate": 3.856003675806777e-06, | |
| "loss": 0.0096, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 10.83, | |
| "learning_rate": 3.78523061179929e-06, | |
| "loss": 0.0225, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 10.88, | |
| "learning_rate": 3.7149611306944356e-06, | |
| "loss": 0.0192, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 10.92, | |
| "learning_rate": 3.645200926429844e-06, | |
| "loss": 0.0096, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 10.96, | |
| "learning_rate": 3.5759556516764205e-06, | |
| "loss": 0.0045, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "learning_rate": 3.507230917380332e-06, | |
| "loss": 0.0265, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 11.04, | |
| "learning_rate": 3.4390322923083385e-06, | |
| "loss": 0.0033, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 11.08, | |
| "learning_rate": 3.3713653025965544e-06, | |
| "loss": 0.0111, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 11.12, | |
| "learning_rate": 3.3042354313026702e-06, | |
| "loss": 0.0106, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 11.17, | |
| "learning_rate": 3.237648117961665e-06, | |
| "loss": 0.0172, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 11.21, | |
| "learning_rate": 3.1716087581450193e-06, | |
| "loss": 0.0027, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 11.25, | |
| "learning_rate": 3.1061227030235442e-06, | |
| "loss": 0.0074, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 11.29, | |
| "learning_rate": 3.041195258933749e-06, | |
| "loss": 0.0037, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 11.33, | |
| "learning_rate": 2.976831686947884e-06, | |
| "loss": 0.0006, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 11.38, | |
| "learning_rate": 2.913037202447625e-06, | |
| "loss": 0.0053, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 11.42, | |
| "learning_rate": 2.8498169747014824e-06, | |
| "loss": 0.0049, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 11.46, | |
| "learning_rate": 2.787176126445923e-06, | |
| "loss": 0.0098, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 11.5, | |
| "learning_rate": 2.725119733470284e-06, | |
| "loss": 0.0064, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 11.54, | |
| "learning_rate": 2.663652824205476e-06, | |
| "loss": 0.0051, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 11.58, | |
| "learning_rate": 2.6027803793165353e-06, | |
| "loss": 0.007, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 11.62, | |
| "learning_rate": 2.5425073312990334e-06, | |
| "loss": 0.0121, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 11.67, | |
| "learning_rate": 2.4828385640793974e-06, | |
| "loss": 0.0055, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 11.71, | |
| "learning_rate": 2.4237789126191715e-06, | |
| "loss": 0.0035, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 11.75, | |
| "learning_rate": 2.3653331625232367e-06, | |
| "loss": 0.004, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 11.79, | |
| "learning_rate": 2.307506049652031e-06, | |
| "loss": 0.0114, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 11.83, | |
| "learning_rate": 2.250302259737803e-06, | |
| "loss": 0.0004, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 11.88, | |
| "learning_rate": 2.1937264280049365e-06, | |
| "loss": 0.0247, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 11.92, | |
| "learning_rate": 2.137783138794335e-06, | |
| "loss": 0.0128, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 11.96, | |
| "learning_rate": 2.0824769251919775e-06, | |
| "loss": 0.0015, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "learning_rate": 2.027812268661592e-06, | |
| "loss": 0.0182, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 12.04, | |
| "learning_rate": 1.9737935986815205e-06, | |
| "loss": 0.004, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 12.08, | |
| "learning_rate": 1.9204252923858003e-06, | |
| "loss": 0.0028, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 12.12, | |
| "learning_rate": 1.8677116742094858e-06, | |
| "loss": 0.0056, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 12.17, | |
| "learning_rate": 1.8156570155382357e-06, | |
| "loss": 0.0048, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 12.21, | |
| "learning_rate": 1.764265534362205e-06, | |
| "loss": 0.0125, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 12.25, | |
| "learning_rate": 1.7135413949342706e-06, | |
| "loss": 0.0017, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 12.29, | |
| "learning_rate": 1.6634887074325844e-06, | |
| "loss": 0.0135, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 12.33, | |
| "learning_rate": 1.6141115276275298e-06, | |
| "loss": 0.0093, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 12.38, | |
| "learning_rate": 1.565413856553095e-06, | |
| "loss": 0.0095, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 12.42, | |
| "learning_rate": 1.5173996401826563e-06, | |
| "loss": 0.0082, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 12.46, | |
| "learning_rate": 1.470072769109242e-06, | |
| "loss": 0.0089, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 12.5, | |
| "learning_rate": 1.4234370782302742e-06, | |
| "loss": 0.0023, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 12.54, | |
| "learning_rate": 1.3774963464368295e-06, | |
| "loss": 0.0118, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 12.58, | |
| "learning_rate": 1.3322542963074314e-06, | |
| "loss": 0.0053, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 12.62, | |
| "learning_rate": 1.287714593806415e-06, | |
| "loss": 0.0128, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 12.67, | |
| "learning_rate": 1.2438808479868715e-06, | |
| "loss": 0.0042, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 12.71, | |
| "learning_rate": 1.200756610698205e-06, | |
| "loss": 0.0132, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 12.75, | |
| "learning_rate": 1.1583453762983289e-06, | |
| "loss": 0.0093, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 12.79, | |
| "learning_rate": 1.1166505813705187e-06, | |
| "loss": 0.0095, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 12.83, | |
| "learning_rate": 1.0756756044449358e-06, | |
| "loss": 0.0043, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 12.88, | |
| "learning_rate": 1.035423765724879e-06, | |
| "loss": 0.0129, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 12.92, | |
| "learning_rate": 9.958983268177425e-07, | |
| "loss": 0.0118, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 12.96, | |
| "learning_rate": 9.571024904707238e-07, | |
| "loss": 0.0036, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "learning_rate": 9.190394003113123e-07, | |
| "loss": 0.0034, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 13.04, | |
| "learning_rate": 8.817121405925543e-07, | |
| "loss": 0.0017, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 13.08, | |
| "learning_rate": 8.451237359431397e-07, | |
| "loss": 0.0046, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 13.12, | |
| "learning_rate": 8.092771511223185e-07, | |
| "loss": 0.0003, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 13.17, | |
| "learning_rate": 7.741752907796584e-07, | |
| "loss": 0.0048, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 13.21, | |
| "learning_rate": 7.398209992196914e-07, | |
| "loss": 0.0085, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 13.25, | |
| "learning_rate": 7.062170601714302e-07, | |
| "loss": 0.0036, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 13.29, | |
| "learning_rate": 6.73366196562808e-07, | |
| "loss": 0.0027, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 13.33, | |
| "learning_rate": 6.412710703000368e-07, | |
| "loss": 0.01, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 13.38, | |
| "learning_rate": 6.099342820519183e-07, | |
| "loss": 0.0037, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 13.42, | |
| "learning_rate": 5.79358371039106e-07, | |
| "loss": 0.0068, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 13.46, | |
| "learning_rate": 5.495458148283505e-07, | |
| "loss": 0.0019, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 13.5, | |
| "learning_rate": 5.204990291317535e-07, | |
| "loss": 0.0031, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 13.54, | |
| "learning_rate": 4.92220367611006e-07, | |
| "loss": 0.0131, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 13.58, | |
| "learning_rate": 4.647121216866857e-07, | |
| "loss": 0.0162, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 13.62, | |
| "learning_rate": 4.3797652035257544e-07, | |
| "loss": 0.0021, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 13.67, | |
| "learning_rate": 4.1201572999505e-07, | |
| "loss": 0.0101, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 13.71, | |
| "learning_rate": 3.8683185421753313e-07, | |
| "loss": 0.0137, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 13.75, | |
| "learning_rate": 3.6242693367004365e-07, | |
| "loss": 0.0034, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 13.79, | |
| "learning_rate": 3.38802945883836e-07, | |
| "loss": 0.0039, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 13.83, | |
| "learning_rate": 3.1596180511117235e-07, | |
| "loss": 0.0086, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 13.88, | |
| "learning_rate": 2.939053621702015e-07, | |
| "loss": 0.0113, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 13.92, | |
| "learning_rate": 2.7263540429498747e-07, | |
| "loss": 0.0192, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 13.96, | |
| "learning_rate": 2.5215365499069446e-07, | |
| "loss": 0.006, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "learning_rate": 2.3246177389392388e-07, | |
| "loss": 0.0024, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 14.04, | |
| "learning_rate": 2.1356135663824328e-07, | |
| "loss": 0.0003, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 14.08, | |
| "learning_rate": 1.9545393472488738e-07, | |
| "loss": 0.0011, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 14.12, | |
| "learning_rate": 1.7814097539865626e-07, | |
| "loss": 0.0018, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 14.17, | |
| "learning_rate": 1.6162388152903498e-07, | |
| "loss": 0.0058, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 14.21, | |
| "learning_rate": 1.4590399149650769e-07, | |
| "loss": 0.0052, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 14.25, | |
| "learning_rate": 1.309825790841146e-07, | |
| "loss": 0.006, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 14.29, | |
| "learning_rate": 1.1686085337423991e-07, | |
| "loss": 0.0049, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 14.33, | |
| "learning_rate": 1.0353995865063138e-07, | |
| "loss": 0.004, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 14.38, | |
| "learning_rate": 9.10209743056889e-08, | |
| "loss": 0.009, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 14.42, | |
| "learning_rate": 7.930491475299229e-08, | |
| "loss": 0.0017, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 14.46, | |
| "learning_rate": 6.839272934511143e-08, | |
| "loss": 0.0021, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 14.5, | |
| "learning_rate": 5.828530229667228e-08, | |
| "loss": 0.0064, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 14.54, | |
| "learning_rate": 4.898345261271531e-08, | |
| "loss": 0.0092, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 14.58, | |
| "learning_rate": 4.0487934022328533e-08, | |
| "loss": 0.0002, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 14.62, | |
| "learning_rate": 3.27994349175742e-08, | |
| "loss": 0.0116, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 14.67, | |
| "learning_rate": 2.591857829770672e-08, | |
| "loss": 0.0022, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 14.71, | |
| "learning_rate": 1.984592171869082e-08, | |
| "loss": 0.0142, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 14.75, | |
| "learning_rate": 1.4581957248026579e-08, | |
| "loss": 0.0068, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 14.79, | |
| "learning_rate": 1.0127111424872437e-08, | |
| "loss": 0.0046, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 14.83, | |
| "learning_rate": 6.481745225485059e-09, | |
| "loss": 0.0054, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 14.88, | |
| "learning_rate": 3.6461540339682855e-09, | |
| "loss": 0.0138, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 14.92, | |
| "learning_rate": 1.6205676183411733e-09, | |
| "loss": 0.0125, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 14.96, | |
| "learning_rate": 4.0515011191621933e-10, | |
| "loss": 0.0022, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "learning_rate": 0.0, | |
| "loss": 0.0143, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "step": 360, | |
| "total_flos": 1.427028711613399e+17, | |
| "train_loss": 0.004024597113311756, | |
| "train_runtime": 2809.5847, | |
| "train_samples_per_second": 16.358, | |
| "train_steps_per_second": 0.128 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 360, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 15, | |
| "save_steps": 100, | |
| "total_flos": 1.427028711613399e+17, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |