{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999467841672254, "eval_steps": 500, "global_step": 8808, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00011352710991907646, "grad_norm": 8.75, "learning_rate": 0.00020600000000000002, "loss": 11.9909, "step": 1 }, { "epoch": 0.00022705421983815292, "grad_norm": 7.71875, "learning_rate": 0.00021200000000000003, "loss": 11.428, "step": 2 }, { "epoch": 0.00034058132975722935, "grad_norm": 8.25, "learning_rate": 0.00021800000000000004, "loss": 10.9556, "step": 3 }, { "epoch": 0.00045410843967630583, "grad_norm": 7.71875, "learning_rate": 0.000224, "loss": 10.4866, "step": 4 }, { "epoch": 0.0005676355495953823, "grad_norm": 6.40625, "learning_rate": 0.00023, "loss": 10.0525, "step": 5 }, { "epoch": 0.0006811626595144587, "grad_norm": 5.5, "learning_rate": 0.00023600000000000002, "loss": 9.6593, "step": 6 }, { "epoch": 0.0007946897694335352, "grad_norm": 6.375, "learning_rate": 0.00024200000000000003, "loss": 9.3475, "step": 7 }, { "epoch": 0.0009082168793526117, "grad_norm": 4.34375, "learning_rate": 0.000248, "loss": 9.0679, "step": 8 }, { "epoch": 0.001021743989271688, "grad_norm": 2.984375, "learning_rate": 0.000254, "loss": 8.8488, "step": 9 }, { "epoch": 0.0011352710991907645, "grad_norm": 2.3125, "learning_rate": 0.00026000000000000003, "loss": 8.6739, "step": 10 }, { "epoch": 0.001248798209109841, "grad_norm": 2.015625, "learning_rate": 0.000266, "loss": 8.5514, "step": 11 }, { "epoch": 0.0013623253190289174, "grad_norm": 1.8359375, "learning_rate": 0.00027200000000000005, "loss": 8.4544, "step": 12 }, { "epoch": 0.001475852428947994, "grad_norm": 2.0625, "learning_rate": 0.00027800000000000004, "loss": 8.3934, "step": 13 }, { "epoch": 0.0015893795388670705, "grad_norm": 2.296875, "learning_rate": 0.000284, "loss": 8.371, "step": 14 }, { "epoch": 0.001702906648786147, "grad_norm": 2.03125, "learning_rate": 0.00029000000000000006, "loss": 8.3597, "step": 15 }, { "epoch": 0.0018164337587052233, "grad_norm": 2.015625, "learning_rate": 0.00029600000000000004, "loss": 8.3499, "step": 16 }, { "epoch": 0.0019299608686242998, "grad_norm": 2.234375, "learning_rate": 0.000302, "loss": 8.3017, "step": 17 }, { "epoch": 0.002043487978543376, "grad_norm": 1.890625, "learning_rate": 0.000308, "loss": 8.3102, "step": 18 }, { "epoch": 0.0021570150884624526, "grad_norm": 2.3125, "learning_rate": 0.00031400000000000004, "loss": 8.3016, "step": 19 }, { "epoch": 0.002270542198381529, "grad_norm": 2.21875, "learning_rate": 0.00032, "loss": 8.2926, "step": 20 }, { "epoch": 0.0023840693083006055, "grad_norm": 2.3125, "learning_rate": 0.00032600000000000006, "loss": 8.2745, "step": 21 }, { "epoch": 0.002497596418219682, "grad_norm": 2.203125, "learning_rate": 0.00033200000000000005, "loss": 8.2567, "step": 22 }, { "epoch": 0.0026111235281387583, "grad_norm": 2.1875, "learning_rate": 0.000338, "loss": 8.238, "step": 23 }, { "epoch": 0.0027246506380578348, "grad_norm": 2.34375, "learning_rate": 0.000344, "loss": 8.2471, "step": 24 }, { "epoch": 0.0028381777479769116, "grad_norm": 2.328125, "learning_rate": 0.00035, "loss": 8.2487, "step": 25 }, { "epoch": 0.002951704857895988, "grad_norm": 2.671875, "learning_rate": 0.000356, "loss": 8.2324, "step": 26 }, { "epoch": 0.0030652319678150645, "grad_norm": 2.390625, "learning_rate": 0.000362, "loss": 8.2254, "step": 27 }, { "epoch": 0.003178759077734141, "grad_norm": 2.859375, "learning_rate": 0.000368, "loss": 8.2185, "step": 28 }, { "epoch": 0.0032922861876532174, "grad_norm": 2.21875, "learning_rate": 0.000374, "loss": 8.2334, "step": 29 }, { "epoch": 0.003405813297572294, "grad_norm": 2.84375, "learning_rate": 0.00038, "loss": 8.2103, "step": 30 }, { "epoch": 0.00351934040749137, "grad_norm": 2.125, "learning_rate": 0.000386, "loss": 8.2065, "step": 31 }, { "epoch": 0.0036328675174104466, "grad_norm": 2.84375, "learning_rate": 0.00039200000000000004, "loss": 8.2215, "step": 32 }, { "epoch": 0.003746394627329523, "grad_norm": 2.296875, "learning_rate": 0.000398, "loss": 8.2297, "step": 33 }, { "epoch": 0.0038599217372485995, "grad_norm": 4.65625, "learning_rate": 0.000404, "loss": 8.2004, "step": 34 }, { "epoch": 0.003973448847167676, "grad_norm": 3.234375, "learning_rate": 0.00041000000000000005, "loss": 8.1734, "step": 35 }, { "epoch": 0.004086975957086752, "grad_norm": 3.359375, "learning_rate": 0.00041600000000000003, "loss": 8.1699, "step": 36 }, { "epoch": 0.004200503067005829, "grad_norm": 3.59375, "learning_rate": 0.00042200000000000007, "loss": 8.1847, "step": 37 }, { "epoch": 0.004314030176924905, "grad_norm": 2.859375, "learning_rate": 0.00042800000000000005, "loss": 8.1821, "step": 38 }, { "epoch": 0.004427557286843982, "grad_norm": 3.046875, "learning_rate": 0.0004340000000000001, "loss": 8.167, "step": 39 }, { "epoch": 0.004541084396763058, "grad_norm": 2.21875, "learning_rate": 0.00044, "loss": 8.1414, "step": 40 }, { "epoch": 0.004654611506682135, "grad_norm": 3.109375, "learning_rate": 0.000446, "loss": 8.1489, "step": 41 }, { "epoch": 0.004768138616601211, "grad_norm": 2.75, "learning_rate": 0.0004520000000000001, "loss": 8.1023, "step": 42 }, { "epoch": 0.004881665726520288, "grad_norm": 3.265625, "learning_rate": 0.000458, "loss": 8.1471, "step": 43 }, { "epoch": 0.004995192836439364, "grad_norm": 2.1875, "learning_rate": 0.00046400000000000006, "loss": 8.117, "step": 44 }, { "epoch": 0.005108719946358441, "grad_norm": 2.84375, "learning_rate": 0.00047000000000000004, "loss": 8.1023, "step": 45 }, { "epoch": 0.005222247056277517, "grad_norm": 1.7421875, "learning_rate": 0.00047599999999999997, "loss": 8.0842, "step": 46 }, { "epoch": 0.0053357741661965935, "grad_norm": 2.5, "learning_rate": 0.00048200000000000006, "loss": 8.071, "step": 47 }, { "epoch": 0.0054493012761156695, "grad_norm": 1.3203125, "learning_rate": 0.00048800000000000004, "loss": 8.0508, "step": 48 }, { "epoch": 0.005562828386034746, "grad_norm": 2.5, "learning_rate": 0.000494, "loss": 8.0878, "step": 49 }, { "epoch": 0.005676355495953823, "grad_norm": 1.2890625, "learning_rate": 0.0005, "loss": 8.0276, "step": 50 }, { "epoch": 0.005789882605872899, "grad_norm": 2.78125, "learning_rate": 0.000506, "loss": 8.0411, "step": 51 }, { "epoch": 0.005903409715791976, "grad_norm": 1.2578125, "learning_rate": 0.000512, "loss": 8.0092, "step": 52 }, { "epoch": 0.006016936825711052, "grad_norm": 3.5, "learning_rate": 0.000518, "loss": 8.0271, "step": 53 }, { "epoch": 0.006130463935630129, "grad_norm": 1.5, "learning_rate": 0.000524, "loss": 8.009, "step": 54 }, { "epoch": 0.006243991045549205, "grad_norm": 3.171875, "learning_rate": 0.0005300000000000001, "loss": 7.9902, "step": 55 }, { "epoch": 0.006357518155468282, "grad_norm": 1.2578125, "learning_rate": 0.000536, "loss": 7.9482, "step": 56 }, { "epoch": 0.006471045265387358, "grad_norm": 2.625, "learning_rate": 0.0005420000000000001, "loss": 7.9866, "step": 57 }, { "epoch": 0.006584572375306435, "grad_norm": 1.1953125, "learning_rate": 0.0005480000000000001, "loss": 7.9408, "step": 58 }, { "epoch": 0.006698099485225511, "grad_norm": 2.234375, "learning_rate": 0.000554, "loss": 7.9441, "step": 59 }, { "epoch": 0.006811626595144588, "grad_norm": 1.734375, "learning_rate": 0.0005600000000000001, "loss": 7.9147, "step": 60 }, { "epoch": 0.006925153705063664, "grad_norm": 2.390625, "learning_rate": 0.0005660000000000001, "loss": 7.9292, "step": 61 }, { "epoch": 0.00703868081498274, "grad_norm": 1.3828125, "learning_rate": 0.000572, "loss": 7.8685, "step": 62 }, { "epoch": 0.007152207924901816, "grad_norm": 2.25, "learning_rate": 0.0005780000000000001, "loss": 7.8874, "step": 63 }, { "epoch": 0.007265735034820893, "grad_norm": 1.5546875, "learning_rate": 0.0005840000000000001, "loss": 7.8855, "step": 64 }, { "epoch": 0.007379262144739969, "grad_norm": 2.109375, "learning_rate": 0.0005900000000000001, "loss": 7.8879, "step": 65 }, { "epoch": 0.007492789254659046, "grad_norm": 1.4921875, "learning_rate": 0.0005960000000000001, "loss": 7.834, "step": 66 }, { "epoch": 0.007606316364578123, "grad_norm": 2.078125, "learning_rate": 0.0006020000000000001, "loss": 7.8308, "step": 67 }, { "epoch": 0.007719843474497199, "grad_norm": 1.7421875, "learning_rate": 0.000608, "loss": 7.8146, "step": 68 }, { "epoch": 0.007833370584416275, "grad_norm": 2.109375, "learning_rate": 0.0006140000000000001, "loss": 7.8115, "step": 69 }, { "epoch": 0.007946897694335353, "grad_norm": 1.6015625, "learning_rate": 0.0006200000000000001, "loss": 7.771, "step": 70 }, { "epoch": 0.008060424804254429, "grad_norm": 2.046875, "learning_rate": 0.000626, "loss": 7.7673, "step": 71 }, { "epoch": 0.008173951914173505, "grad_norm": 1.6015625, "learning_rate": 0.000632, "loss": 7.769, "step": 72 }, { "epoch": 0.00828747902409258, "grad_norm": 1.8671875, "learning_rate": 0.0006380000000000001, "loss": 7.7397, "step": 73 }, { "epoch": 0.008401006134011658, "grad_norm": 1.4453125, "learning_rate": 0.000644, "loss": 7.7263, "step": 74 }, { "epoch": 0.008514533243930734, "grad_norm": 2.171875, "learning_rate": 0.0006500000000000001, "loss": 7.7123, "step": 75 }, { "epoch": 0.00862806035384981, "grad_norm": 1.4296875, "learning_rate": 0.0006560000000000001, "loss": 7.7051, "step": 76 }, { "epoch": 0.008741587463768886, "grad_norm": 2.046875, "learning_rate": 0.0006619999999999999, "loss": 7.6825, "step": 77 }, { "epoch": 0.008855114573687964, "grad_norm": 1.640625, "learning_rate": 0.0006680000000000001, "loss": 7.6839, "step": 78 }, { "epoch": 0.00896864168360704, "grad_norm": 2.15625, "learning_rate": 0.0006739999999999999, "loss": 7.6467, "step": 79 }, { "epoch": 0.009082168793526116, "grad_norm": 1.5546875, "learning_rate": 0.0006799999999999999, "loss": 7.6222, "step": 80 }, { "epoch": 0.009195695903445192, "grad_norm": 2.125, "learning_rate": 0.0006860000000000001, "loss": 7.6246, "step": 81 }, { "epoch": 0.00930922301336427, "grad_norm": 1.421875, "learning_rate": 0.000692, "loss": 7.6004, "step": 82 }, { "epoch": 0.009422750123283346, "grad_norm": 1.7734375, "learning_rate": 0.0006979999999999999, "loss": 7.5864, "step": 83 }, { "epoch": 0.009536277233202422, "grad_norm": 1.203125, "learning_rate": 0.0007040000000000002, "loss": 7.5924, "step": 84 }, { "epoch": 0.0096498043431215, "grad_norm": 1.71875, "learning_rate": 0.00071, "loss": 7.5853, "step": 85 }, { "epoch": 0.009763331453040576, "grad_norm": 1.5, "learning_rate": 0.000716, "loss": 7.5521, "step": 86 }, { "epoch": 0.009876858562959652, "grad_norm": 2.046875, "learning_rate": 0.000722, "loss": 7.5544, "step": 87 }, { "epoch": 0.009990385672878728, "grad_norm": 1.4140625, "learning_rate": 0.000728, "loss": 7.5235, "step": 88 }, { "epoch": 0.010103912782797805, "grad_norm": 1.6640625, "learning_rate": 0.000734, "loss": 7.5459, "step": 89 }, { "epoch": 0.010217439892716881, "grad_norm": 1.234375, "learning_rate": 0.00074, "loss": 7.5094, "step": 90 }, { "epoch": 0.010330967002635957, "grad_norm": 1.625, "learning_rate": 0.000746, "loss": 7.491, "step": 91 }, { "epoch": 0.010444494112555033, "grad_norm": 1.5625, "learning_rate": 0.0007520000000000001, "loss": 7.4972, "step": 92 }, { "epoch": 0.010558021222474111, "grad_norm": 1.953125, "learning_rate": 0.000758, "loss": 7.4828, "step": 93 }, { "epoch": 0.010671548332393187, "grad_norm": 1.390625, "learning_rate": 0.000764, "loss": 7.4417, "step": 94 }, { "epoch": 0.010785075442312263, "grad_norm": 1.6640625, "learning_rate": 0.0007700000000000001, "loss": 7.4512, "step": 95 }, { "epoch": 0.010898602552231339, "grad_norm": 1.453125, "learning_rate": 0.000776, "loss": 7.4023, "step": 96 }, { "epoch": 0.011012129662150417, "grad_norm": 1.6484375, "learning_rate": 0.000782, "loss": 7.4279, "step": 97 }, { "epoch": 0.011125656772069493, "grad_norm": 1.3984375, "learning_rate": 0.0007880000000000001, "loss": 7.4036, "step": 98 }, { "epoch": 0.011239183881988569, "grad_norm": 1.5703125, "learning_rate": 0.0007940000000000001, "loss": 7.4104, "step": 99 }, { "epoch": 0.011352710991907647, "grad_norm": 1.3203125, "learning_rate": 0.0008, "loss": 7.3965, "step": 100 }, { "epoch": 0.011466238101826723, "grad_norm": 1.3125, "learning_rate": 0.0008060000000000001, "loss": 7.3772, "step": 101 }, { "epoch": 0.011579765211745799, "grad_norm": 1.359375, "learning_rate": 0.0008120000000000001, "loss": 7.3716, "step": 102 }, { "epoch": 0.011693292321664875, "grad_norm": 1.515625, "learning_rate": 0.000818, "loss": 7.3657, "step": 103 }, { "epoch": 0.011806819431583952, "grad_norm": 1.3515625, "learning_rate": 0.0008240000000000001, "loss": 7.3203, "step": 104 }, { "epoch": 0.011920346541503028, "grad_norm": 1.265625, "learning_rate": 0.0008300000000000001, "loss": 7.3213, "step": 105 }, { "epoch": 0.012033873651422104, "grad_norm": 1.2265625, "learning_rate": 0.000836, "loss": 7.3194, "step": 106 }, { "epoch": 0.01214740076134118, "grad_norm": 1.3203125, "learning_rate": 0.0008420000000000001, "loss": 7.3077, "step": 107 }, { "epoch": 0.012260927871260258, "grad_norm": 1.34375, "learning_rate": 0.0008480000000000001, "loss": 7.2861, "step": 108 }, { "epoch": 0.012374454981179334, "grad_norm": 1.21875, "learning_rate": 0.0008540000000000002, "loss": 7.2577, "step": 109 }, { "epoch": 0.01248798209109841, "grad_norm": 1.3671875, "learning_rate": 0.0008599999999999999, "loss": 7.2872, "step": 110 }, { "epoch": 0.012601509201017486, "grad_norm": 1.25, "learning_rate": 0.0008660000000000001, "loss": 7.2687, "step": 111 }, { "epoch": 0.012715036310936564, "grad_norm": 1.125, "learning_rate": 0.0008720000000000002, "loss": 7.2544, "step": 112 }, { "epoch": 0.01282856342085564, "grad_norm": 1.078125, "learning_rate": 0.0008779999999999999, "loss": 7.2292, "step": 113 }, { "epoch": 0.012942090530774716, "grad_norm": 1.109375, "learning_rate": 0.0008840000000000001, "loss": 7.2224, "step": 114 }, { "epoch": 0.013055617640693792, "grad_norm": 1.2734375, "learning_rate": 0.0008900000000000002, "loss": 7.2196, "step": 115 }, { "epoch": 0.01316914475061287, "grad_norm": 1.0859375, "learning_rate": 0.0008959999999999999, "loss": 7.1945, "step": 116 }, { "epoch": 0.013282671860531945, "grad_norm": 1.1640625, "learning_rate": 0.0009020000000000001, "loss": 7.1936, "step": 117 }, { "epoch": 0.013396198970451021, "grad_norm": 1.1171875, "learning_rate": 0.000908, "loss": 7.189, "step": 118 }, { "epoch": 0.0135097260803701, "grad_norm": 1.3359375, "learning_rate": 0.000914, "loss": 7.1527, "step": 119 }, { "epoch": 0.013623253190289175, "grad_norm": 1.1875, "learning_rate": 0.0009200000000000001, "loss": 7.1539, "step": 120 }, { "epoch": 0.013736780300208251, "grad_norm": 1.4453125, "learning_rate": 0.000926, "loss": 7.1505, "step": 121 }, { "epoch": 0.013850307410127327, "grad_norm": 1.0625, "learning_rate": 0.000932, "loss": 7.1208, "step": 122 }, { "epoch": 0.013963834520046405, "grad_norm": 1.3046875, "learning_rate": 0.0009379999999999999, "loss": 7.1369, "step": 123 }, { "epoch": 0.01407736162996548, "grad_norm": 1.1171875, "learning_rate": 0.000944, "loss": 7.111, "step": 124 }, { "epoch": 0.014190888739884557, "grad_norm": 1.3515625, "learning_rate": 0.00095, "loss": 7.1177, "step": 125 }, { "epoch": 0.014304415849803633, "grad_norm": 0.95703125, "learning_rate": 0.0009559999999999999, "loss": 7.0898, "step": 126 }, { "epoch": 0.01441794295972271, "grad_norm": 1.296875, "learning_rate": 0.000962, "loss": 7.1302, "step": 127 }, { "epoch": 0.014531470069641787, "grad_norm": 0.984375, "learning_rate": 0.000968, "loss": 7.0921, "step": 128 }, { "epoch": 0.014644997179560863, "grad_norm": 1.15625, "learning_rate": 0.000974, "loss": 7.0639, "step": 129 }, { "epoch": 0.014758524289479939, "grad_norm": 0.8671875, "learning_rate": 0.00098, "loss": 7.0577, "step": 130 }, { "epoch": 0.014872051399399016, "grad_norm": 1.0546875, "learning_rate": 0.0009860000000000001, "loss": 7.0683, "step": 131 }, { "epoch": 0.014985578509318092, "grad_norm": 1.203125, "learning_rate": 0.000992, "loss": 7.0393, "step": 132 }, { "epoch": 0.015099105619237168, "grad_norm": 0.97265625, "learning_rate": 0.000998, "loss": 7.0399, "step": 133 }, { "epoch": 0.015212632729156246, "grad_norm": 1.171875, "learning_rate": 0.0010040000000000001, "loss": 7.0315, "step": 134 }, { "epoch": 0.015326159839075322, "grad_norm": 1.046875, "learning_rate": 0.00101, "loss": 7.0166, "step": 135 }, { "epoch": 0.015439686948994398, "grad_norm": 1.1640625, "learning_rate": 0.001016, "loss": 7.0053, "step": 136 }, { "epoch": 0.015553214058913474, "grad_norm": 0.8671875, "learning_rate": 0.0010220000000000001, "loss": 6.9946, "step": 137 }, { "epoch": 0.01566674116883255, "grad_norm": 0.95703125, "learning_rate": 0.001028, "loss": 7.0057, "step": 138 }, { "epoch": 0.015780268278751626, "grad_norm": 0.859375, "learning_rate": 0.001034, "loss": 6.9886, "step": 139 }, { "epoch": 0.015893795388670705, "grad_norm": 0.90625, "learning_rate": 0.0010400000000000001, "loss": 6.9678, "step": 140 }, { "epoch": 0.01600732249858978, "grad_norm": 0.9609375, "learning_rate": 0.001046, "loss": 6.9794, "step": 141 }, { "epoch": 0.016120849608508857, "grad_norm": 0.9140625, "learning_rate": 0.001052, "loss": 6.9735, "step": 142 }, { "epoch": 0.016234376718427933, "grad_norm": 0.91796875, "learning_rate": 0.0010580000000000001, "loss": 6.9456, "step": 143 }, { "epoch": 0.01634790382834701, "grad_norm": 0.9140625, "learning_rate": 0.001064, "loss": 6.9397, "step": 144 }, { "epoch": 0.016461430938266085, "grad_norm": 1.0078125, "learning_rate": 0.00107, "loss": 6.9411, "step": 145 }, { "epoch": 0.01657495804818516, "grad_norm": 1.015625, "learning_rate": 0.0010760000000000001, "loss": 6.9274, "step": 146 }, { "epoch": 0.016688485158104237, "grad_norm": 1.1171875, "learning_rate": 0.001082, "loss": 6.921, "step": 147 }, { "epoch": 0.016802012268023317, "grad_norm": 0.9609375, "learning_rate": 0.0010880000000000002, "loss": 6.9047, "step": 148 }, { "epoch": 0.016915539377942393, "grad_norm": 1.1640625, "learning_rate": 0.0010940000000000001, "loss": 6.9285, "step": 149 }, { "epoch": 0.01702906648786147, "grad_norm": 0.921875, "learning_rate": 0.0011, "loss": 6.8916, "step": 150 }, { "epoch": 0.017142593597780545, "grad_norm": 1.0703125, "learning_rate": 0.001106, "loss": 6.8917, "step": 151 }, { "epoch": 0.01725612070769962, "grad_norm": 1.0625, "learning_rate": 0.0011120000000000001, "loss": 6.8983, "step": 152 }, { "epoch": 0.017369647817618697, "grad_norm": 0.94921875, "learning_rate": 0.001118, "loss": 6.872, "step": 153 }, { "epoch": 0.017483174927537773, "grad_norm": 0.9296875, "learning_rate": 0.001124, "loss": 6.8641, "step": 154 }, { "epoch": 0.017596702037456852, "grad_norm": 0.9453125, "learning_rate": 0.0011300000000000001, "loss": 6.8508, "step": 155 }, { "epoch": 0.01771022914737593, "grad_norm": 1.109375, "learning_rate": 0.001136, "loss": 6.8742, "step": 156 }, { "epoch": 0.017823756257295004, "grad_norm": 1.109375, "learning_rate": 0.001142, "loss": 6.8687, "step": 157 }, { "epoch": 0.01793728336721408, "grad_norm": 1.0546875, "learning_rate": 0.001148, "loss": 6.8461, "step": 158 }, { "epoch": 0.018050810477133156, "grad_norm": 1.015625, "learning_rate": 0.001154, "loss": 6.8546, "step": 159 }, { "epoch": 0.018164337587052232, "grad_norm": 0.89453125, "learning_rate": 0.00116, "loss": 6.8339, "step": 160 }, { "epoch": 0.01827786469697131, "grad_norm": 0.8671875, "learning_rate": 0.001166, "loss": 6.8353, "step": 161 }, { "epoch": 0.018391391806890384, "grad_norm": 1.046875, "learning_rate": 0.001172, "loss": 6.8294, "step": 162 }, { "epoch": 0.018504918916809464, "grad_norm": 1.0703125, "learning_rate": 0.001178, "loss": 6.816, "step": 163 }, { "epoch": 0.01861844602672854, "grad_norm": 0.83984375, "learning_rate": 0.001184, "loss": 6.8221, "step": 164 }, { "epoch": 0.018731973136647616, "grad_norm": 0.93359375, "learning_rate": 0.00119, "loss": 6.8141, "step": 165 }, { "epoch": 0.018845500246566692, "grad_norm": 0.78515625, "learning_rate": 0.001196, "loss": 6.7819, "step": 166 }, { "epoch": 0.018959027356485768, "grad_norm": 0.84765625, "learning_rate": 0.001202, "loss": 6.8, "step": 167 }, { "epoch": 0.019072554466404844, "grad_norm": 0.92578125, "learning_rate": 0.0012080000000000003, "loss": 6.7826, "step": 168 }, { "epoch": 0.01918608157632392, "grad_norm": 0.98046875, "learning_rate": 0.001214, "loss": 6.7639, "step": 169 }, { "epoch": 0.019299608686243, "grad_norm": 1.03125, "learning_rate": 0.00122, "loss": 6.7705, "step": 170 }, { "epoch": 0.019413135796162075, "grad_norm": 0.90234375, "learning_rate": 0.001226, "loss": 6.7804, "step": 171 }, { "epoch": 0.01952666290608115, "grad_norm": 0.8828125, "learning_rate": 0.001232, "loss": 6.7498, "step": 172 }, { "epoch": 0.019640190016000227, "grad_norm": 0.8359375, "learning_rate": 0.001238, "loss": 6.772, "step": 173 }, { "epoch": 0.019753717125919303, "grad_norm": 0.83984375, "learning_rate": 0.001244, "loss": 6.7528, "step": 174 }, { "epoch": 0.01986724423583838, "grad_norm": 0.83984375, "learning_rate": 0.00125, "loss": 6.7417, "step": 175 }, { "epoch": 0.019980771345757455, "grad_norm": 0.96484375, "learning_rate": 0.001256, "loss": 6.7512, "step": 176 }, { "epoch": 0.02009429845567653, "grad_norm": 0.79296875, "learning_rate": 0.001262, "loss": 6.7275, "step": 177 }, { "epoch": 0.02020782556559561, "grad_norm": 0.92578125, "learning_rate": 0.001268, "loss": 6.7165, "step": 178 }, { "epoch": 0.020321352675514687, "grad_norm": 0.9765625, "learning_rate": 0.001274, "loss": 6.7315, "step": 179 }, { "epoch": 0.020434879785433763, "grad_norm": 0.953125, "learning_rate": 0.00128, "loss": 6.716, "step": 180 }, { "epoch": 0.02054840689535284, "grad_norm": 0.81640625, "learning_rate": 0.001286, "loss": 6.7107, "step": 181 }, { "epoch": 0.020661934005271915, "grad_norm": 0.72265625, "learning_rate": 0.001292, "loss": 6.6907, "step": 182 }, { "epoch": 0.02077546111519099, "grad_norm": 0.71875, "learning_rate": 0.0012980000000000001, "loss": 6.677, "step": 183 }, { "epoch": 0.020888988225110067, "grad_norm": 0.7890625, "learning_rate": 0.0013039999999999998, "loss": 6.6887, "step": 184 }, { "epoch": 0.021002515335029146, "grad_norm": 0.83203125, "learning_rate": 0.0013100000000000002, "loss": 6.699, "step": 185 }, { "epoch": 0.021116042444948222, "grad_norm": 0.88671875, "learning_rate": 0.0013160000000000001, "loss": 6.6653, "step": 186 }, { "epoch": 0.021229569554867298, "grad_norm": 0.8515625, "learning_rate": 0.0013219999999999998, "loss": 6.6719, "step": 187 }, { "epoch": 0.021343096664786374, "grad_norm": 0.73828125, "learning_rate": 0.0013280000000000002, "loss": 6.6611, "step": 188 }, { "epoch": 0.02145662377470545, "grad_norm": 0.76171875, "learning_rate": 0.0013340000000000001, "loss": 6.6477, "step": 189 }, { "epoch": 0.021570150884624526, "grad_norm": 0.80078125, "learning_rate": 0.0013399999999999998, "loss": 6.6759, "step": 190 }, { "epoch": 0.021683677994543602, "grad_norm": 0.87890625, "learning_rate": 0.0013460000000000002, "loss": 6.6546, "step": 191 }, { "epoch": 0.021797205104462678, "grad_norm": 0.88671875, "learning_rate": 0.0013520000000000001, "loss": 6.6508, "step": 192 }, { "epoch": 0.021910732214381758, "grad_norm": 0.8359375, "learning_rate": 0.0013579999999999998, "loss": 6.6491, "step": 193 }, { "epoch": 0.022024259324300834, "grad_norm": 0.85546875, "learning_rate": 0.001364, "loss": 6.6361, "step": 194 }, { "epoch": 0.02213778643421991, "grad_norm": 0.84375, "learning_rate": 0.0013700000000000001, "loss": 6.6242, "step": 195 }, { "epoch": 0.022251313544138986, "grad_norm": 0.89453125, "learning_rate": 0.0013759999999999998, "loss": 6.6195, "step": 196 }, { "epoch": 0.02236484065405806, "grad_norm": 0.80078125, "learning_rate": 0.001382, "loss": 6.6039, "step": 197 }, { "epoch": 0.022478367763977138, "grad_norm": 0.703125, "learning_rate": 0.0013880000000000001, "loss": 6.6206, "step": 198 }, { "epoch": 0.022591894873896214, "grad_norm": 0.765625, "learning_rate": 0.0013939999999999998, "loss": 6.6069, "step": 199 }, { "epoch": 0.022705421983815293, "grad_norm": 0.8203125, "learning_rate": 0.0014, "loss": 6.6141, "step": 200 }, { "epoch": 0.02281894909373437, "grad_norm": 0.88671875, "learning_rate": 0.0014060000000000001, "loss": 6.5865, "step": 201 }, { "epoch": 0.022932476203653445, "grad_norm": 0.83203125, "learning_rate": 0.0014119999999999998, "loss": 6.6011, "step": 202 }, { "epoch": 0.02304600331357252, "grad_norm": 0.78125, "learning_rate": 0.001418, "loss": 6.5957, "step": 203 }, { "epoch": 0.023159530423491597, "grad_norm": 0.73828125, "learning_rate": 0.0014240000000000001, "loss": 6.588, "step": 204 }, { "epoch": 0.023273057533410673, "grad_norm": 0.74609375, "learning_rate": 0.00143, "loss": 6.5857, "step": 205 }, { "epoch": 0.02338658464332975, "grad_norm": 0.7578125, "learning_rate": 0.001436, "loss": 6.582, "step": 206 }, { "epoch": 0.023500111753248825, "grad_norm": 0.921875, "learning_rate": 0.001442, "loss": 6.5843, "step": 207 }, { "epoch": 0.023613638863167905, "grad_norm": 0.98828125, "learning_rate": 0.001448, "loss": 6.582, "step": 208 }, { "epoch": 0.02372716597308698, "grad_norm": 0.94921875, "learning_rate": 0.001454, "loss": 6.5853, "step": 209 }, { "epoch": 0.023840693083006056, "grad_norm": 0.875, "learning_rate": 0.00146, "loss": 6.5648, "step": 210 }, { "epoch": 0.023954220192925132, "grad_norm": 0.8828125, "learning_rate": 0.001466, "loss": 6.5536, "step": 211 }, { "epoch": 0.02406774730284421, "grad_norm": 0.828125, "learning_rate": 0.001472, "loss": 6.571, "step": 212 }, { "epoch": 0.024181274412763284, "grad_norm": 0.73828125, "learning_rate": 0.001478, "loss": 6.5288, "step": 213 }, { "epoch": 0.02429480152268236, "grad_norm": 0.71875, "learning_rate": 0.001484, "loss": 6.5409, "step": 214 }, { "epoch": 0.02440832863260144, "grad_norm": 0.859375, "learning_rate": 0.00149, "loss": 6.5423, "step": 215 }, { "epoch": 0.024521855742520516, "grad_norm": 0.8984375, "learning_rate": 0.001496, "loss": 6.5535, "step": 216 }, { "epoch": 0.024635382852439592, "grad_norm": 0.80078125, "learning_rate": 0.001502, "loss": 6.523, "step": 217 }, { "epoch": 0.024748909962358668, "grad_norm": 0.76171875, "learning_rate": 0.001508, "loss": 6.5229, "step": 218 }, { "epoch": 0.024862437072277744, "grad_norm": 0.78515625, "learning_rate": 0.001514, "loss": 6.5246, "step": 219 }, { "epoch": 0.02497596418219682, "grad_norm": 0.71484375, "learning_rate": 0.0015199999999999999, "loss": 6.5105, "step": 220 }, { "epoch": 0.025089491292115896, "grad_norm": 0.7421875, "learning_rate": 0.001526, "loss": 6.5089, "step": 221 }, { "epoch": 0.025203018402034972, "grad_norm": 0.78515625, "learning_rate": 0.0015320000000000002, "loss": 6.4997, "step": 222 }, { "epoch": 0.02531654551195405, "grad_norm": 0.7265625, "learning_rate": 0.0015379999999999999, "loss": 6.4932, "step": 223 }, { "epoch": 0.025430072621873127, "grad_norm": 0.71875, "learning_rate": 0.001544, "loss": 6.4813, "step": 224 }, { "epoch": 0.025543599731792203, "grad_norm": 0.71484375, "learning_rate": 0.0015500000000000002, "loss": 6.4868, "step": 225 }, { "epoch": 0.02565712684171128, "grad_norm": 0.8046875, "learning_rate": 0.0015559999999999999, "loss": 6.495, "step": 226 }, { "epoch": 0.025770653951630355, "grad_norm": 0.79296875, "learning_rate": 0.001562, "loss": 6.484, "step": 227 }, { "epoch": 0.02588418106154943, "grad_norm": 0.6953125, "learning_rate": 0.0015680000000000002, "loss": 6.4856, "step": 228 }, { "epoch": 0.025997708171468507, "grad_norm": 0.69140625, "learning_rate": 0.0015739999999999999, "loss": 6.4888, "step": 229 }, { "epoch": 0.026111235281387583, "grad_norm": 0.6640625, "learning_rate": 0.00158, "loss": 6.4776, "step": 230 }, { "epoch": 0.026224762391306663, "grad_norm": 0.703125, "learning_rate": 0.0015860000000000002, "loss": 6.4745, "step": 231 }, { "epoch": 0.02633828950122574, "grad_norm": 0.75390625, "learning_rate": 0.0015919999999999999, "loss": 6.4713, "step": 232 }, { "epoch": 0.026451816611144815, "grad_norm": 0.7109375, "learning_rate": 0.0015979999999999998, "loss": 6.4736, "step": 233 }, { "epoch": 0.02656534372106389, "grad_norm": 0.7734375, "learning_rate": 0.0016040000000000002, "loss": 6.4385, "step": 234 }, { "epoch": 0.026678870830982967, "grad_norm": 0.76953125, "learning_rate": 0.0016099999999999999, "loss": 6.4602, "step": 235 }, { "epoch": 0.026792397940902043, "grad_norm": 0.77734375, "learning_rate": 0.0016159999999999998, "loss": 6.4555, "step": 236 }, { "epoch": 0.02690592505082112, "grad_norm": 0.8125, "learning_rate": 0.0016220000000000002, "loss": 6.4601, "step": 237 }, { "epoch": 0.0270194521607402, "grad_norm": 0.80078125, "learning_rate": 0.0016279999999999999, "loss": 6.4413, "step": 238 }, { "epoch": 0.027132979270659274, "grad_norm": 0.7890625, "learning_rate": 0.001634, "loss": 6.4384, "step": 239 }, { "epoch": 0.02724650638057835, "grad_norm": 0.7890625, "learning_rate": 0.0016400000000000002, "loss": 6.4485, "step": 240 }, { "epoch": 0.027360033490497426, "grad_norm": 0.7734375, "learning_rate": 0.001646, "loss": 6.4372, "step": 241 }, { "epoch": 0.027473560600416502, "grad_norm": 0.77734375, "learning_rate": 0.001652, "loss": 6.4507, "step": 242 }, { "epoch": 0.02758708771033558, "grad_norm": 0.6875, "learning_rate": 0.0016580000000000002, "loss": 6.4199, "step": 243 }, { "epoch": 0.027700614820254654, "grad_norm": 0.69921875, "learning_rate": 0.001664, "loss": 6.4132, "step": 244 }, { "epoch": 0.02781414193017373, "grad_norm": 0.765625, "learning_rate": 0.00167, "loss": 6.4206, "step": 245 }, { "epoch": 0.02792766904009281, "grad_norm": 0.73046875, "learning_rate": 0.001676, "loss": 6.4129, "step": 246 }, { "epoch": 0.028041196150011886, "grad_norm": 0.81640625, "learning_rate": 0.001682, "loss": 6.4176, "step": 247 }, { "epoch": 0.02815472325993096, "grad_norm": 0.87890625, "learning_rate": 0.001688, "loss": 6.4286, "step": 248 }, { "epoch": 0.028268250369850038, "grad_norm": 0.78125, "learning_rate": 0.001694, "loss": 6.4073, "step": 249 }, { "epoch": 0.028381777479769114, "grad_norm": 0.83984375, "learning_rate": 0.0017, "loss": 6.4126, "step": 250 }, { "epoch": 0.02849530458968819, "grad_norm": 0.7890625, "learning_rate": 0.001706, "loss": 6.4056, "step": 251 }, { "epoch": 0.028608831699607266, "grad_norm": 0.76953125, "learning_rate": 0.001712, "loss": 6.3884, "step": 252 }, { "epoch": 0.028722358809526345, "grad_norm": 0.74609375, "learning_rate": 0.001718, "loss": 6.4034, "step": 253 }, { "epoch": 0.02883588591944542, "grad_norm": 0.7890625, "learning_rate": 0.001724, "loss": 6.4039, "step": 254 }, { "epoch": 0.028949413029364497, "grad_norm": 0.77734375, "learning_rate": 0.00173, "loss": 6.3756, "step": 255 }, { "epoch": 0.029062940139283573, "grad_norm": 0.69140625, "learning_rate": 0.0017360000000000001, "loss": 6.3928, "step": 256 }, { "epoch": 0.02917646724920265, "grad_norm": 0.6796875, "learning_rate": 0.001742, "loss": 6.3748, "step": 257 }, { "epoch": 0.029289994359121725, "grad_norm": 0.7890625, "learning_rate": 0.001748, "loss": 6.3946, "step": 258 }, { "epoch": 0.0294035214690408, "grad_norm": 0.88671875, "learning_rate": 0.0017540000000000001, "loss": 6.3819, "step": 259 }, { "epoch": 0.029517048578959877, "grad_norm": 0.76953125, "learning_rate": 0.00176, "loss": 6.3675, "step": 260 }, { "epoch": 0.029630575688878957, "grad_norm": 0.66796875, "learning_rate": 0.001766, "loss": 6.3723, "step": 261 }, { "epoch": 0.029744102798798033, "grad_norm": 0.65234375, "learning_rate": 0.0017720000000000001, "loss": 6.356, "step": 262 }, { "epoch": 0.02985762990871711, "grad_norm": 0.6484375, "learning_rate": 0.001778, "loss": 6.3453, "step": 263 }, { "epoch": 0.029971157018636185, "grad_norm": 0.6796875, "learning_rate": 0.001784, "loss": 6.3625, "step": 264 }, { "epoch": 0.03008468412855526, "grad_norm": 0.62109375, "learning_rate": 0.0017900000000000001, "loss": 6.358, "step": 265 }, { "epoch": 0.030198211238474337, "grad_norm": 0.67578125, "learning_rate": 0.001796, "loss": 6.3492, "step": 266 }, { "epoch": 0.030311738348393413, "grad_norm": 0.69140625, "learning_rate": 0.001802, "loss": 6.3527, "step": 267 }, { "epoch": 0.030425265458312492, "grad_norm": 0.66796875, "learning_rate": 0.0018080000000000001, "loss": 6.3575, "step": 268 }, { "epoch": 0.030538792568231568, "grad_norm": 0.6640625, "learning_rate": 0.0018139999999999999, "loss": 6.3469, "step": 269 }, { "epoch": 0.030652319678150644, "grad_norm": 0.671875, "learning_rate": 0.00182, "loss": 6.3497, "step": 270 }, { "epoch": 0.03076584678806972, "grad_norm": 0.70703125, "learning_rate": 0.0018260000000000001, "loss": 6.3454, "step": 271 }, { "epoch": 0.030879373897988796, "grad_norm": 0.6640625, "learning_rate": 0.0018319999999999999, "loss": 6.3361, "step": 272 }, { "epoch": 0.030992901007907872, "grad_norm": 0.63671875, "learning_rate": 0.0018380000000000002, "loss": 6.3293, "step": 273 }, { "epoch": 0.031106428117826948, "grad_norm": 0.6640625, "learning_rate": 0.0018440000000000002, "loss": 6.3312, "step": 274 }, { "epoch": 0.031219955227746024, "grad_norm": 0.82421875, "learning_rate": 0.0018499999999999999, "loss": 6.3324, "step": 275 }, { "epoch": 0.0313334823376651, "grad_norm": 0.796875, "learning_rate": 0.0018560000000000002, "loss": 6.3218, "step": 276 }, { "epoch": 0.031447009447584176, "grad_norm": 0.8125, "learning_rate": 0.0018620000000000002, "loss": 6.3279, "step": 277 }, { "epoch": 0.03156053655750325, "grad_norm": 0.84765625, "learning_rate": 0.0018679999999999999, "loss": 6.3369, "step": 278 }, { "epoch": 0.03167406366742233, "grad_norm": 0.828125, "learning_rate": 0.0018740000000000002, "loss": 6.3231, "step": 279 }, { "epoch": 0.03178759077734141, "grad_norm": 0.87109375, "learning_rate": 0.0018800000000000002, "loss": 6.3226, "step": 280 }, { "epoch": 0.03190111788726049, "grad_norm": 0.6875, "learning_rate": 0.0018859999999999999, "loss": 6.3117, "step": 281 }, { "epoch": 0.03201464499717956, "grad_norm": 0.65234375, "learning_rate": 0.001892, "loss": 6.2882, "step": 282 }, { "epoch": 0.03212817210709864, "grad_norm": 0.73828125, "learning_rate": 0.0018980000000000002, "loss": 6.3303, "step": 283 }, { "epoch": 0.032241699217017715, "grad_norm": 0.79296875, "learning_rate": 0.0019039999999999999, "loss": 6.3005, "step": 284 }, { "epoch": 0.03235522632693679, "grad_norm": 0.67578125, "learning_rate": 0.00191, "loss": 6.313, "step": 285 }, { "epoch": 0.03246875343685587, "grad_norm": 0.62109375, "learning_rate": 0.0019160000000000002, "loss": 6.2736, "step": 286 }, { "epoch": 0.03258228054677494, "grad_norm": 0.60546875, "learning_rate": 0.0019219999999999999, "loss": 6.2803, "step": 287 }, { "epoch": 0.03269580765669402, "grad_norm": 0.64453125, "learning_rate": 0.001928, "loss": 6.2968, "step": 288 }, { "epoch": 0.032809334766613095, "grad_norm": 0.6875, "learning_rate": 0.0019340000000000002, "loss": 6.3097, "step": 289 }, { "epoch": 0.03292286187653217, "grad_norm": 0.71875, "learning_rate": 0.0019399999999999999, "loss": 6.2837, "step": 290 }, { "epoch": 0.03303638898645125, "grad_norm": 0.78515625, "learning_rate": 0.001946, "loss": 6.2716, "step": 291 }, { "epoch": 0.03314991609637032, "grad_norm": 0.73046875, "learning_rate": 0.0019520000000000002, "loss": 6.2953, "step": 292 }, { "epoch": 0.0332634432062894, "grad_norm": 0.625, "learning_rate": 0.001958, "loss": 6.2702, "step": 293 }, { "epoch": 0.033376970316208475, "grad_norm": 0.54296875, "learning_rate": 0.001964, "loss": 6.2684, "step": 294 }, { "epoch": 0.03349049742612756, "grad_norm": 0.59765625, "learning_rate": 0.00197, "loss": 6.2785, "step": 295 }, { "epoch": 0.033604024536046634, "grad_norm": 0.6328125, "learning_rate": 0.001976, "loss": 6.2718, "step": 296 }, { "epoch": 0.03371755164596571, "grad_norm": 0.71875, "learning_rate": 0.001982, "loss": 6.2632, "step": 297 }, { "epoch": 0.033831078755884786, "grad_norm": 0.6875, "learning_rate": 0.001988, "loss": 6.253, "step": 298 }, { "epoch": 0.03394460586580386, "grad_norm": 0.60546875, "learning_rate": 0.001994, "loss": 6.261, "step": 299 }, { "epoch": 0.03405813297572294, "grad_norm": 0.61328125, "learning_rate": 0.002, "loss": 6.2471, "step": 300 }, { "epoch": 0.034171660085642014, "grad_norm": 0.62890625, "learning_rate": 0.002, "loss": 6.2568, "step": 301 }, { "epoch": 0.03428518719556109, "grad_norm": 0.6328125, "learning_rate": 0.002, "loss": 6.2433, "step": 302 }, { "epoch": 0.034398714305480166, "grad_norm": 0.6171875, "learning_rate": 0.002, "loss": 6.2558, "step": 303 }, { "epoch": 0.03451224141539924, "grad_norm": 0.75390625, "learning_rate": 0.002, "loss": 6.2437, "step": 304 }, { "epoch": 0.03462576852531832, "grad_norm": 0.734375, "learning_rate": 0.002, "loss": 6.2434, "step": 305 }, { "epoch": 0.034739295635237394, "grad_norm": 0.78125, "learning_rate": 0.002, "loss": 6.2332, "step": 306 }, { "epoch": 0.03485282274515647, "grad_norm": 0.75, "learning_rate": 0.002, "loss": 6.2421, "step": 307 }, { "epoch": 0.034966349855075546, "grad_norm": 0.6796875, "learning_rate": 0.002, "loss": 6.2396, "step": 308 }, { "epoch": 0.03507987696499462, "grad_norm": 0.6875, "learning_rate": 0.002, "loss": 6.2284, "step": 309 }, { "epoch": 0.035193404074913705, "grad_norm": 0.7421875, "learning_rate": 0.002, "loss": 6.2347, "step": 310 }, { "epoch": 0.03530693118483278, "grad_norm": 0.78125, "learning_rate": 0.002, "loss": 6.2424, "step": 311 }, { "epoch": 0.03542045829475186, "grad_norm": 0.71484375, "learning_rate": 0.002, "loss": 6.2347, "step": 312 }, { "epoch": 0.03553398540467093, "grad_norm": 0.640625, "learning_rate": 0.002, "loss": 6.2269, "step": 313 }, { "epoch": 0.03564751251459001, "grad_norm": 0.58984375, "learning_rate": 0.002, "loss": 6.2191, "step": 314 }, { "epoch": 0.035761039624509085, "grad_norm": 0.6328125, "learning_rate": 0.002, "loss": 6.2313, "step": 315 }, { "epoch": 0.03587456673442816, "grad_norm": 0.625, "learning_rate": 0.002, "loss": 6.2311, "step": 316 }, { "epoch": 0.03598809384434724, "grad_norm": 0.6484375, "learning_rate": 0.002, "loss": 6.2184, "step": 317 }, { "epoch": 0.03610162095426631, "grad_norm": 0.58984375, "learning_rate": 0.002, "loss": 6.1863, "step": 318 }, { "epoch": 0.03621514806418539, "grad_norm": 0.60546875, "learning_rate": 0.002, "loss": 6.2005, "step": 319 }, { "epoch": 0.036328675174104465, "grad_norm": 0.7109375, "learning_rate": 0.002, "loss": 6.2043, "step": 320 }, { "epoch": 0.03644220228402354, "grad_norm": 0.76953125, "learning_rate": 0.002, "loss": 6.2207, "step": 321 }, { "epoch": 0.03655572939394262, "grad_norm": 0.76953125, "learning_rate": 0.002, "loss": 6.2231, "step": 322 }, { "epoch": 0.03666925650386169, "grad_norm": 0.6953125, "learning_rate": 0.002, "loss": 6.1961, "step": 323 }, { "epoch": 0.03678278361378077, "grad_norm": 0.703125, "learning_rate": 0.002, "loss": 6.1961, "step": 324 }, { "epoch": 0.03689631072369985, "grad_norm": 0.80078125, "learning_rate": 0.002, "loss": 6.2078, "step": 325 }, { "epoch": 0.03700983783361893, "grad_norm": 0.77734375, "learning_rate": 0.002, "loss": 6.2047, "step": 326 }, { "epoch": 0.037123364943538004, "grad_norm": 0.64453125, "learning_rate": 0.002, "loss": 6.1689, "step": 327 }, { "epoch": 0.03723689205345708, "grad_norm": 0.60546875, "learning_rate": 0.002, "loss": 6.1883, "step": 328 }, { "epoch": 0.037350419163376156, "grad_norm": 0.5703125, "learning_rate": 0.002, "loss": 6.1699, "step": 329 }, { "epoch": 0.03746394627329523, "grad_norm": 0.640625, "learning_rate": 0.002, "loss": 6.1786, "step": 330 }, { "epoch": 0.03757747338321431, "grad_norm": 0.69921875, "learning_rate": 0.002, "loss": 6.1778, "step": 331 }, { "epoch": 0.037691000493133384, "grad_norm": 0.64453125, "learning_rate": 0.002, "loss": 6.1811, "step": 332 }, { "epoch": 0.03780452760305246, "grad_norm": 0.59375, "learning_rate": 0.002, "loss": 6.1769, "step": 333 }, { "epoch": 0.037918054712971536, "grad_norm": 0.59765625, "learning_rate": 0.002, "loss": 6.1642, "step": 334 }, { "epoch": 0.03803158182289061, "grad_norm": 0.6171875, "learning_rate": 0.002, "loss": 6.1512, "step": 335 }, { "epoch": 0.03814510893280969, "grad_norm": 0.61328125, "learning_rate": 0.002, "loss": 6.1648, "step": 336 }, { "epoch": 0.038258636042728764, "grad_norm": 0.72265625, "learning_rate": 0.002, "loss": 6.1735, "step": 337 }, { "epoch": 0.03837216315264784, "grad_norm": 0.73828125, "learning_rate": 0.002, "loss": 6.1485, "step": 338 }, { "epoch": 0.038485690262566916, "grad_norm": 0.671875, "learning_rate": 0.002, "loss": 6.1556, "step": 339 }, { "epoch": 0.038599217372486, "grad_norm": 0.68359375, "learning_rate": 0.002, "loss": 6.1484, "step": 340 }, { "epoch": 0.038712744482405075, "grad_norm": 0.62890625, "learning_rate": 0.002, "loss": 6.1459, "step": 341 }, { "epoch": 0.03882627159232415, "grad_norm": 0.50390625, "learning_rate": 0.002, "loss": 6.1456, "step": 342 }, { "epoch": 0.03893979870224323, "grad_norm": 0.50390625, "learning_rate": 0.002, "loss": 6.1399, "step": 343 }, { "epoch": 0.0390533258121623, "grad_norm": 0.57421875, "learning_rate": 0.002, "loss": 6.1475, "step": 344 }, { "epoch": 0.03916685292208138, "grad_norm": 0.5703125, "learning_rate": 0.002, "loss": 6.1336, "step": 345 }, { "epoch": 0.039280380032000455, "grad_norm": 0.56640625, "learning_rate": 0.002, "loss": 6.133, "step": 346 }, { "epoch": 0.03939390714191953, "grad_norm": 0.5703125, "learning_rate": 0.002, "loss": 6.1262, "step": 347 }, { "epoch": 0.039507434251838607, "grad_norm": 0.578125, "learning_rate": 0.002, "loss": 6.1377, "step": 348 }, { "epoch": 0.03962096136175768, "grad_norm": 0.53125, "learning_rate": 0.002, "loss": 6.1175, "step": 349 }, { "epoch": 0.03973448847167676, "grad_norm": 0.54296875, "learning_rate": 0.002, "loss": 6.1234, "step": 350 }, { "epoch": 0.039848015581595835, "grad_norm": 0.625, "learning_rate": 0.002, "loss": 6.1384, "step": 351 }, { "epoch": 0.03996154269151491, "grad_norm": 0.62109375, "learning_rate": 0.002, "loss": 6.1445, "step": 352 }, { "epoch": 0.040075069801433987, "grad_norm": 0.59375, "learning_rate": 0.002, "loss": 6.1153, "step": 353 }, { "epoch": 0.04018859691135306, "grad_norm": 0.60546875, "learning_rate": 0.002, "loss": 6.1028, "step": 354 }, { "epoch": 0.040302124021272145, "grad_norm": 0.62109375, "learning_rate": 0.002, "loss": 6.1172, "step": 355 }, { "epoch": 0.04041565113119122, "grad_norm": 0.5859375, "learning_rate": 0.002, "loss": 6.1185, "step": 356 }, { "epoch": 0.0405291782411103, "grad_norm": 0.52734375, "learning_rate": 0.002, "loss": 6.087, "step": 357 }, { "epoch": 0.04064270535102937, "grad_norm": 0.52734375, "learning_rate": 0.002, "loss": 6.0829, "step": 358 }, { "epoch": 0.04075623246094845, "grad_norm": 0.55859375, "learning_rate": 0.002, "loss": 6.0981, "step": 359 }, { "epoch": 0.040869759570867525, "grad_norm": 0.59765625, "learning_rate": 0.002, "loss": 6.1014, "step": 360 }, { "epoch": 0.0409832866807866, "grad_norm": 0.58984375, "learning_rate": 0.002, "loss": 6.0917, "step": 361 }, { "epoch": 0.04109681379070568, "grad_norm": 0.703125, "learning_rate": 0.002, "loss": 6.1174, "step": 362 }, { "epoch": 0.04121034090062475, "grad_norm": 0.6328125, "learning_rate": 0.002, "loss": 6.0854, "step": 363 }, { "epoch": 0.04132386801054383, "grad_norm": 0.52734375, "learning_rate": 0.002, "loss": 6.081, "step": 364 }, { "epoch": 0.041437395120462905, "grad_norm": 0.51953125, "learning_rate": 0.002, "loss": 6.1024, "step": 365 }, { "epoch": 0.04155092223038198, "grad_norm": 0.6640625, "learning_rate": 0.002, "loss": 6.0949, "step": 366 }, { "epoch": 0.04166444934030106, "grad_norm": 0.734375, "learning_rate": 0.002, "loss": 6.079, "step": 367 }, { "epoch": 0.04177797645022013, "grad_norm": 0.62109375, "learning_rate": 0.002, "loss": 6.0801, "step": 368 }, { "epoch": 0.04189150356013921, "grad_norm": 0.625, "learning_rate": 0.002, "loss": 6.0927, "step": 369 }, { "epoch": 0.04200503067005829, "grad_norm": 0.61328125, "learning_rate": 0.002, "loss": 6.0886, "step": 370 }, { "epoch": 0.04211855777997737, "grad_norm": 0.640625, "learning_rate": 0.002, "loss": 6.0786, "step": 371 }, { "epoch": 0.042232084889896444, "grad_norm": 0.64453125, "learning_rate": 0.002, "loss": 6.0789, "step": 372 }, { "epoch": 0.04234561199981552, "grad_norm": 0.6328125, "learning_rate": 0.002, "loss": 6.0908, "step": 373 }, { "epoch": 0.042459139109734596, "grad_norm": 0.61328125, "learning_rate": 0.002, "loss": 6.0615, "step": 374 }, { "epoch": 0.04257266621965367, "grad_norm": 0.54296875, "learning_rate": 0.002, "loss": 6.0617, "step": 375 }, { "epoch": 0.04268619332957275, "grad_norm": 0.5703125, "learning_rate": 0.002, "loss": 6.0826, "step": 376 }, { "epoch": 0.042799720439491824, "grad_norm": 0.61328125, "learning_rate": 0.002, "loss": 6.0556, "step": 377 }, { "epoch": 0.0429132475494109, "grad_norm": 0.58203125, "learning_rate": 0.002, "loss": 6.0778, "step": 378 }, { "epoch": 0.043026774659329976, "grad_norm": 0.60546875, "learning_rate": 0.002, "loss": 6.0702, "step": 379 }, { "epoch": 0.04314030176924905, "grad_norm": 0.5625, "learning_rate": 0.002, "loss": 6.062, "step": 380 }, { "epoch": 0.04325382887916813, "grad_norm": 0.58984375, "learning_rate": 0.002, "loss": 6.0649, "step": 381 }, { "epoch": 0.043367355989087204, "grad_norm": 0.6171875, "learning_rate": 0.002, "loss": 6.0691, "step": 382 }, { "epoch": 0.04348088309900628, "grad_norm": 0.58203125, "learning_rate": 0.002, "loss": 6.0609, "step": 383 }, { "epoch": 0.043594410208925356, "grad_norm": 0.546875, "learning_rate": 0.002, "loss": 6.0438, "step": 384 }, { "epoch": 0.04370793731884444, "grad_norm": 0.546875, "learning_rate": 0.002, "loss": 6.059, "step": 385 }, { "epoch": 0.043821464428763515, "grad_norm": 0.54296875, "learning_rate": 0.002, "loss": 6.0472, "step": 386 }, { "epoch": 0.04393499153868259, "grad_norm": 0.53515625, "learning_rate": 0.002, "loss": 6.0531, "step": 387 }, { "epoch": 0.04404851864860167, "grad_norm": 0.51171875, "learning_rate": 0.002, "loss": 6.0568, "step": 388 }, { "epoch": 0.04416204575852074, "grad_norm": 0.5078125, "learning_rate": 0.002, "loss": 6.047, "step": 389 }, { "epoch": 0.04427557286843982, "grad_norm": 0.50390625, "learning_rate": 0.002, "loss": 6.0349, "step": 390 }, { "epoch": 0.044389099978358895, "grad_norm": 0.5625, "learning_rate": 0.002, "loss": 6.032, "step": 391 }, { "epoch": 0.04450262708827797, "grad_norm": 0.58203125, "learning_rate": 0.002, "loss": 6.034, "step": 392 }, { "epoch": 0.04461615419819705, "grad_norm": 0.609375, "learning_rate": 0.002, "loss": 6.0341, "step": 393 }, { "epoch": 0.04472968130811612, "grad_norm": 0.6328125, "learning_rate": 0.002, "loss": 6.0278, "step": 394 }, { "epoch": 0.0448432084180352, "grad_norm": 0.546875, "learning_rate": 0.002, "loss": 6.0432, "step": 395 }, { "epoch": 0.044956735527954275, "grad_norm": 0.53515625, "learning_rate": 0.002, "loss": 6.0241, "step": 396 }, { "epoch": 0.04507026263787335, "grad_norm": 0.5625, "learning_rate": 0.002, "loss": 6.0446, "step": 397 }, { "epoch": 0.04518378974779243, "grad_norm": 0.57421875, "learning_rate": 0.002, "loss": 6.0416, "step": 398 }, { "epoch": 0.0452973168577115, "grad_norm": 0.671875, "learning_rate": 0.002, "loss": 6.0243, "step": 399 }, { "epoch": 0.045410843967630586, "grad_norm": 0.65625, "learning_rate": 0.002, "loss": 6.0252, "step": 400 }, { "epoch": 0.04552437107754966, "grad_norm": 0.5546875, "learning_rate": 0.002, "loss": 6.0338, "step": 401 }, { "epoch": 0.04563789818746874, "grad_norm": 0.546875, "learning_rate": 0.002, "loss": 6.023, "step": 402 }, { "epoch": 0.045751425297387814, "grad_norm": 0.51171875, "learning_rate": 0.002, "loss": 6.0366, "step": 403 }, { "epoch": 0.04586495240730689, "grad_norm": 0.49609375, "learning_rate": 0.002, "loss": 6.048, "step": 404 }, { "epoch": 0.045978479517225966, "grad_norm": 0.51171875, "learning_rate": 0.002, "loss": 6.0079, "step": 405 }, { "epoch": 0.04609200662714504, "grad_norm": 0.53515625, "learning_rate": 0.002, "loss": 5.9989, "step": 406 }, { "epoch": 0.04620553373706412, "grad_norm": 0.5625, "learning_rate": 0.002, "loss": 6.0113, "step": 407 }, { "epoch": 0.046319060846983194, "grad_norm": 0.5234375, "learning_rate": 0.002, "loss": 6.0031, "step": 408 }, { "epoch": 0.04643258795690227, "grad_norm": 0.515625, "learning_rate": 0.002, "loss": 6.0011, "step": 409 }, { "epoch": 0.046546115066821346, "grad_norm": 0.55859375, "learning_rate": 0.002, "loss": 6.0037, "step": 410 }, { "epoch": 0.04665964217674042, "grad_norm": 0.70703125, "learning_rate": 0.002, "loss": 6.0108, "step": 411 }, { "epoch": 0.0467731692866595, "grad_norm": 0.72265625, "learning_rate": 0.002, "loss": 6.0067, "step": 412 }, { "epoch": 0.046886696396578574, "grad_norm": 0.66796875, "learning_rate": 0.002, "loss": 6.0059, "step": 413 }, { "epoch": 0.04700022350649765, "grad_norm": 0.6484375, "learning_rate": 0.002, "loss": 6.0045, "step": 414 }, { "epoch": 0.04711375061641673, "grad_norm": 0.59375, "learning_rate": 0.002, "loss": 6.0096, "step": 415 }, { "epoch": 0.04722727772633581, "grad_norm": 0.546875, "learning_rate": 0.002, "loss": 5.9978, "step": 416 }, { "epoch": 0.047340804836254885, "grad_norm": 0.474609375, "learning_rate": 0.002, "loss": 5.9964, "step": 417 }, { "epoch": 0.04745433194617396, "grad_norm": 0.45703125, "learning_rate": 0.002, "loss": 5.985, "step": 418 }, { "epoch": 0.04756785905609304, "grad_norm": 0.470703125, "learning_rate": 0.002, "loss": 5.9858, "step": 419 }, { "epoch": 0.04768138616601211, "grad_norm": 0.5234375, "learning_rate": 0.002, "loss": 5.9872, "step": 420 }, { "epoch": 0.04779491327593119, "grad_norm": 0.57421875, "learning_rate": 0.002, "loss": 5.9986, "step": 421 }, { "epoch": 0.047908440385850265, "grad_norm": 0.54296875, "learning_rate": 0.002, "loss": 5.9791, "step": 422 }, { "epoch": 0.04802196749576934, "grad_norm": 0.54296875, "learning_rate": 0.002, "loss": 5.9766, "step": 423 }, { "epoch": 0.04813549460568842, "grad_norm": 0.52734375, "learning_rate": 0.002, "loss": 5.9913, "step": 424 }, { "epoch": 0.04824902171560749, "grad_norm": 0.4921875, "learning_rate": 0.002, "loss": 5.9781, "step": 425 }, { "epoch": 0.04836254882552657, "grad_norm": 0.484375, "learning_rate": 0.002, "loss": 5.9782, "step": 426 }, { "epoch": 0.048476075935445645, "grad_norm": 0.4453125, "learning_rate": 0.002, "loss": 5.9753, "step": 427 }, { "epoch": 0.04858960304536472, "grad_norm": 0.4765625, "learning_rate": 0.002, "loss": 5.968, "step": 428 }, { "epoch": 0.0487031301552838, "grad_norm": 0.5, "learning_rate": 0.002, "loss": 5.973, "step": 429 }, { "epoch": 0.04881665726520288, "grad_norm": 0.53515625, "learning_rate": 0.002, "loss": 5.9632, "step": 430 }, { "epoch": 0.048930184375121956, "grad_norm": 0.54296875, "learning_rate": 0.002, "loss": 5.969, "step": 431 }, { "epoch": 0.04904371148504103, "grad_norm": 0.60546875, "learning_rate": 0.002, "loss": 5.9568, "step": 432 }, { "epoch": 0.04915723859496011, "grad_norm": 0.6328125, "learning_rate": 0.002, "loss": 5.9873, "step": 433 }, { "epoch": 0.049270765704879184, "grad_norm": 0.6328125, "learning_rate": 0.002, "loss": 5.9671, "step": 434 }, { "epoch": 0.04938429281479826, "grad_norm": 0.57421875, "learning_rate": 0.002, "loss": 5.9707, "step": 435 }, { "epoch": 0.049497819924717336, "grad_norm": 0.59375, "learning_rate": 0.002, "loss": 5.9794, "step": 436 }, { "epoch": 0.04961134703463641, "grad_norm": 0.58984375, "learning_rate": 0.002, "loss": 5.9613, "step": 437 }, { "epoch": 0.04972487414455549, "grad_norm": 0.6328125, "learning_rate": 0.002, "loss": 5.9623, "step": 438 }, { "epoch": 0.049838401254474564, "grad_norm": 0.63671875, "learning_rate": 0.002, "loss": 5.9661, "step": 439 }, { "epoch": 0.04995192836439364, "grad_norm": 0.57421875, "learning_rate": 0.002, "loss": 5.971, "step": 440 }, { "epoch": 0.050065455474312716, "grad_norm": 0.59375, "learning_rate": 0.002, "loss": 5.9666, "step": 441 }, { "epoch": 0.05017898258423179, "grad_norm": 0.53515625, "learning_rate": 0.002, "loss": 5.9563, "step": 442 }, { "epoch": 0.05029250969415087, "grad_norm": 0.5546875, "learning_rate": 0.002, "loss": 5.9467, "step": 443 }, { "epoch": 0.050406036804069944, "grad_norm": 0.57421875, "learning_rate": 0.002, "loss": 5.961, "step": 444 }, { "epoch": 0.05051956391398903, "grad_norm": 0.578125, "learning_rate": 0.002, "loss": 5.9395, "step": 445 }, { "epoch": 0.0506330910239081, "grad_norm": 0.55078125, "learning_rate": 0.002, "loss": 5.9592, "step": 446 }, { "epoch": 0.05074661813382718, "grad_norm": 0.625, "learning_rate": 0.002, "loss": 5.9602, "step": 447 }, { "epoch": 0.050860145243746255, "grad_norm": 0.60546875, "learning_rate": 0.002, "loss": 5.9538, "step": 448 }, { "epoch": 0.05097367235366533, "grad_norm": 0.56640625, "learning_rate": 0.002, "loss": 5.9509, "step": 449 }, { "epoch": 0.05108719946358441, "grad_norm": 0.58203125, "learning_rate": 0.002, "loss": 5.9407, "step": 450 }, { "epoch": 0.05120072657350348, "grad_norm": 0.57421875, "learning_rate": 0.002, "loss": 5.9565, "step": 451 }, { "epoch": 0.05131425368342256, "grad_norm": 0.5390625, "learning_rate": 0.002, "loss": 5.9379, "step": 452 }, { "epoch": 0.051427780793341635, "grad_norm": 0.51953125, "learning_rate": 0.002, "loss": 5.9503, "step": 453 }, { "epoch": 0.05154130790326071, "grad_norm": 0.58203125, "learning_rate": 0.002, "loss": 5.9299, "step": 454 }, { "epoch": 0.05165483501317979, "grad_norm": 0.54296875, "learning_rate": 0.002, "loss": 5.9241, "step": 455 }, { "epoch": 0.05176836212309886, "grad_norm": 0.5234375, "learning_rate": 0.002, "loss": 5.9515, "step": 456 }, { "epoch": 0.05188188923301794, "grad_norm": 0.5390625, "learning_rate": 0.002, "loss": 5.9157, "step": 457 }, { "epoch": 0.051995416342937015, "grad_norm": 0.5390625, "learning_rate": 0.002, "loss": 5.9354, "step": 458 }, { "epoch": 0.05210894345285609, "grad_norm": 0.515625, "learning_rate": 0.002, "loss": 5.9348, "step": 459 }, { "epoch": 0.05222247056277517, "grad_norm": 0.474609375, "learning_rate": 0.002, "loss": 5.9315, "step": 460 }, { "epoch": 0.05233599767269425, "grad_norm": 0.447265625, "learning_rate": 0.002, "loss": 5.9449, "step": 461 }, { "epoch": 0.052449524782613326, "grad_norm": 0.4453125, "learning_rate": 0.002, "loss": 5.9425, "step": 462 }, { "epoch": 0.0525630518925324, "grad_norm": 0.486328125, "learning_rate": 0.002, "loss": 5.9275, "step": 463 }, { "epoch": 0.05267657900245148, "grad_norm": 0.5, "learning_rate": 0.002, "loss": 5.9382, "step": 464 }, { "epoch": 0.052790106112370554, "grad_norm": 0.48828125, "learning_rate": 0.002, "loss": 5.9147, "step": 465 }, { "epoch": 0.05290363322228963, "grad_norm": 0.50390625, "learning_rate": 0.002, "loss": 5.9249, "step": 466 }, { "epoch": 0.053017160332208706, "grad_norm": 0.5234375, "learning_rate": 0.002, "loss": 5.9074, "step": 467 }, { "epoch": 0.05313068744212778, "grad_norm": 0.61328125, "learning_rate": 0.002, "loss": 5.9267, "step": 468 }, { "epoch": 0.05324421455204686, "grad_norm": 0.625, "learning_rate": 0.002, "loss": 5.9167, "step": 469 }, { "epoch": 0.053357741661965934, "grad_norm": 0.56640625, "learning_rate": 0.002, "loss": 5.9287, "step": 470 }, { "epoch": 0.05347126877188501, "grad_norm": 0.53125, "learning_rate": 0.002, "loss": 5.9367, "step": 471 }, { "epoch": 0.053584795881804086, "grad_norm": 0.4765625, "learning_rate": 0.002, "loss": 5.9199, "step": 472 }, { "epoch": 0.05369832299172316, "grad_norm": 0.462890625, "learning_rate": 0.002, "loss": 5.9235, "step": 473 }, { "epoch": 0.05381185010164224, "grad_norm": 0.44921875, "learning_rate": 0.002, "loss": 5.8995, "step": 474 }, { "epoch": 0.053925377211561314, "grad_norm": 0.490234375, "learning_rate": 0.002, "loss": 5.9214, "step": 475 }, { "epoch": 0.0540389043214804, "grad_norm": 0.453125, "learning_rate": 0.002, "loss": 5.9146, "step": 476 }, { "epoch": 0.05415243143139947, "grad_norm": 0.443359375, "learning_rate": 0.002, "loss": 5.9127, "step": 477 }, { "epoch": 0.05426595854131855, "grad_norm": 0.4375, "learning_rate": 0.002, "loss": 5.9092, "step": 478 }, { "epoch": 0.054379485651237625, "grad_norm": 0.4296875, "learning_rate": 0.002, "loss": 5.914, "step": 479 }, { "epoch": 0.0544930127611567, "grad_norm": 0.439453125, "learning_rate": 0.002, "loss": 5.9071, "step": 480 }, { "epoch": 0.05460653987107578, "grad_norm": 0.44921875, "learning_rate": 0.002, "loss": 5.9036, "step": 481 }, { "epoch": 0.05472006698099485, "grad_norm": 0.443359375, "learning_rate": 0.002, "loss": 5.912, "step": 482 }, { "epoch": 0.05483359409091393, "grad_norm": 0.455078125, "learning_rate": 0.002, "loss": 5.9081, "step": 483 }, { "epoch": 0.054947121200833005, "grad_norm": 0.486328125, "learning_rate": 0.002, "loss": 5.8961, "step": 484 }, { "epoch": 0.05506064831075208, "grad_norm": 0.4921875, "learning_rate": 0.002, "loss": 5.8945, "step": 485 }, { "epoch": 0.05517417542067116, "grad_norm": 0.458984375, "learning_rate": 0.002, "loss": 5.896, "step": 486 }, { "epoch": 0.05528770253059023, "grad_norm": 0.466796875, "learning_rate": 0.002, "loss": 5.8671, "step": 487 }, { "epoch": 0.05540122964050931, "grad_norm": 0.435546875, "learning_rate": 0.002, "loss": 5.9104, "step": 488 }, { "epoch": 0.055514756750428385, "grad_norm": 0.44921875, "learning_rate": 0.002, "loss": 5.908, "step": 489 }, { "epoch": 0.05562828386034746, "grad_norm": 0.470703125, "learning_rate": 0.002, "loss": 5.8806, "step": 490 }, { "epoch": 0.05574181097026654, "grad_norm": 0.453125, "learning_rate": 0.002, "loss": 5.9009, "step": 491 }, { "epoch": 0.05585533808018562, "grad_norm": 0.5078125, "learning_rate": 0.002, "loss": 5.8843, "step": 492 }, { "epoch": 0.055968865190104695, "grad_norm": 0.51171875, "learning_rate": 0.002, "loss": 5.8909, "step": 493 }, { "epoch": 0.05608239230002377, "grad_norm": 0.59375, "learning_rate": 0.002, "loss": 5.8813, "step": 494 }, { "epoch": 0.05619591940994285, "grad_norm": 0.640625, "learning_rate": 0.002, "loss": 5.9248, "step": 495 }, { "epoch": 0.05630944651986192, "grad_norm": 0.65234375, "learning_rate": 0.002, "loss": 5.8991, "step": 496 }, { "epoch": 0.056422973629781, "grad_norm": 0.59375, "learning_rate": 0.002, "loss": 5.8897, "step": 497 }, { "epoch": 0.056536500739700075, "grad_norm": 0.54296875, "learning_rate": 0.002, "loss": 5.9034, "step": 498 }, { "epoch": 0.05665002784961915, "grad_norm": 0.52734375, "learning_rate": 0.002, "loss": 5.8889, "step": 499 }, { "epoch": 0.05676355495953823, "grad_norm": 0.4921875, "learning_rate": 0.002, "loss": 5.891, "step": 500 }, { "epoch": 0.0568770820694573, "grad_norm": 0.4921875, "learning_rate": 0.002, "loss": 5.8765, "step": 501 }, { "epoch": 0.05699060917937638, "grad_norm": 0.5234375, "learning_rate": 0.002, "loss": 5.8805, "step": 502 }, { "epoch": 0.057104136289295455, "grad_norm": 0.53125, "learning_rate": 0.002, "loss": 5.8843, "step": 503 }, { "epoch": 0.05721766339921453, "grad_norm": 0.58984375, "learning_rate": 0.002, "loss": 5.8774, "step": 504 }, { "epoch": 0.05733119050913361, "grad_norm": 0.58984375, "learning_rate": 0.002, "loss": 5.8876, "step": 505 }, { "epoch": 0.05744471761905269, "grad_norm": 0.55859375, "learning_rate": 0.002, "loss": 5.8917, "step": 506 }, { "epoch": 0.057558244728971766, "grad_norm": 0.55078125, "learning_rate": 0.002, "loss": 5.8807, "step": 507 }, { "epoch": 0.05767177183889084, "grad_norm": 0.478515625, "learning_rate": 0.002, "loss": 5.8686, "step": 508 }, { "epoch": 0.05778529894880992, "grad_norm": 0.47265625, "learning_rate": 0.002, "loss": 5.8777, "step": 509 }, { "epoch": 0.057898826058728994, "grad_norm": 0.439453125, "learning_rate": 0.002, "loss": 5.878, "step": 510 }, { "epoch": 0.05801235316864807, "grad_norm": 0.4140625, "learning_rate": 0.002, "loss": 5.8672, "step": 511 }, { "epoch": 0.058125880278567146, "grad_norm": 0.412109375, "learning_rate": 0.002, "loss": 5.8738, "step": 512 }, { "epoch": 0.05823940738848622, "grad_norm": 0.419921875, "learning_rate": 0.002, "loss": 5.8602, "step": 513 }, { "epoch": 0.0583529344984053, "grad_norm": 0.4765625, "learning_rate": 0.002, "loss": 5.858, "step": 514 }, { "epoch": 0.058466461608324374, "grad_norm": 0.55859375, "learning_rate": 0.002, "loss": 5.866, "step": 515 }, { "epoch": 0.05857998871824345, "grad_norm": 0.73828125, "learning_rate": 0.002, "loss": 5.8628, "step": 516 }, { "epoch": 0.058693515828162526, "grad_norm": 0.6328125, "learning_rate": 0.002, "loss": 5.8504, "step": 517 }, { "epoch": 0.0588070429380816, "grad_norm": 0.498046875, "learning_rate": 0.002, "loss": 5.8885, "step": 518 }, { "epoch": 0.05892057004800068, "grad_norm": 0.47265625, "learning_rate": 0.002, "loss": 5.8404, "step": 519 }, { "epoch": 0.059034097157919754, "grad_norm": 0.4296875, "learning_rate": 0.002, "loss": 5.8517, "step": 520 }, { "epoch": 0.05914762426783884, "grad_norm": 0.4453125, "learning_rate": 0.002, "loss": 5.8683, "step": 521 }, { "epoch": 0.05926115137775791, "grad_norm": 0.46875, "learning_rate": 0.002, "loss": 5.8823, "step": 522 }, { "epoch": 0.05937467848767699, "grad_norm": 0.4453125, "learning_rate": 0.002, "loss": 5.8534, "step": 523 }, { "epoch": 0.059488205597596065, "grad_norm": 0.4375, "learning_rate": 0.002, "loss": 5.861, "step": 524 }, { "epoch": 0.05960173270751514, "grad_norm": 0.462890625, "learning_rate": 0.002, "loss": 5.8512, "step": 525 }, { "epoch": 0.05971525981743422, "grad_norm": 0.4609375, "learning_rate": 0.002, "loss": 5.8561, "step": 526 }, { "epoch": 0.05982878692735329, "grad_norm": 0.466796875, "learning_rate": 0.002, "loss": 5.8532, "step": 527 }, { "epoch": 0.05994231403727237, "grad_norm": 0.455078125, "learning_rate": 0.002, "loss": 5.854, "step": 528 }, { "epoch": 0.060055841147191445, "grad_norm": 0.443359375, "learning_rate": 0.002, "loss": 5.8502, "step": 529 }, { "epoch": 0.06016936825711052, "grad_norm": 0.46875, "learning_rate": 0.002, "loss": 5.8568, "step": 530 }, { "epoch": 0.0602828953670296, "grad_norm": 0.447265625, "learning_rate": 0.002, "loss": 5.8416, "step": 531 }, { "epoch": 0.06039642247694867, "grad_norm": 0.453125, "learning_rate": 0.002, "loss": 5.8455, "step": 532 }, { "epoch": 0.06050994958686775, "grad_norm": 0.51171875, "learning_rate": 0.002, "loss": 5.8462, "step": 533 }, { "epoch": 0.060623476696786825, "grad_norm": 0.66015625, "learning_rate": 0.002, "loss": 5.8577, "step": 534 }, { "epoch": 0.0607370038067059, "grad_norm": 0.66796875, "learning_rate": 0.002, "loss": 5.8566, "step": 535 }, { "epoch": 0.060850530916624984, "grad_norm": 0.5078125, "learning_rate": 0.002, "loss": 5.8322, "step": 536 }, { "epoch": 0.06096405802654406, "grad_norm": 0.53515625, "learning_rate": 0.002, "loss": 5.8367, "step": 537 }, { "epoch": 0.061077585136463136, "grad_norm": 0.474609375, "learning_rate": 0.002, "loss": 5.8477, "step": 538 }, { "epoch": 0.06119111224638221, "grad_norm": 0.4765625, "learning_rate": 0.002, "loss": 5.8544, "step": 539 }, { "epoch": 0.06130463935630129, "grad_norm": 0.53515625, "learning_rate": 0.002, "loss": 5.8441, "step": 540 }, { "epoch": 0.061418166466220364, "grad_norm": 0.5390625, "learning_rate": 0.002, "loss": 5.8381, "step": 541 }, { "epoch": 0.06153169357613944, "grad_norm": 0.61328125, "learning_rate": 0.002, "loss": 5.834, "step": 542 }, { "epoch": 0.061645220686058516, "grad_norm": 0.62109375, "learning_rate": 0.002, "loss": 5.8289, "step": 543 }, { "epoch": 0.06175874779597759, "grad_norm": 0.66015625, "learning_rate": 0.002, "loss": 5.8362, "step": 544 }, { "epoch": 0.06187227490589667, "grad_norm": 0.64453125, "learning_rate": 0.002, "loss": 5.8187, "step": 545 }, { "epoch": 0.061985802015815744, "grad_norm": 0.52734375, "learning_rate": 0.002, "loss": 5.8164, "step": 546 }, { "epoch": 0.06209932912573482, "grad_norm": 0.57421875, "learning_rate": 0.002, "loss": 5.8342, "step": 547 }, { "epoch": 0.062212856235653896, "grad_norm": 0.5546875, "learning_rate": 0.002, "loss": 5.8349, "step": 548 }, { "epoch": 0.06232638334557297, "grad_norm": 0.54296875, "learning_rate": 0.002, "loss": 5.8105, "step": 549 }, { "epoch": 0.06243991045549205, "grad_norm": 0.578125, "learning_rate": 0.002, "loss": 5.8258, "step": 550 }, { "epoch": 0.06255343756541112, "grad_norm": 0.5625, "learning_rate": 0.002, "loss": 5.8211, "step": 551 }, { "epoch": 0.0626669646753302, "grad_norm": 0.51171875, "learning_rate": 0.002, "loss": 5.8138, "step": 552 }, { "epoch": 0.06278049178524928, "grad_norm": 0.48828125, "learning_rate": 0.002, "loss": 5.8251, "step": 553 }, { "epoch": 0.06289401889516835, "grad_norm": 0.4296875, "learning_rate": 0.002, "loss": 5.8248, "step": 554 }, { "epoch": 0.06300754600508743, "grad_norm": 0.416015625, "learning_rate": 0.002, "loss": 5.8315, "step": 555 }, { "epoch": 0.0631210731150065, "grad_norm": 0.412109375, "learning_rate": 0.002, "loss": 5.8225, "step": 556 }, { "epoch": 0.06323460022492558, "grad_norm": 0.427734375, "learning_rate": 0.002, "loss": 5.8259, "step": 557 }, { "epoch": 0.06334812733484466, "grad_norm": 0.408203125, "learning_rate": 0.002, "loss": 5.8122, "step": 558 }, { "epoch": 0.06346165444476375, "grad_norm": 0.4140625, "learning_rate": 0.002, "loss": 5.8048, "step": 559 }, { "epoch": 0.06357518155468282, "grad_norm": 0.42578125, "learning_rate": 0.002, "loss": 5.8143, "step": 560 }, { "epoch": 0.0636887086646019, "grad_norm": 0.447265625, "learning_rate": 0.002, "loss": 5.82, "step": 561 }, { "epoch": 0.06380223577452097, "grad_norm": 0.46484375, "learning_rate": 0.002, "loss": 5.8199, "step": 562 }, { "epoch": 0.06391576288444005, "grad_norm": 0.5234375, "learning_rate": 0.002, "loss": 5.8225, "step": 563 }, { "epoch": 0.06402928999435913, "grad_norm": 0.50390625, "learning_rate": 0.002, "loss": 5.8071, "step": 564 }, { "epoch": 0.0641428171042782, "grad_norm": 0.546875, "learning_rate": 0.002, "loss": 5.7962, "step": 565 }, { "epoch": 0.06425634421419728, "grad_norm": 0.56640625, "learning_rate": 0.002, "loss": 5.8026, "step": 566 }, { "epoch": 0.06436987132411635, "grad_norm": 0.5546875, "learning_rate": 0.002, "loss": 5.7825, "step": 567 }, { "epoch": 0.06448339843403543, "grad_norm": 0.59765625, "learning_rate": 0.002, "loss": 5.8015, "step": 568 }, { "epoch": 0.0645969255439545, "grad_norm": 0.5234375, "learning_rate": 0.002, "loss": 5.8206, "step": 569 }, { "epoch": 0.06471045265387358, "grad_norm": 0.52734375, "learning_rate": 0.002, "loss": 5.8249, "step": 570 }, { "epoch": 0.06482397976379266, "grad_norm": 0.49609375, "learning_rate": 0.002, "loss": 5.7873, "step": 571 }, { "epoch": 0.06493750687371173, "grad_norm": 0.482421875, "learning_rate": 0.002, "loss": 5.8174, "step": 572 }, { "epoch": 0.06505103398363081, "grad_norm": 0.5, "learning_rate": 0.002, "loss": 5.8136, "step": 573 }, { "epoch": 0.06516456109354989, "grad_norm": 0.5078125, "learning_rate": 0.002, "loss": 5.8094, "step": 574 }, { "epoch": 0.06527808820346896, "grad_norm": 0.474609375, "learning_rate": 0.002, "loss": 5.7936, "step": 575 }, { "epoch": 0.06539161531338804, "grad_norm": 0.4765625, "learning_rate": 0.002, "loss": 5.8102, "step": 576 }, { "epoch": 0.06550514242330711, "grad_norm": 0.47265625, "learning_rate": 0.002, "loss": 5.8202, "step": 577 }, { "epoch": 0.06561866953322619, "grad_norm": 0.478515625, "learning_rate": 0.002, "loss": 5.7878, "step": 578 }, { "epoch": 0.06573219664314527, "grad_norm": 0.494140625, "learning_rate": 0.002, "loss": 5.7993, "step": 579 }, { "epoch": 0.06584572375306434, "grad_norm": 0.53515625, "learning_rate": 0.002, "loss": 5.7928, "step": 580 }, { "epoch": 0.06595925086298342, "grad_norm": 0.55078125, "learning_rate": 0.002, "loss": 5.8023, "step": 581 }, { "epoch": 0.0660727779729025, "grad_norm": 0.57421875, "learning_rate": 0.002, "loss": 5.7986, "step": 582 }, { "epoch": 0.06618630508282157, "grad_norm": 0.54296875, "learning_rate": 0.002, "loss": 5.7899, "step": 583 }, { "epoch": 0.06629983219274065, "grad_norm": 0.5, "learning_rate": 0.002, "loss": 5.7876, "step": 584 }, { "epoch": 0.06641335930265972, "grad_norm": 0.51953125, "learning_rate": 0.002, "loss": 5.7819, "step": 585 }, { "epoch": 0.0665268864125788, "grad_norm": 0.50390625, "learning_rate": 0.002, "loss": 5.79, "step": 586 }, { "epoch": 0.06664041352249787, "grad_norm": 0.484375, "learning_rate": 0.002, "loss": 5.8001, "step": 587 }, { "epoch": 0.06675394063241695, "grad_norm": 0.443359375, "learning_rate": 0.002, "loss": 5.7964, "step": 588 }, { "epoch": 0.06686746774233604, "grad_norm": 0.408203125, "learning_rate": 0.002, "loss": 5.7779, "step": 589 }, { "epoch": 0.06698099485225512, "grad_norm": 0.39453125, "learning_rate": 0.002, "loss": 5.7976, "step": 590 }, { "epoch": 0.06709452196217419, "grad_norm": 0.4140625, "learning_rate": 0.002, "loss": 5.8005, "step": 591 }, { "epoch": 0.06720804907209327, "grad_norm": 0.470703125, "learning_rate": 0.002, "loss": 5.7917, "step": 592 }, { "epoch": 0.06732157618201234, "grad_norm": 0.490234375, "learning_rate": 0.002, "loss": 5.8071, "step": 593 }, { "epoch": 0.06743510329193142, "grad_norm": 0.498046875, "learning_rate": 0.002, "loss": 5.7799, "step": 594 }, { "epoch": 0.0675486304018505, "grad_norm": 0.4765625, "learning_rate": 0.002, "loss": 5.8027, "step": 595 }, { "epoch": 0.06766215751176957, "grad_norm": 0.462890625, "learning_rate": 0.002, "loss": 5.7886, "step": 596 }, { "epoch": 0.06777568462168865, "grad_norm": 0.484375, "learning_rate": 0.002, "loss": 5.7979, "step": 597 }, { "epoch": 0.06788921173160772, "grad_norm": 0.5, "learning_rate": 0.002, "loss": 5.7848, "step": 598 }, { "epoch": 0.0680027388415268, "grad_norm": 0.53515625, "learning_rate": 0.002, "loss": 5.7738, "step": 599 }, { "epoch": 0.06811626595144588, "grad_norm": 0.49609375, "learning_rate": 0.002, "loss": 5.793, "step": 600 }, { "epoch": 0.06822979306136495, "grad_norm": 0.482421875, "learning_rate": 0.002, "loss": 5.769, "step": 601 }, { "epoch": 0.06834332017128403, "grad_norm": 0.46875, "learning_rate": 0.002, "loss": 5.78, "step": 602 }, { "epoch": 0.0684568472812031, "grad_norm": 0.47265625, "learning_rate": 0.002, "loss": 5.7849, "step": 603 }, { "epoch": 0.06857037439112218, "grad_norm": 0.482421875, "learning_rate": 0.002, "loss": 5.7999, "step": 604 }, { "epoch": 0.06868390150104126, "grad_norm": 0.458984375, "learning_rate": 0.002, "loss": 5.7781, "step": 605 }, { "epoch": 0.06879742861096033, "grad_norm": 0.458984375, "learning_rate": 0.002, "loss": 5.7623, "step": 606 }, { "epoch": 0.06891095572087941, "grad_norm": 0.47265625, "learning_rate": 0.002, "loss": 5.7853, "step": 607 }, { "epoch": 0.06902448283079848, "grad_norm": 0.55078125, "learning_rate": 0.002, "loss": 5.7886, "step": 608 }, { "epoch": 0.06913800994071756, "grad_norm": 0.55078125, "learning_rate": 0.002, "loss": 5.7733, "step": 609 }, { "epoch": 0.06925153705063664, "grad_norm": 0.58203125, "learning_rate": 0.002, "loss": 5.7728, "step": 610 }, { "epoch": 0.06936506416055571, "grad_norm": 0.53125, "learning_rate": 0.002, "loss": 5.7883, "step": 611 }, { "epoch": 0.06947859127047479, "grad_norm": 0.49609375, "learning_rate": 0.002, "loss": 5.7865, "step": 612 }, { "epoch": 0.06959211838039386, "grad_norm": 0.447265625, "learning_rate": 0.002, "loss": 5.7726, "step": 613 }, { "epoch": 0.06970564549031294, "grad_norm": 0.4296875, "learning_rate": 0.002, "loss": 5.7789, "step": 614 }, { "epoch": 0.06981917260023202, "grad_norm": 0.447265625, "learning_rate": 0.002, "loss": 5.7612, "step": 615 }, { "epoch": 0.06993269971015109, "grad_norm": 0.46484375, "learning_rate": 0.002, "loss": 5.7471, "step": 616 }, { "epoch": 0.07004622682007017, "grad_norm": 0.54296875, "learning_rate": 0.002, "loss": 5.766, "step": 617 }, { "epoch": 0.07015975392998924, "grad_norm": 0.62890625, "learning_rate": 0.002, "loss": 5.765, "step": 618 }, { "epoch": 0.07027328103990833, "grad_norm": 0.62109375, "learning_rate": 0.002, "loss": 5.7685, "step": 619 }, { "epoch": 0.07038680814982741, "grad_norm": 0.546875, "learning_rate": 0.002, "loss": 5.7545, "step": 620 }, { "epoch": 0.07050033525974649, "grad_norm": 0.55078125, "learning_rate": 0.002, "loss": 5.7547, "step": 621 }, { "epoch": 0.07061386236966556, "grad_norm": 0.470703125, "learning_rate": 0.002, "loss": 5.7689, "step": 622 }, { "epoch": 0.07072738947958464, "grad_norm": 0.427734375, "learning_rate": 0.002, "loss": 5.7749, "step": 623 }, { "epoch": 0.07084091658950371, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.7507, "step": 624 }, { "epoch": 0.07095444369942279, "grad_norm": 0.380859375, "learning_rate": 0.002, "loss": 5.7766, "step": 625 }, { "epoch": 0.07106797080934187, "grad_norm": 0.40234375, "learning_rate": 0.002, "loss": 5.7641, "step": 626 }, { "epoch": 0.07118149791926094, "grad_norm": 0.3984375, "learning_rate": 0.002, "loss": 5.7773, "step": 627 }, { "epoch": 0.07129502502918002, "grad_norm": 0.40234375, "learning_rate": 0.002, "loss": 5.7531, "step": 628 }, { "epoch": 0.0714085521390991, "grad_norm": 0.412109375, "learning_rate": 0.002, "loss": 5.7536, "step": 629 }, { "epoch": 0.07152207924901817, "grad_norm": 0.431640625, "learning_rate": 0.002, "loss": 5.7559, "step": 630 }, { "epoch": 0.07163560635893725, "grad_norm": 0.4453125, "learning_rate": 0.002, "loss": 5.779, "step": 631 }, { "epoch": 0.07174913346885632, "grad_norm": 0.427734375, "learning_rate": 0.002, "loss": 5.755, "step": 632 }, { "epoch": 0.0718626605787754, "grad_norm": 0.419921875, "learning_rate": 0.002, "loss": 5.7559, "step": 633 }, { "epoch": 0.07197618768869447, "grad_norm": 0.4296875, "learning_rate": 0.002, "loss": 5.7532, "step": 634 }, { "epoch": 0.07208971479861355, "grad_norm": 0.458984375, "learning_rate": 0.002, "loss": 5.748, "step": 635 }, { "epoch": 0.07220324190853263, "grad_norm": 0.439453125, "learning_rate": 0.002, "loss": 5.7481, "step": 636 }, { "epoch": 0.0723167690184517, "grad_norm": 0.47265625, "learning_rate": 0.002, "loss": 5.7658, "step": 637 }, { "epoch": 0.07243029612837078, "grad_norm": 0.482421875, "learning_rate": 0.002, "loss": 5.7519, "step": 638 }, { "epoch": 0.07254382323828985, "grad_norm": 0.52734375, "learning_rate": 0.002, "loss": 5.7332, "step": 639 }, { "epoch": 0.07265735034820893, "grad_norm": 0.54296875, "learning_rate": 0.002, "loss": 5.748, "step": 640 }, { "epoch": 0.072770877458128, "grad_norm": 0.50390625, "learning_rate": 0.002, "loss": 5.7558, "step": 641 }, { "epoch": 0.07288440456804708, "grad_norm": 0.5078125, "learning_rate": 0.002, "loss": 5.7501, "step": 642 }, { "epoch": 0.07299793167796616, "grad_norm": 0.48046875, "learning_rate": 0.002, "loss": 5.7486, "step": 643 }, { "epoch": 0.07311145878788523, "grad_norm": 0.51171875, "learning_rate": 0.002, "loss": 5.75, "step": 644 }, { "epoch": 0.07322498589780431, "grad_norm": 0.51953125, "learning_rate": 0.002, "loss": 5.7473, "step": 645 }, { "epoch": 0.07333851300772339, "grad_norm": 0.5625, "learning_rate": 0.002, "loss": 5.7436, "step": 646 }, { "epoch": 0.07345204011764246, "grad_norm": 0.51953125, "learning_rate": 0.002, "loss": 5.7425, "step": 647 }, { "epoch": 0.07356556722756154, "grad_norm": 0.48046875, "learning_rate": 0.002, "loss": 5.7354, "step": 648 }, { "epoch": 0.07367909433748063, "grad_norm": 0.50390625, "learning_rate": 0.002, "loss": 5.7463, "step": 649 }, { "epoch": 0.0737926214473997, "grad_norm": 0.51171875, "learning_rate": 0.002, "loss": 5.7663, "step": 650 }, { "epoch": 0.07390614855731878, "grad_norm": 0.5390625, "learning_rate": 0.002, "loss": 5.7306, "step": 651 }, { "epoch": 0.07401967566723786, "grad_norm": 0.48046875, "learning_rate": 0.002, "loss": 5.7533, "step": 652 }, { "epoch": 0.07413320277715693, "grad_norm": 0.5, "learning_rate": 0.002, "loss": 5.7253, "step": 653 }, { "epoch": 0.07424672988707601, "grad_norm": 0.5078125, "learning_rate": 0.002, "loss": 5.7278, "step": 654 }, { "epoch": 0.07436025699699508, "grad_norm": 0.494140625, "learning_rate": 0.002, "loss": 5.738, "step": 655 }, { "epoch": 0.07447378410691416, "grad_norm": 0.4765625, "learning_rate": 0.002, "loss": 5.7384, "step": 656 }, { "epoch": 0.07458731121683324, "grad_norm": 0.451171875, "learning_rate": 0.002, "loss": 5.7493, "step": 657 }, { "epoch": 0.07470083832675231, "grad_norm": 0.4453125, "learning_rate": 0.002, "loss": 5.7397, "step": 658 }, { "epoch": 0.07481436543667139, "grad_norm": 0.423828125, "learning_rate": 0.002, "loss": 5.7394, "step": 659 }, { "epoch": 0.07492789254659046, "grad_norm": 0.390625, "learning_rate": 0.002, "loss": 5.7142, "step": 660 }, { "epoch": 0.07504141965650954, "grad_norm": 0.41015625, "learning_rate": 0.002, "loss": 5.7464, "step": 661 }, { "epoch": 0.07515494676642862, "grad_norm": 0.4296875, "learning_rate": 0.002, "loss": 5.744, "step": 662 }, { "epoch": 0.07526847387634769, "grad_norm": 0.4453125, "learning_rate": 0.002, "loss": 5.7447, "step": 663 }, { "epoch": 0.07538200098626677, "grad_norm": 0.408203125, "learning_rate": 0.002, "loss": 5.7243, "step": 664 }, { "epoch": 0.07549552809618584, "grad_norm": 0.41015625, "learning_rate": 0.002, "loss": 5.7268, "step": 665 }, { "epoch": 0.07560905520610492, "grad_norm": 0.40625, "learning_rate": 0.002, "loss": 5.7228, "step": 666 }, { "epoch": 0.075722582316024, "grad_norm": 0.462890625, "learning_rate": 0.002, "loss": 5.76, "step": 667 }, { "epoch": 0.07583610942594307, "grad_norm": 0.47265625, "learning_rate": 0.002, "loss": 5.7159, "step": 668 }, { "epoch": 0.07594963653586215, "grad_norm": 0.439453125, "learning_rate": 0.002, "loss": 5.7309, "step": 669 }, { "epoch": 0.07606316364578122, "grad_norm": 0.4296875, "learning_rate": 0.002, "loss": 5.7336, "step": 670 }, { "epoch": 0.0761766907557003, "grad_norm": 0.419921875, "learning_rate": 0.002, "loss": 5.7507, "step": 671 }, { "epoch": 0.07629021786561938, "grad_norm": 0.419921875, "learning_rate": 0.002, "loss": 5.7326, "step": 672 }, { "epoch": 0.07640374497553845, "grad_norm": 0.400390625, "learning_rate": 0.002, "loss": 5.728, "step": 673 }, { "epoch": 0.07651727208545753, "grad_norm": 0.419921875, "learning_rate": 0.002, "loss": 5.7249, "step": 674 }, { "epoch": 0.0766307991953766, "grad_norm": 0.44140625, "learning_rate": 0.002, "loss": 5.7227, "step": 675 }, { "epoch": 0.07674432630529568, "grad_norm": 0.46484375, "learning_rate": 0.002, "loss": 5.708, "step": 676 }, { "epoch": 0.07685785341521476, "grad_norm": 0.45703125, "learning_rate": 0.002, "loss": 5.7337, "step": 677 }, { "epoch": 0.07697138052513383, "grad_norm": 0.46484375, "learning_rate": 0.002, "loss": 5.7041, "step": 678 }, { "epoch": 0.07708490763505292, "grad_norm": 0.458984375, "learning_rate": 0.002, "loss": 5.7493, "step": 679 }, { "epoch": 0.077198434744972, "grad_norm": 0.478515625, "learning_rate": 0.002, "loss": 5.7239, "step": 680 }, { "epoch": 0.07731196185489107, "grad_norm": 0.4765625, "learning_rate": 0.002, "loss": 5.7318, "step": 681 }, { "epoch": 0.07742548896481015, "grad_norm": 0.466796875, "learning_rate": 0.002, "loss": 5.7337, "step": 682 }, { "epoch": 0.07753901607472923, "grad_norm": 0.48828125, "learning_rate": 0.002, "loss": 5.7239, "step": 683 }, { "epoch": 0.0776525431846483, "grad_norm": 0.48046875, "learning_rate": 0.002, "loss": 5.7422, "step": 684 }, { "epoch": 0.07776607029456738, "grad_norm": 0.490234375, "learning_rate": 0.002, "loss": 5.7314, "step": 685 }, { "epoch": 0.07787959740448645, "grad_norm": 0.51953125, "learning_rate": 0.002, "loss": 5.7197, "step": 686 }, { "epoch": 0.07799312451440553, "grad_norm": 0.53125, "learning_rate": 0.002, "loss": 5.7187, "step": 687 }, { "epoch": 0.0781066516243246, "grad_norm": 0.515625, "learning_rate": 0.002, "loss": 5.7262, "step": 688 }, { "epoch": 0.07822017873424368, "grad_norm": 0.4921875, "learning_rate": 0.002, "loss": 5.7167, "step": 689 }, { "epoch": 0.07833370584416276, "grad_norm": 0.447265625, "learning_rate": 0.002, "loss": 5.7378, "step": 690 }, { "epoch": 0.07844723295408183, "grad_norm": 0.478515625, "learning_rate": 0.002, "loss": 5.7229, "step": 691 }, { "epoch": 0.07856076006400091, "grad_norm": 0.5859375, "learning_rate": 0.002, "loss": 5.7405, "step": 692 }, { "epoch": 0.07867428717391999, "grad_norm": 0.59765625, "learning_rate": 0.002, "loss": 5.7001, "step": 693 }, { "epoch": 0.07878781428383906, "grad_norm": 0.5390625, "learning_rate": 0.002, "loss": 5.6996, "step": 694 }, { "epoch": 0.07890134139375814, "grad_norm": 0.482421875, "learning_rate": 0.002, "loss": 5.7106, "step": 695 }, { "epoch": 0.07901486850367721, "grad_norm": 0.443359375, "learning_rate": 0.002, "loss": 5.7326, "step": 696 }, { "epoch": 0.07912839561359629, "grad_norm": 0.46875, "learning_rate": 0.002, "loss": 5.7346, "step": 697 }, { "epoch": 0.07924192272351537, "grad_norm": 0.4375, "learning_rate": 0.002, "loss": 5.7037, "step": 698 }, { "epoch": 0.07935544983343444, "grad_norm": 0.4609375, "learning_rate": 0.002, "loss": 5.7097, "step": 699 }, { "epoch": 0.07946897694335352, "grad_norm": 0.453125, "learning_rate": 0.002, "loss": 5.7179, "step": 700 }, { "epoch": 0.07958250405327259, "grad_norm": 0.42578125, "learning_rate": 0.002, "loss": 5.7063, "step": 701 }, { "epoch": 0.07969603116319167, "grad_norm": 0.421875, "learning_rate": 0.002, "loss": 5.7066, "step": 702 }, { "epoch": 0.07980955827311075, "grad_norm": 0.416015625, "learning_rate": 0.002, "loss": 5.6934, "step": 703 }, { "epoch": 0.07992308538302982, "grad_norm": 0.4375, "learning_rate": 0.002, "loss": 5.7136, "step": 704 }, { "epoch": 0.0800366124929489, "grad_norm": 0.4609375, "learning_rate": 0.002, "loss": 5.7259, "step": 705 }, { "epoch": 0.08015013960286797, "grad_norm": 0.466796875, "learning_rate": 0.002, "loss": 5.7111, "step": 706 }, { "epoch": 0.08026366671278705, "grad_norm": 0.5, "learning_rate": 0.002, "loss": 5.7146, "step": 707 }, { "epoch": 0.08037719382270613, "grad_norm": 0.53125, "learning_rate": 0.002, "loss": 5.7274, "step": 708 }, { "epoch": 0.08049072093262521, "grad_norm": 0.51953125, "learning_rate": 0.002, "loss": 5.7083, "step": 709 }, { "epoch": 0.08060424804254429, "grad_norm": 0.51953125, "learning_rate": 0.002, "loss": 5.7025, "step": 710 }, { "epoch": 0.08071777515246337, "grad_norm": 0.5078125, "learning_rate": 0.002, "loss": 5.7096, "step": 711 }, { "epoch": 0.08083130226238244, "grad_norm": 0.48828125, "learning_rate": 0.002, "loss": 5.7122, "step": 712 }, { "epoch": 0.08094482937230152, "grad_norm": 0.474609375, "learning_rate": 0.002, "loss": 5.7053, "step": 713 }, { "epoch": 0.0810583564822206, "grad_norm": 0.455078125, "learning_rate": 0.002, "loss": 5.7054, "step": 714 }, { "epoch": 0.08117188359213967, "grad_norm": 0.42578125, "learning_rate": 0.002, "loss": 5.7145, "step": 715 }, { "epoch": 0.08128541070205875, "grad_norm": 0.412109375, "learning_rate": 0.002, "loss": 5.6956, "step": 716 }, { "epoch": 0.08139893781197782, "grad_norm": 0.41015625, "learning_rate": 0.002, "loss": 5.714, "step": 717 }, { "epoch": 0.0815124649218969, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.6946, "step": 718 }, { "epoch": 0.08162599203181597, "grad_norm": 0.380859375, "learning_rate": 0.002, "loss": 5.7053, "step": 719 }, { "epoch": 0.08173951914173505, "grad_norm": 0.39453125, "learning_rate": 0.002, "loss": 5.7101, "step": 720 }, { "epoch": 0.08185304625165413, "grad_norm": 0.439453125, "learning_rate": 0.002, "loss": 5.7074, "step": 721 }, { "epoch": 0.0819665733615732, "grad_norm": 0.546875, "learning_rate": 0.002, "loss": 5.7082, "step": 722 }, { "epoch": 0.08208010047149228, "grad_norm": 0.49609375, "learning_rate": 0.002, "loss": 5.6899, "step": 723 }, { "epoch": 0.08219362758141135, "grad_norm": 0.4609375, "learning_rate": 0.002, "loss": 5.7093, "step": 724 }, { "epoch": 0.08230715469133043, "grad_norm": 0.439453125, "learning_rate": 0.002, "loss": 5.7094, "step": 725 }, { "epoch": 0.0824206818012495, "grad_norm": 0.40234375, "learning_rate": 0.002, "loss": 5.7134, "step": 726 }, { "epoch": 0.08253420891116858, "grad_norm": 0.392578125, "learning_rate": 0.002, "loss": 5.7131, "step": 727 }, { "epoch": 0.08264773602108766, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.7036, "step": 728 }, { "epoch": 0.08276126313100673, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.712, "step": 729 }, { "epoch": 0.08287479024092581, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.6783, "step": 730 }, { "epoch": 0.08298831735084489, "grad_norm": 0.3828125, "learning_rate": 0.002, "loss": 5.6737, "step": 731 }, { "epoch": 0.08310184446076396, "grad_norm": 0.4140625, "learning_rate": 0.002, "loss": 5.6974, "step": 732 }, { "epoch": 0.08321537157068304, "grad_norm": 0.408203125, "learning_rate": 0.002, "loss": 5.7014, "step": 733 }, { "epoch": 0.08332889868060211, "grad_norm": 0.412109375, "learning_rate": 0.002, "loss": 5.7028, "step": 734 }, { "epoch": 0.08344242579052119, "grad_norm": 0.416015625, "learning_rate": 0.002, "loss": 5.6799, "step": 735 }, { "epoch": 0.08355595290044027, "grad_norm": 0.45703125, "learning_rate": 0.002, "loss": 5.6847, "step": 736 }, { "epoch": 0.08366948001035934, "grad_norm": 0.458984375, "learning_rate": 0.002, "loss": 5.6855, "step": 737 }, { "epoch": 0.08378300712027842, "grad_norm": 0.431640625, "learning_rate": 0.002, "loss": 5.6816, "step": 738 }, { "epoch": 0.0838965342301975, "grad_norm": 0.419921875, "learning_rate": 0.002, "loss": 5.6893, "step": 739 }, { "epoch": 0.08401006134011658, "grad_norm": 0.38671875, "learning_rate": 0.002, "loss": 5.6585, "step": 740 }, { "epoch": 0.08412358845003566, "grad_norm": 0.412109375, "learning_rate": 0.002, "loss": 5.6878, "step": 741 }, { "epoch": 0.08423711555995474, "grad_norm": 0.48046875, "learning_rate": 0.002, "loss": 5.7034, "step": 742 }, { "epoch": 0.08435064266987381, "grad_norm": 0.5, "learning_rate": 0.002, "loss": 5.6927, "step": 743 }, { "epoch": 0.08446416977979289, "grad_norm": 0.482421875, "learning_rate": 0.002, "loss": 5.6771, "step": 744 }, { "epoch": 0.08457769688971196, "grad_norm": 0.47265625, "learning_rate": 0.002, "loss": 5.7051, "step": 745 }, { "epoch": 0.08469122399963104, "grad_norm": 0.451171875, "learning_rate": 0.002, "loss": 5.6919, "step": 746 }, { "epoch": 0.08480475110955012, "grad_norm": 0.443359375, "learning_rate": 0.002, "loss": 5.6853, "step": 747 }, { "epoch": 0.08491827821946919, "grad_norm": 0.462890625, "learning_rate": 0.002, "loss": 5.6983, "step": 748 }, { "epoch": 0.08503180532938827, "grad_norm": 0.515625, "learning_rate": 0.002, "loss": 5.6894, "step": 749 }, { "epoch": 0.08514533243930734, "grad_norm": 0.58203125, "learning_rate": 0.002, "loss": 5.6809, "step": 750 }, { "epoch": 0.08525885954922642, "grad_norm": 0.5546875, "learning_rate": 0.002, "loss": 5.6604, "step": 751 }, { "epoch": 0.0853723866591455, "grad_norm": 0.4609375, "learning_rate": 0.002, "loss": 5.6816, "step": 752 }, { "epoch": 0.08548591376906457, "grad_norm": 0.431640625, "learning_rate": 0.002, "loss": 5.672, "step": 753 }, { "epoch": 0.08559944087898365, "grad_norm": 0.392578125, "learning_rate": 0.002, "loss": 5.6886, "step": 754 }, { "epoch": 0.08571296798890272, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.6917, "step": 755 }, { "epoch": 0.0858264950988218, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.6703, "step": 756 }, { "epoch": 0.08594002220874088, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.6828, "step": 757 }, { "epoch": 0.08605354931865995, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.6771, "step": 758 }, { "epoch": 0.08616707642857903, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.6718, "step": 759 }, { "epoch": 0.0862806035384981, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.6722, "step": 760 }, { "epoch": 0.08639413064841718, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.6887, "step": 761 }, { "epoch": 0.08650765775833626, "grad_norm": 0.408203125, "learning_rate": 0.002, "loss": 5.6836, "step": 762 }, { "epoch": 0.08662118486825533, "grad_norm": 0.453125, "learning_rate": 0.002, "loss": 5.678, "step": 763 }, { "epoch": 0.08673471197817441, "grad_norm": 0.458984375, "learning_rate": 0.002, "loss": 5.6689, "step": 764 }, { "epoch": 0.08684823908809348, "grad_norm": 0.423828125, "learning_rate": 0.002, "loss": 5.6632, "step": 765 }, { "epoch": 0.08696176619801256, "grad_norm": 0.400390625, "learning_rate": 0.002, "loss": 5.6632, "step": 766 }, { "epoch": 0.08707529330793164, "grad_norm": 0.390625, "learning_rate": 0.002, "loss": 5.678, "step": 767 }, { "epoch": 0.08718882041785071, "grad_norm": 0.396484375, "learning_rate": 0.002, "loss": 5.6778, "step": 768 }, { "epoch": 0.08730234752776979, "grad_norm": 0.3828125, "learning_rate": 0.002, "loss": 5.6674, "step": 769 }, { "epoch": 0.08741587463768888, "grad_norm": 0.396484375, "learning_rate": 0.002, "loss": 5.6487, "step": 770 }, { "epoch": 0.08752940174760795, "grad_norm": 0.4453125, "learning_rate": 0.002, "loss": 5.6965, "step": 771 }, { "epoch": 0.08764292885752703, "grad_norm": 0.640625, "learning_rate": 0.002, "loss": 5.6662, "step": 772 }, { "epoch": 0.0877564559674461, "grad_norm": 0.625, "learning_rate": 0.002, "loss": 5.6936, "step": 773 }, { "epoch": 0.08786998307736518, "grad_norm": 0.5078125, "learning_rate": 0.002, "loss": 5.6424, "step": 774 }, { "epoch": 0.08798351018728426, "grad_norm": 0.50390625, "learning_rate": 0.002, "loss": 5.6758, "step": 775 }, { "epoch": 0.08809703729720333, "grad_norm": 0.439453125, "learning_rate": 0.002, "loss": 5.6707, "step": 776 }, { "epoch": 0.08821056440712241, "grad_norm": 0.390625, "learning_rate": 0.002, "loss": 5.6686, "step": 777 }, { "epoch": 0.08832409151704149, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.6656, "step": 778 }, { "epoch": 0.08843761862696056, "grad_norm": 0.412109375, "learning_rate": 0.002, "loss": 5.6793, "step": 779 }, { "epoch": 0.08855114573687964, "grad_norm": 0.5859375, "learning_rate": 0.002, "loss": 5.6657, "step": 780 }, { "epoch": 0.08866467284679871, "grad_norm": 0.62109375, "learning_rate": 0.002, "loss": 5.6561, "step": 781 }, { "epoch": 0.08877819995671779, "grad_norm": 0.482421875, "learning_rate": 0.002, "loss": 5.6558, "step": 782 }, { "epoch": 0.08889172706663687, "grad_norm": 0.458984375, "learning_rate": 0.002, "loss": 5.6518, "step": 783 }, { "epoch": 0.08900525417655594, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.6836, "step": 784 }, { "epoch": 0.08911878128647502, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.6575, "step": 785 }, { "epoch": 0.0892323083963941, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.6695, "step": 786 }, { "epoch": 0.08934583550631317, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.6617, "step": 787 }, { "epoch": 0.08945936261623225, "grad_norm": 0.404296875, "learning_rate": 0.002, "loss": 5.6731, "step": 788 }, { "epoch": 0.08957288972615132, "grad_norm": 0.458984375, "learning_rate": 0.002, "loss": 5.6636, "step": 789 }, { "epoch": 0.0896864168360704, "grad_norm": 0.53515625, "learning_rate": 0.002, "loss": 5.6592, "step": 790 }, { "epoch": 0.08979994394598947, "grad_norm": 0.53125, "learning_rate": 0.002, "loss": 5.6409, "step": 791 }, { "epoch": 0.08991347105590855, "grad_norm": 0.4765625, "learning_rate": 0.002, "loss": 5.6797, "step": 792 }, { "epoch": 0.09002699816582763, "grad_norm": 0.484375, "learning_rate": 0.002, "loss": 5.6531, "step": 793 }, { "epoch": 0.0901405252757467, "grad_norm": 0.4453125, "learning_rate": 0.002, "loss": 5.6628, "step": 794 }, { "epoch": 0.09025405238566578, "grad_norm": 0.439453125, "learning_rate": 0.002, "loss": 5.6652, "step": 795 }, { "epoch": 0.09036757949558485, "grad_norm": 0.396484375, "learning_rate": 0.002, "loss": 5.6619, "step": 796 }, { "epoch": 0.09048110660550393, "grad_norm": 0.384765625, "learning_rate": 0.002, "loss": 5.6807, "step": 797 }, { "epoch": 0.090594633715423, "grad_norm": 0.388671875, "learning_rate": 0.002, "loss": 5.6865, "step": 798 }, { "epoch": 0.09070816082534208, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.6616, "step": 799 }, { "epoch": 0.09082168793526117, "grad_norm": 0.40234375, "learning_rate": 0.002, "loss": 5.6624, "step": 800 }, { "epoch": 0.09093521504518025, "grad_norm": 0.4375, "learning_rate": 0.002, "loss": 5.6658, "step": 801 }, { "epoch": 0.09104874215509932, "grad_norm": 0.4921875, "learning_rate": 0.002, "loss": 5.6552, "step": 802 }, { "epoch": 0.0911622692650184, "grad_norm": 0.44140625, "learning_rate": 0.002, "loss": 5.6669, "step": 803 }, { "epoch": 0.09127579637493748, "grad_norm": 0.384765625, "learning_rate": 0.002, "loss": 5.675, "step": 804 }, { "epoch": 0.09138932348485655, "grad_norm": 0.384765625, "learning_rate": 0.002, "loss": 5.6735, "step": 805 }, { "epoch": 0.09150285059477563, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.6651, "step": 806 }, { "epoch": 0.0916163777046947, "grad_norm": 0.37109375, "learning_rate": 0.002, "loss": 5.6541, "step": 807 }, { "epoch": 0.09172990481461378, "grad_norm": 0.4140625, "learning_rate": 0.002, "loss": 5.6562, "step": 808 }, { "epoch": 0.09184343192453286, "grad_norm": 0.45703125, "learning_rate": 0.002, "loss": 5.6588, "step": 809 }, { "epoch": 0.09195695903445193, "grad_norm": 0.5, "learning_rate": 0.002, "loss": 5.6664, "step": 810 }, { "epoch": 0.09207048614437101, "grad_norm": 0.490234375, "learning_rate": 0.002, "loss": 5.65, "step": 811 }, { "epoch": 0.09218401325429008, "grad_norm": 0.439453125, "learning_rate": 0.002, "loss": 5.6573, "step": 812 }, { "epoch": 0.09229754036420916, "grad_norm": 0.435546875, "learning_rate": 0.002, "loss": 5.6691, "step": 813 }, { "epoch": 0.09241106747412824, "grad_norm": 0.458984375, "learning_rate": 0.002, "loss": 5.6454, "step": 814 }, { "epoch": 0.09252459458404731, "grad_norm": 0.466796875, "learning_rate": 0.002, "loss": 5.6413, "step": 815 }, { "epoch": 0.09263812169396639, "grad_norm": 0.515625, "learning_rate": 0.002, "loss": 5.6545, "step": 816 }, { "epoch": 0.09275164880388546, "grad_norm": 0.498046875, "learning_rate": 0.002, "loss": 5.6425, "step": 817 }, { "epoch": 0.09286517591380454, "grad_norm": 0.44921875, "learning_rate": 0.002, "loss": 5.6416, "step": 818 }, { "epoch": 0.09297870302372362, "grad_norm": 0.453125, "learning_rate": 0.002, "loss": 5.6595, "step": 819 }, { "epoch": 0.09309223013364269, "grad_norm": 0.416015625, "learning_rate": 0.002, "loss": 5.6368, "step": 820 }, { "epoch": 0.09320575724356177, "grad_norm": 0.3984375, "learning_rate": 0.002, "loss": 5.6714, "step": 821 }, { "epoch": 0.09331928435348084, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.6696, "step": 822 }, { "epoch": 0.09343281146339992, "grad_norm": 0.384765625, "learning_rate": 0.002, "loss": 5.6734, "step": 823 }, { "epoch": 0.093546338573319, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.673, "step": 824 }, { "epoch": 0.09365986568323807, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.645, "step": 825 }, { "epoch": 0.09377339279315715, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.6506, "step": 826 }, { "epoch": 0.09388691990307622, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.6317, "step": 827 }, { "epoch": 0.0940004470129953, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.6394, "step": 828 }, { "epoch": 0.09411397412291438, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.6609, "step": 829 }, { "epoch": 0.09422750123283347, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.6418, "step": 830 }, { "epoch": 0.09434102834275254, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.6387, "step": 831 }, { "epoch": 0.09445455545267162, "grad_norm": 0.37109375, "learning_rate": 0.002, "loss": 5.6421, "step": 832 }, { "epoch": 0.0945680825625907, "grad_norm": 0.37109375, "learning_rate": 0.002, "loss": 5.656, "step": 833 }, { "epoch": 0.09468160967250977, "grad_norm": 0.41015625, "learning_rate": 0.002, "loss": 5.6557, "step": 834 }, { "epoch": 0.09479513678242885, "grad_norm": 0.4453125, "learning_rate": 0.002, "loss": 5.6472, "step": 835 }, { "epoch": 0.09490866389234792, "grad_norm": 0.55078125, "learning_rate": 0.002, "loss": 5.6651, "step": 836 }, { "epoch": 0.095022191002267, "grad_norm": 0.53125, "learning_rate": 0.002, "loss": 5.6386, "step": 837 }, { "epoch": 0.09513571811218607, "grad_norm": 0.404296875, "learning_rate": 0.002, "loss": 5.6418, "step": 838 }, { "epoch": 0.09524924522210515, "grad_norm": 0.412109375, "learning_rate": 0.002, "loss": 5.6447, "step": 839 }, { "epoch": 0.09536277233202423, "grad_norm": 0.40234375, "learning_rate": 0.002, "loss": 5.6292, "step": 840 }, { "epoch": 0.0954762994419433, "grad_norm": 0.42578125, "learning_rate": 0.002, "loss": 5.6334, "step": 841 }, { "epoch": 0.09558982655186238, "grad_norm": 0.52734375, "learning_rate": 0.002, "loss": 5.6577, "step": 842 }, { "epoch": 0.09570335366178145, "grad_norm": 0.53515625, "learning_rate": 0.002, "loss": 5.654, "step": 843 }, { "epoch": 0.09581688077170053, "grad_norm": 0.494140625, "learning_rate": 0.002, "loss": 5.624, "step": 844 }, { "epoch": 0.0959304078816196, "grad_norm": 0.5078125, "learning_rate": 0.002, "loss": 5.6521, "step": 845 }, { "epoch": 0.09604393499153868, "grad_norm": 0.419921875, "learning_rate": 0.002, "loss": 5.6229, "step": 846 }, { "epoch": 0.09615746210145776, "grad_norm": 0.396484375, "learning_rate": 0.002, "loss": 5.6424, "step": 847 }, { "epoch": 0.09627098921137683, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.6545, "step": 848 }, { "epoch": 0.09638451632129591, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.6503, "step": 849 }, { "epoch": 0.09649804343121499, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.629, "step": 850 }, { "epoch": 0.09661157054113406, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.6165, "step": 851 }, { "epoch": 0.09672509765105314, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.6342, "step": 852 }, { "epoch": 0.09683862476097221, "grad_norm": 0.390625, "learning_rate": 0.002, "loss": 5.6343, "step": 853 }, { "epoch": 0.09695215187089129, "grad_norm": 0.384765625, "learning_rate": 0.002, "loss": 5.6143, "step": 854 }, { "epoch": 0.09706567898081037, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.6072, "step": 855 }, { "epoch": 0.09717920609072944, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.6483, "step": 856 }, { "epoch": 0.09729273320064852, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.6203, "step": 857 }, { "epoch": 0.0974062603105676, "grad_norm": 0.4140625, "learning_rate": 0.002, "loss": 5.6298, "step": 858 }, { "epoch": 0.09751978742048667, "grad_norm": 0.4140625, "learning_rate": 0.002, "loss": 5.6195, "step": 859 }, { "epoch": 0.09763331453040576, "grad_norm": 0.431640625, "learning_rate": 0.002, "loss": 5.6411, "step": 860 }, { "epoch": 0.09774684164032484, "grad_norm": 0.447265625, "learning_rate": 0.002, "loss": 5.6273, "step": 861 }, { "epoch": 0.09786036875024391, "grad_norm": 0.6015625, "learning_rate": 0.002, "loss": 5.6302, "step": 862 }, { "epoch": 0.09797389586016299, "grad_norm": 0.59765625, "learning_rate": 0.002, "loss": 5.6143, "step": 863 }, { "epoch": 0.09808742297008206, "grad_norm": 0.46875, "learning_rate": 0.002, "loss": 5.6288, "step": 864 }, { "epoch": 0.09820095008000114, "grad_norm": 0.51171875, "learning_rate": 0.002, "loss": 5.6417, "step": 865 }, { "epoch": 0.09831447718992022, "grad_norm": 0.427734375, "learning_rate": 0.002, "loss": 5.6172, "step": 866 }, { "epoch": 0.09842800429983929, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.6289, "step": 867 }, { "epoch": 0.09854153140975837, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.6493, "step": 868 }, { "epoch": 0.09865505851967744, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.6282, "step": 869 }, { "epoch": 0.09876858562959652, "grad_norm": 0.3984375, "learning_rate": 0.002, "loss": 5.6209, "step": 870 }, { "epoch": 0.0988821127395156, "grad_norm": 0.416015625, "learning_rate": 0.002, "loss": 5.6253, "step": 871 }, { "epoch": 0.09899563984943467, "grad_norm": 0.380859375, "learning_rate": 0.002, "loss": 5.6341, "step": 872 }, { "epoch": 0.09910916695935375, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.6358, "step": 873 }, { "epoch": 0.09922269406927282, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.6132, "step": 874 }, { "epoch": 0.0993362211791919, "grad_norm": 0.390625, "learning_rate": 0.002, "loss": 5.6295, "step": 875 }, { "epoch": 0.09944974828911098, "grad_norm": 0.56640625, "learning_rate": 0.002, "loss": 5.6159, "step": 876 }, { "epoch": 0.09956327539903005, "grad_norm": 0.625, "learning_rate": 0.002, "loss": 5.6369, "step": 877 }, { "epoch": 0.09967680250894913, "grad_norm": 0.6015625, "learning_rate": 0.002, "loss": 5.6308, "step": 878 }, { "epoch": 0.0997903296188682, "grad_norm": 0.55859375, "learning_rate": 0.002, "loss": 5.6488, "step": 879 }, { "epoch": 0.09990385672878728, "grad_norm": 0.43359375, "learning_rate": 0.002, "loss": 5.6272, "step": 880 }, { "epoch": 0.10001738383870636, "grad_norm": 0.42578125, "learning_rate": 0.002, "loss": 5.6217, "step": 881 }, { "epoch": 0.10013091094862543, "grad_norm": 0.38671875, "learning_rate": 0.002, "loss": 5.6349, "step": 882 }, { "epoch": 0.10024443805854451, "grad_norm": 0.380859375, "learning_rate": 0.002, "loss": 5.6253, "step": 883 }, { "epoch": 0.10035796516846358, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.6104, "step": 884 }, { "epoch": 0.10047149227838266, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.6363, "step": 885 }, { "epoch": 0.10058501938830174, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.6375, "step": 886 }, { "epoch": 0.10069854649822081, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.6283, "step": 887 }, { "epoch": 0.10081207360813989, "grad_norm": 0.37109375, "learning_rate": 0.002, "loss": 5.6314, "step": 888 }, { "epoch": 0.10092560071805896, "grad_norm": 0.44921875, "learning_rate": 0.002, "loss": 5.6346, "step": 889 }, { "epoch": 0.10103912782797805, "grad_norm": 0.50390625, "learning_rate": 0.002, "loss": 5.6193, "step": 890 }, { "epoch": 0.10115265493789713, "grad_norm": 0.625, "learning_rate": 0.002, "loss": 5.6268, "step": 891 }, { "epoch": 0.1012661820478162, "grad_norm": 0.5703125, "learning_rate": 0.002, "loss": 5.6322, "step": 892 }, { "epoch": 0.10137970915773528, "grad_norm": 0.41015625, "learning_rate": 0.002, "loss": 5.6032, "step": 893 }, { "epoch": 0.10149323626765436, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.6263, "step": 894 }, { "epoch": 0.10160676337757343, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.6228, "step": 895 }, { "epoch": 0.10172029048749251, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.6125, "step": 896 }, { "epoch": 0.10183381759741159, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.6185, "step": 897 }, { "epoch": 0.10194734470733066, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.6267, "step": 898 }, { "epoch": 0.10206087181724974, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.621, "step": 899 }, { "epoch": 0.10217439892716881, "grad_norm": 0.4140625, "learning_rate": 0.002, "loss": 5.614, "step": 900 }, { "epoch": 0.10228792603708789, "grad_norm": 0.53515625, "learning_rate": 0.002, "loss": 5.6343, "step": 901 }, { "epoch": 0.10240145314700697, "grad_norm": 0.52734375, "learning_rate": 0.002, "loss": 5.6302, "step": 902 }, { "epoch": 0.10251498025692604, "grad_norm": 0.455078125, "learning_rate": 0.002, "loss": 5.6186, "step": 903 }, { "epoch": 0.10262850736684512, "grad_norm": 0.435546875, "learning_rate": 0.002, "loss": 5.6047, "step": 904 }, { "epoch": 0.1027420344767642, "grad_norm": 0.39453125, "learning_rate": 0.002, "loss": 5.6217, "step": 905 }, { "epoch": 0.10285556158668327, "grad_norm": 0.400390625, "learning_rate": 0.002, "loss": 5.6354, "step": 906 }, { "epoch": 0.10296908869660235, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.6317, "step": 907 }, { "epoch": 0.10308261580652142, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.5977, "step": 908 }, { "epoch": 0.1031961429164405, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.6094, "step": 909 }, { "epoch": 0.10330967002635957, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.604, "step": 910 }, { "epoch": 0.10342319713627865, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.6258, "step": 911 }, { "epoch": 0.10353672424619773, "grad_norm": 0.392578125, "learning_rate": 0.002, "loss": 5.6154, "step": 912 }, { "epoch": 0.1036502513561168, "grad_norm": 0.42578125, "learning_rate": 0.002, "loss": 5.6283, "step": 913 }, { "epoch": 0.10376377846603588, "grad_norm": 0.41796875, "learning_rate": 0.002, "loss": 5.632, "step": 914 }, { "epoch": 0.10387730557595495, "grad_norm": 0.42578125, "learning_rate": 0.002, "loss": 5.6283, "step": 915 }, { "epoch": 0.10399083268587403, "grad_norm": 0.4140625, "learning_rate": 0.002, "loss": 5.6206, "step": 916 }, { "epoch": 0.1041043597957931, "grad_norm": 0.388671875, "learning_rate": 0.002, "loss": 5.6153, "step": 917 }, { "epoch": 0.10421788690571218, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.5826, "step": 918 }, { "epoch": 0.10433141401563126, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.6171, "step": 919 }, { "epoch": 0.10444494112555033, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.6129, "step": 920 }, { "epoch": 0.10455846823546942, "grad_norm": 0.37109375, "learning_rate": 0.002, "loss": 5.6148, "step": 921 }, { "epoch": 0.1046719953453885, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.6074, "step": 922 }, { "epoch": 0.10478552245530758, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.5983, "step": 923 }, { "epoch": 0.10489904956522665, "grad_norm": 0.380859375, "learning_rate": 0.002, "loss": 5.6257, "step": 924 }, { "epoch": 0.10501257667514573, "grad_norm": 0.40625, "learning_rate": 0.002, "loss": 5.5954, "step": 925 }, { "epoch": 0.1051261037850648, "grad_norm": 0.416015625, "learning_rate": 0.002, "loss": 5.6087, "step": 926 }, { "epoch": 0.10523963089498388, "grad_norm": 0.41796875, "learning_rate": 0.002, "loss": 5.618, "step": 927 }, { "epoch": 0.10535315800490296, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.6049, "step": 928 }, { "epoch": 0.10546668511482203, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.5961, "step": 929 }, { "epoch": 0.10558021222474111, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.5927, "step": 930 }, { "epoch": 0.10569373933466018, "grad_norm": 0.5, "learning_rate": 0.002, "loss": 5.6119, "step": 931 }, { "epoch": 0.10580726644457926, "grad_norm": 0.51953125, "learning_rate": 0.002, "loss": 5.5959, "step": 932 }, { "epoch": 0.10592079355449834, "grad_norm": 0.455078125, "learning_rate": 0.002, "loss": 5.6095, "step": 933 }, { "epoch": 0.10603432066441741, "grad_norm": 0.46484375, "learning_rate": 0.002, "loss": 5.6101, "step": 934 }, { "epoch": 0.10614784777433649, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.5955, "step": 935 }, { "epoch": 0.10626137488425556, "grad_norm": 0.37109375, "learning_rate": 0.002, "loss": 5.6151, "step": 936 }, { "epoch": 0.10637490199417464, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.6209, "step": 937 }, { "epoch": 0.10648842910409372, "grad_norm": 0.400390625, "learning_rate": 0.002, "loss": 5.591, "step": 938 }, { "epoch": 0.10660195621401279, "grad_norm": 0.447265625, "learning_rate": 0.002, "loss": 5.6023, "step": 939 }, { "epoch": 0.10671548332393187, "grad_norm": 0.546875, "learning_rate": 0.002, "loss": 5.6064, "step": 940 }, { "epoch": 0.10682901043385094, "grad_norm": 0.50390625, "learning_rate": 0.002, "loss": 5.6035, "step": 941 }, { "epoch": 0.10694253754377002, "grad_norm": 0.435546875, "learning_rate": 0.002, "loss": 5.5801, "step": 942 }, { "epoch": 0.1070560646536891, "grad_norm": 0.451171875, "learning_rate": 0.002, "loss": 5.6222, "step": 943 }, { "epoch": 0.10716959176360817, "grad_norm": 0.404296875, "learning_rate": 0.002, "loss": 5.6049, "step": 944 }, { "epoch": 0.10728311887352725, "grad_norm": 0.396484375, "learning_rate": 0.002, "loss": 5.6068, "step": 945 }, { "epoch": 0.10739664598344632, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.6129, "step": 946 }, { "epoch": 0.1075101730933654, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.6057, "step": 947 }, { "epoch": 0.10762370020328448, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.5867, "step": 948 }, { "epoch": 0.10773722731320355, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.6051, "step": 949 }, { "epoch": 0.10785075442312263, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.5915, "step": 950 }, { "epoch": 0.10796428153304172, "grad_norm": 0.400390625, "learning_rate": 0.002, "loss": 5.5859, "step": 951 }, { "epoch": 0.1080778086429608, "grad_norm": 0.39453125, "learning_rate": 0.002, "loss": 5.5956, "step": 952 }, { "epoch": 0.10819133575287987, "grad_norm": 0.400390625, "learning_rate": 0.002, "loss": 5.6042, "step": 953 }, { "epoch": 0.10830486286279895, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.6142, "step": 954 }, { "epoch": 0.10841838997271802, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.5827, "step": 955 }, { "epoch": 0.1085319170826371, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.5824, "step": 956 }, { "epoch": 0.10864544419255617, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.6083, "step": 957 }, { "epoch": 0.10875897130247525, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.5804, "step": 958 }, { "epoch": 0.10887249841239433, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.6095, "step": 959 }, { "epoch": 0.1089860255223134, "grad_norm": 0.416015625, "learning_rate": 0.002, "loss": 5.5909, "step": 960 }, { "epoch": 0.10909955263223248, "grad_norm": 0.498046875, "learning_rate": 0.002, "loss": 5.5835, "step": 961 }, { "epoch": 0.10921307974215155, "grad_norm": 0.5, "learning_rate": 0.002, "loss": 5.6202, "step": 962 }, { "epoch": 0.10932660685207063, "grad_norm": 0.498046875, "learning_rate": 0.002, "loss": 5.5972, "step": 963 }, { "epoch": 0.1094401339619897, "grad_norm": 0.4765625, "learning_rate": 0.002, "loss": 5.5943, "step": 964 }, { "epoch": 0.10955366107190878, "grad_norm": 0.40234375, "learning_rate": 0.002, "loss": 5.5897, "step": 965 }, { "epoch": 0.10966718818182786, "grad_norm": 0.404296875, "learning_rate": 0.002, "loss": 5.5808, "step": 966 }, { "epoch": 0.10978071529174693, "grad_norm": 0.404296875, "learning_rate": 0.002, "loss": 5.6067, "step": 967 }, { "epoch": 0.10989424240166601, "grad_norm": 0.41015625, "learning_rate": 0.002, "loss": 5.6222, "step": 968 }, { "epoch": 0.11000776951158509, "grad_norm": 0.37109375, "learning_rate": 0.002, "loss": 5.5942, "step": 969 }, { "epoch": 0.11012129662150416, "grad_norm": 0.384765625, "learning_rate": 0.002, "loss": 5.6148, "step": 970 }, { "epoch": 0.11023482373142324, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.5933, "step": 971 }, { "epoch": 0.11034835084134231, "grad_norm": 0.37109375, "learning_rate": 0.002, "loss": 5.6097, "step": 972 }, { "epoch": 0.11046187795126139, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.6176, "step": 973 }, { "epoch": 0.11057540506118047, "grad_norm": 0.44921875, "learning_rate": 0.002, "loss": 5.575, "step": 974 }, { "epoch": 0.11068893217109954, "grad_norm": 0.66015625, "learning_rate": 0.002, "loss": 5.6118, "step": 975 }, { "epoch": 0.11080245928101862, "grad_norm": 0.6171875, "learning_rate": 0.002, "loss": 5.5853, "step": 976 }, { "epoch": 0.1109159863909377, "grad_norm": 0.45703125, "learning_rate": 0.002, "loss": 5.5862, "step": 977 }, { "epoch": 0.11102951350085677, "grad_norm": 0.4921875, "learning_rate": 0.002, "loss": 5.5831, "step": 978 }, { "epoch": 0.11114304061077585, "grad_norm": 0.47265625, "learning_rate": 0.002, "loss": 5.5997, "step": 979 }, { "epoch": 0.11125656772069492, "grad_norm": 0.470703125, "learning_rate": 0.002, "loss": 5.5824, "step": 980 }, { "epoch": 0.11137009483061401, "grad_norm": 0.46875, "learning_rate": 0.002, "loss": 5.6081, "step": 981 }, { "epoch": 0.11148362194053309, "grad_norm": 0.44140625, "learning_rate": 0.002, "loss": 5.5927, "step": 982 }, { "epoch": 0.11159714905045216, "grad_norm": 0.40625, "learning_rate": 0.002, "loss": 5.5977, "step": 983 }, { "epoch": 0.11171067616037124, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.5878, "step": 984 }, { "epoch": 0.11182420327029031, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.5938, "step": 985 }, { "epoch": 0.11193773038020939, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.5986, "step": 986 }, { "epoch": 0.11205125749012847, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.5747, "step": 987 }, { "epoch": 0.11216478460004754, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.5795, "step": 988 }, { "epoch": 0.11227831170996662, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.5969, "step": 989 }, { "epoch": 0.1123918388198857, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.602, "step": 990 }, { "epoch": 0.11250536592980477, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.5899, "step": 991 }, { "epoch": 0.11261889303972385, "grad_norm": 0.380859375, "learning_rate": 0.002, "loss": 5.5868, "step": 992 }, { "epoch": 0.11273242014964292, "grad_norm": 0.419921875, "learning_rate": 0.002, "loss": 5.5909, "step": 993 }, { "epoch": 0.112845947259562, "grad_norm": 0.50390625, "learning_rate": 0.002, "loss": 5.6037, "step": 994 }, { "epoch": 0.11295947436948107, "grad_norm": 0.478515625, "learning_rate": 0.002, "loss": 5.5669, "step": 995 }, { "epoch": 0.11307300147940015, "grad_norm": 0.3984375, "learning_rate": 0.002, "loss": 5.5906, "step": 996 }, { "epoch": 0.11318652858931923, "grad_norm": 0.390625, "learning_rate": 0.002, "loss": 5.5805, "step": 997 }, { "epoch": 0.1133000556992383, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.5668, "step": 998 }, { "epoch": 0.11341358280915738, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.5836, "step": 999 }, { "epoch": 0.11352710991907645, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.5779, "step": 1000 }, { "epoch": 0.11364063702899553, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.5953, "step": 1001 }, { "epoch": 0.1137541641389146, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.5934, "step": 1002 }, { "epoch": 0.11386769124883368, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.5934, "step": 1003 }, { "epoch": 0.11398121835875276, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.5778, "step": 1004 }, { "epoch": 0.11409474546867183, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.5706, "step": 1005 }, { "epoch": 0.11420827257859091, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.5717, "step": 1006 }, { "epoch": 0.11432179968850999, "grad_norm": 0.37109375, "learning_rate": 0.002, "loss": 5.5936, "step": 1007 }, { "epoch": 0.11443532679842906, "grad_norm": 0.44140625, "learning_rate": 0.002, "loss": 5.578, "step": 1008 }, { "epoch": 0.11454885390834814, "grad_norm": 0.451171875, "learning_rate": 0.002, "loss": 5.5532, "step": 1009 }, { "epoch": 0.11466238101826721, "grad_norm": 0.419921875, "learning_rate": 0.002, "loss": 5.5636, "step": 1010 }, { "epoch": 0.1147759081281863, "grad_norm": 0.4140625, "learning_rate": 0.002, "loss": 5.5852, "step": 1011 }, { "epoch": 0.11488943523810538, "grad_norm": 0.40625, "learning_rate": 0.002, "loss": 5.5727, "step": 1012 }, { "epoch": 0.11500296234802446, "grad_norm": 0.39453125, "learning_rate": 0.002, "loss": 5.578, "step": 1013 }, { "epoch": 0.11511648945794353, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.5804, "step": 1014 }, { "epoch": 0.11523001656786261, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.5649, "step": 1015 }, { "epoch": 0.11534354367778168, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.5849, "step": 1016 }, { "epoch": 0.11545707078770076, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.5747, "step": 1017 }, { "epoch": 0.11557059789761984, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.5729, "step": 1018 }, { "epoch": 0.11568412500753891, "grad_norm": 0.37109375, "learning_rate": 0.002, "loss": 5.5544, "step": 1019 }, { "epoch": 0.11579765211745799, "grad_norm": 0.458984375, "learning_rate": 0.002, "loss": 5.5804, "step": 1020 }, { "epoch": 0.11591117922737706, "grad_norm": 0.443359375, "learning_rate": 0.002, "loss": 5.5695, "step": 1021 }, { "epoch": 0.11602470633729614, "grad_norm": 0.40234375, "learning_rate": 0.002, "loss": 5.5706, "step": 1022 }, { "epoch": 0.11613823344721522, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.5751, "step": 1023 }, { "epoch": 0.11625176055713429, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.5699, "step": 1024 }, { "epoch": 0.11636528766705337, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.5722, "step": 1025 }, { "epoch": 0.11647881477697244, "grad_norm": 0.392578125, "learning_rate": 0.002, "loss": 5.5785, "step": 1026 }, { "epoch": 0.11659234188689152, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.5789, "step": 1027 }, { "epoch": 0.1167058689968106, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.5705, "step": 1028 }, { "epoch": 0.11681939610672967, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.586, "step": 1029 }, { "epoch": 0.11693292321664875, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.5683, "step": 1030 }, { "epoch": 0.11704645032656782, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.5833, "step": 1031 }, { "epoch": 0.1171599774364869, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.5729, "step": 1032 }, { "epoch": 0.11727350454640598, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.5695, "step": 1033 }, { "epoch": 0.11738703165632505, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.5532, "step": 1034 }, { "epoch": 0.11750055876624413, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.5794, "step": 1035 }, { "epoch": 0.1176140858761632, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.5763, "step": 1036 }, { "epoch": 0.11772761298608228, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.5686, "step": 1037 }, { "epoch": 0.11784114009600136, "grad_norm": 0.37109375, "learning_rate": 0.002, "loss": 5.5819, "step": 1038 }, { "epoch": 0.11795466720592043, "grad_norm": 0.439453125, "learning_rate": 0.002, "loss": 5.5806, "step": 1039 }, { "epoch": 0.11806819431583951, "grad_norm": 0.4453125, "learning_rate": 0.002, "loss": 5.5665, "step": 1040 }, { "epoch": 0.1181817214257586, "grad_norm": 0.419921875, "learning_rate": 0.002, "loss": 5.5706, "step": 1041 }, { "epoch": 0.11829524853567767, "grad_norm": 0.384765625, "learning_rate": 0.002, "loss": 5.562, "step": 1042 }, { "epoch": 0.11840877564559675, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.5604, "step": 1043 }, { "epoch": 0.11852230275551583, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.5737, "step": 1044 }, { "epoch": 0.1186358298654349, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.5703, "step": 1045 }, { "epoch": 0.11874935697535398, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.5501, "step": 1046 }, { "epoch": 0.11886288408527305, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.5648, "step": 1047 }, { "epoch": 0.11897641119519213, "grad_norm": 0.40234375, "learning_rate": 0.002, "loss": 5.5639, "step": 1048 }, { "epoch": 0.1190899383051112, "grad_norm": 0.56640625, "learning_rate": 0.002, "loss": 5.5697, "step": 1049 }, { "epoch": 0.11920346541503028, "grad_norm": 0.5625, "learning_rate": 0.002, "loss": 5.5672, "step": 1050 }, { "epoch": 0.11931699252494936, "grad_norm": 0.48046875, "learning_rate": 0.002, "loss": 5.5607, "step": 1051 }, { "epoch": 0.11943051963486843, "grad_norm": 0.4375, "learning_rate": 0.002, "loss": 5.573, "step": 1052 }, { "epoch": 0.11954404674478751, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.5559, "step": 1053 }, { "epoch": 0.11965757385470659, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.5561, "step": 1054 }, { "epoch": 0.11977110096462566, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.5514, "step": 1055 }, { "epoch": 0.11988462807454474, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.5471, "step": 1056 }, { "epoch": 0.11999815518446381, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.5953, "step": 1057 }, { "epoch": 0.12011168229438289, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.5661, "step": 1058 }, { "epoch": 0.12022520940430197, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.5598, "step": 1059 }, { "epoch": 0.12033873651422104, "grad_norm": 0.396484375, "learning_rate": 0.002, "loss": 5.5619, "step": 1060 }, { "epoch": 0.12045226362414012, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.5565, "step": 1061 }, { "epoch": 0.1205657907340592, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.5556, "step": 1062 }, { "epoch": 0.12067931784397827, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.5713, "step": 1063 }, { "epoch": 0.12079284495389735, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.5582, "step": 1064 }, { "epoch": 0.12090637206381642, "grad_norm": 0.404296875, "learning_rate": 0.002, "loss": 5.5706, "step": 1065 }, { "epoch": 0.1210198991737355, "grad_norm": 0.458984375, "learning_rate": 0.002, "loss": 5.5816, "step": 1066 }, { "epoch": 0.12113342628365457, "grad_norm": 0.4453125, "learning_rate": 0.002, "loss": 5.5739, "step": 1067 }, { "epoch": 0.12124695339357365, "grad_norm": 0.44921875, "learning_rate": 0.002, "loss": 5.574, "step": 1068 }, { "epoch": 0.12136048050349273, "grad_norm": 0.5546875, "learning_rate": 0.002, "loss": 5.5521, "step": 1069 }, { "epoch": 0.1214740076134118, "grad_norm": 0.65625, "learning_rate": 0.002, "loss": 5.5583, "step": 1070 }, { "epoch": 0.12158753472333089, "grad_norm": 0.60546875, "learning_rate": 0.002, "loss": 5.5464, "step": 1071 }, { "epoch": 0.12170106183324997, "grad_norm": 0.5078125, "learning_rate": 0.002, "loss": 5.5743, "step": 1072 }, { "epoch": 0.12181458894316904, "grad_norm": 0.412109375, "learning_rate": 0.002, "loss": 5.5292, "step": 1073 }, { "epoch": 0.12192811605308812, "grad_norm": 0.388671875, "learning_rate": 0.002, "loss": 5.57, "step": 1074 }, { "epoch": 0.1220416431630072, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.5705, "step": 1075 }, { "epoch": 0.12215517027292627, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.5686, "step": 1076 }, { "epoch": 0.12226869738284535, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.5457, "step": 1077 }, { "epoch": 0.12238222449276442, "grad_norm": 0.40234375, "learning_rate": 0.002, "loss": 5.5514, "step": 1078 }, { "epoch": 0.1224957516026835, "grad_norm": 0.44921875, "learning_rate": 0.002, "loss": 5.5715, "step": 1079 }, { "epoch": 0.12260927871260258, "grad_norm": 0.458984375, "learning_rate": 0.002, "loss": 5.5441, "step": 1080 }, { "epoch": 0.12272280582252165, "grad_norm": 0.462890625, "learning_rate": 0.002, "loss": 5.5699, "step": 1081 }, { "epoch": 0.12283633293244073, "grad_norm": 0.44921875, "learning_rate": 0.002, "loss": 5.5504, "step": 1082 }, { "epoch": 0.1229498600423598, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.5494, "step": 1083 }, { "epoch": 0.12306338715227888, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.5575, "step": 1084 }, { "epoch": 0.12317691426219796, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.563, "step": 1085 }, { "epoch": 0.12329044137211703, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.5622, "step": 1086 }, { "epoch": 0.12340396848203611, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.5618, "step": 1087 }, { "epoch": 0.12351749559195518, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.5402, "step": 1088 }, { "epoch": 0.12363102270187426, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.5557, "step": 1089 }, { "epoch": 0.12374454981179334, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.563, "step": 1090 }, { "epoch": 0.12385807692171241, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.5684, "step": 1091 }, { "epoch": 0.12397160403163149, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.5484, "step": 1092 }, { "epoch": 0.12408513114155056, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.5572, "step": 1093 }, { "epoch": 0.12419865825146964, "grad_norm": 0.3984375, "learning_rate": 0.002, "loss": 5.5657, "step": 1094 }, { "epoch": 0.12431218536138872, "grad_norm": 0.48828125, "learning_rate": 0.002, "loss": 5.5525, "step": 1095 }, { "epoch": 0.12442571247130779, "grad_norm": 0.498046875, "learning_rate": 0.002, "loss": 5.5524, "step": 1096 }, { "epoch": 0.12453923958122687, "grad_norm": 0.4765625, "learning_rate": 0.002, "loss": 5.5588, "step": 1097 }, { "epoch": 0.12465276669114594, "grad_norm": 0.490234375, "learning_rate": 0.002, "loss": 5.5512, "step": 1098 }, { "epoch": 0.12476629380106502, "grad_norm": 0.421875, "learning_rate": 0.002, "loss": 5.5488, "step": 1099 }, { "epoch": 0.1248798209109841, "grad_norm": 0.431640625, "learning_rate": 0.002, "loss": 5.5724, "step": 1100 }, { "epoch": 0.12499334802090317, "grad_norm": 0.384765625, "learning_rate": 0.002, "loss": 5.5408, "step": 1101 }, { "epoch": 0.12510687513082225, "grad_norm": 0.384765625, "learning_rate": 0.002, "loss": 5.5319, "step": 1102 }, { "epoch": 0.12522040224074132, "grad_norm": 0.3984375, "learning_rate": 0.002, "loss": 5.5478, "step": 1103 }, { "epoch": 0.1253339293506604, "grad_norm": 0.46875, "learning_rate": 0.002, "loss": 5.5648, "step": 1104 }, { "epoch": 0.12544745646057948, "grad_norm": 0.427734375, "learning_rate": 0.002, "loss": 5.5579, "step": 1105 }, { "epoch": 0.12556098357049855, "grad_norm": 0.384765625, "learning_rate": 0.002, "loss": 5.5536, "step": 1106 }, { "epoch": 0.12567451068041763, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.5541, "step": 1107 }, { "epoch": 0.1257880377903367, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.5458, "step": 1108 }, { "epoch": 0.12590156490025578, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.5617, "step": 1109 }, { "epoch": 0.12601509201017486, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.5634, "step": 1110 }, { "epoch": 0.12612861912009393, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.5347, "step": 1111 }, { "epoch": 0.126242146230013, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.5753, "step": 1112 }, { "epoch": 0.12635567333993208, "grad_norm": 0.39453125, "learning_rate": 0.002, "loss": 5.5607, "step": 1113 }, { "epoch": 0.12646920044985116, "grad_norm": 0.416015625, "learning_rate": 0.002, "loss": 5.5263, "step": 1114 }, { "epoch": 0.12658272755977024, "grad_norm": 0.419921875, "learning_rate": 0.002, "loss": 5.5605, "step": 1115 }, { "epoch": 0.1266962546696893, "grad_norm": 0.423828125, "learning_rate": 0.002, "loss": 5.5603, "step": 1116 }, { "epoch": 0.12680978177960842, "grad_norm": 0.419921875, "learning_rate": 0.002, "loss": 5.54, "step": 1117 }, { "epoch": 0.1269233088895275, "grad_norm": 0.408203125, "learning_rate": 0.002, "loss": 5.5457, "step": 1118 }, { "epoch": 0.12703683599944657, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.5271, "step": 1119 }, { "epoch": 0.12715036310936564, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.5395, "step": 1120 }, { "epoch": 0.12726389021928472, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.5356, "step": 1121 }, { "epoch": 0.1273774173292038, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.5575, "step": 1122 }, { "epoch": 0.12749094443912287, "grad_norm": 0.384765625, "learning_rate": 0.002, "loss": 5.5514, "step": 1123 }, { "epoch": 0.12760447154904195, "grad_norm": 0.39453125, "learning_rate": 0.002, "loss": 5.5386, "step": 1124 }, { "epoch": 0.12771799865896102, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.5548, "step": 1125 }, { "epoch": 0.1278315257688801, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.5535, "step": 1126 }, { "epoch": 0.12794505287879918, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.5349, "step": 1127 }, { "epoch": 0.12805857998871825, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.5458, "step": 1128 }, { "epoch": 0.12817210709863733, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.5261, "step": 1129 }, { "epoch": 0.1282856342085564, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.5365, "step": 1130 }, { "epoch": 0.12839916131847548, "grad_norm": 0.466796875, "learning_rate": 0.002, "loss": 5.5481, "step": 1131 }, { "epoch": 0.12851268842839456, "grad_norm": 0.4765625, "learning_rate": 0.002, "loss": 5.5552, "step": 1132 }, { "epoch": 0.12862621553831363, "grad_norm": 0.46484375, "learning_rate": 0.002, "loss": 5.5549, "step": 1133 }, { "epoch": 0.1287397426482327, "grad_norm": 0.5625, "learning_rate": 0.002, "loss": 5.5279, "step": 1134 }, { "epoch": 0.12885326975815178, "grad_norm": 0.474609375, "learning_rate": 0.002, "loss": 5.5441, "step": 1135 }, { "epoch": 0.12896679686807086, "grad_norm": 0.427734375, "learning_rate": 0.002, "loss": 5.5371, "step": 1136 }, { "epoch": 0.12908032397798994, "grad_norm": 0.43359375, "learning_rate": 0.002, "loss": 5.5476, "step": 1137 }, { "epoch": 0.129193851087909, "grad_norm": 0.40234375, "learning_rate": 0.002, "loss": 5.5427, "step": 1138 }, { "epoch": 0.1293073781978281, "grad_norm": 0.419921875, "learning_rate": 0.002, "loss": 5.5379, "step": 1139 }, { "epoch": 0.12942090530774716, "grad_norm": 0.421875, "learning_rate": 0.002, "loss": 5.558, "step": 1140 }, { "epoch": 0.12953443241766624, "grad_norm": 0.412109375, "learning_rate": 0.002, "loss": 5.5446, "step": 1141 }, { "epoch": 0.12964795952758532, "grad_norm": 0.39453125, "learning_rate": 0.002, "loss": 5.5415, "step": 1142 }, { "epoch": 0.1297614866375044, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.5544, "step": 1143 }, { "epoch": 0.12987501374742347, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.5142, "step": 1144 }, { "epoch": 0.12998854085734254, "grad_norm": 0.390625, "learning_rate": 0.002, "loss": 5.5177, "step": 1145 }, { "epoch": 0.13010206796726162, "grad_norm": 0.408203125, "learning_rate": 0.002, "loss": 5.5389, "step": 1146 }, { "epoch": 0.1302155950771807, "grad_norm": 0.419921875, "learning_rate": 0.002, "loss": 5.5318, "step": 1147 }, { "epoch": 0.13032912218709977, "grad_norm": 0.3984375, "learning_rate": 0.002, "loss": 5.5477, "step": 1148 }, { "epoch": 0.13044264929701885, "grad_norm": 0.396484375, "learning_rate": 0.002, "loss": 5.5443, "step": 1149 }, { "epoch": 0.13055617640693792, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.5231, "step": 1150 }, { "epoch": 0.130669703516857, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.5401, "step": 1151 }, { "epoch": 0.13078323062677608, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.5374, "step": 1152 }, { "epoch": 0.13089675773669515, "grad_norm": 0.40234375, "learning_rate": 0.002, "loss": 5.5306, "step": 1153 }, { "epoch": 0.13101028484661423, "grad_norm": 0.53125, "learning_rate": 0.002, "loss": 5.5381, "step": 1154 }, { "epoch": 0.1311238119565333, "grad_norm": 0.5078125, "learning_rate": 0.002, "loss": 5.5337, "step": 1155 }, { "epoch": 0.13123733906645238, "grad_norm": 0.44921875, "learning_rate": 0.002, "loss": 5.5434, "step": 1156 }, { "epoch": 0.13135086617637146, "grad_norm": 0.46875, "learning_rate": 0.002, "loss": 5.5441, "step": 1157 }, { "epoch": 0.13146439328629053, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.5277, "step": 1158 }, { "epoch": 0.1315779203962096, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.539, "step": 1159 }, { "epoch": 0.13169144750612868, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.5387, "step": 1160 }, { "epoch": 0.13180497461604776, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.5396, "step": 1161 }, { "epoch": 0.13191850172596684, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.5249, "step": 1162 }, { "epoch": 0.1320320288358859, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.5466, "step": 1163 }, { "epoch": 0.132145555945805, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.526, "step": 1164 }, { "epoch": 0.13225908305572406, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.5321, "step": 1165 }, { "epoch": 0.13237261016564314, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.5269, "step": 1166 }, { "epoch": 0.13248613727556222, "grad_norm": 0.4375, "learning_rate": 0.002, "loss": 5.5436, "step": 1167 }, { "epoch": 0.1325996643854813, "grad_norm": 0.453125, "learning_rate": 0.002, "loss": 5.5382, "step": 1168 }, { "epoch": 0.13271319149540037, "grad_norm": 0.44921875, "learning_rate": 0.002, "loss": 5.5295, "step": 1169 }, { "epoch": 0.13282671860531944, "grad_norm": 0.416015625, "learning_rate": 0.002, "loss": 5.5262, "step": 1170 }, { "epoch": 0.13294024571523852, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.5363, "step": 1171 }, { "epoch": 0.1330537728251576, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.5427, "step": 1172 }, { "epoch": 0.13316729993507667, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.5425, "step": 1173 }, { "epoch": 0.13328082704499575, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.5189, "step": 1174 }, { "epoch": 0.13339435415491482, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.554, "step": 1175 }, { "epoch": 0.1335078812648339, "grad_norm": 0.412109375, "learning_rate": 0.002, "loss": 5.4912, "step": 1176 }, { "epoch": 0.133621408374753, "grad_norm": 0.421875, "learning_rate": 0.002, "loss": 5.5278, "step": 1177 }, { "epoch": 0.13373493548467208, "grad_norm": 0.42578125, "learning_rate": 0.002, "loss": 5.5212, "step": 1178 }, { "epoch": 0.13384846259459116, "grad_norm": 0.431640625, "learning_rate": 0.002, "loss": 5.5424, "step": 1179 }, { "epoch": 0.13396198970451023, "grad_norm": 0.421875, "learning_rate": 0.002, "loss": 5.5192, "step": 1180 }, { "epoch": 0.1340755168144293, "grad_norm": 0.435546875, "learning_rate": 0.002, "loss": 5.5283, "step": 1181 }, { "epoch": 0.13418904392434838, "grad_norm": 0.40234375, "learning_rate": 0.002, "loss": 5.537, "step": 1182 }, { "epoch": 0.13430257103426746, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.5259, "step": 1183 }, { "epoch": 0.13441609814418654, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.5359, "step": 1184 }, { "epoch": 0.1345296252541056, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.5311, "step": 1185 }, { "epoch": 0.1346431523640247, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.5232, "step": 1186 }, { "epoch": 0.13475667947394376, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.5296, "step": 1187 }, { "epoch": 0.13487020658386284, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.5201, "step": 1188 }, { "epoch": 0.13498373369378192, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.5279, "step": 1189 }, { "epoch": 0.135097260803701, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.5238, "step": 1190 }, { "epoch": 0.13521078791362007, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.5171, "step": 1191 }, { "epoch": 0.13532431502353914, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.5474, "step": 1192 }, { "epoch": 0.13543784213345822, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.5475, "step": 1193 }, { "epoch": 0.1355513692433773, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.5243, "step": 1194 }, { "epoch": 0.13566489635329637, "grad_norm": 0.380859375, "learning_rate": 0.002, "loss": 5.5344, "step": 1195 }, { "epoch": 0.13577842346321545, "grad_norm": 0.50390625, "learning_rate": 0.002, "loss": 5.53, "step": 1196 }, { "epoch": 0.13589195057313452, "grad_norm": 0.5234375, "learning_rate": 0.002, "loss": 5.5222, "step": 1197 }, { "epoch": 0.1360054776830536, "grad_norm": 0.59765625, "learning_rate": 0.002, "loss": 5.5147, "step": 1198 }, { "epoch": 0.13611900479297268, "grad_norm": 0.59375, "learning_rate": 0.002, "loss": 5.5287, "step": 1199 }, { "epoch": 0.13623253190289175, "grad_norm": 0.421875, "learning_rate": 0.002, "loss": 5.5145, "step": 1200 }, { "epoch": 0.13634605901281083, "grad_norm": 0.4296875, "learning_rate": 0.002, "loss": 5.529, "step": 1201 }, { "epoch": 0.1364595861227299, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.5366, "step": 1202 }, { "epoch": 0.13657311323264898, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.5282, "step": 1203 }, { "epoch": 0.13668664034256806, "grad_norm": 0.40625, "learning_rate": 0.002, "loss": 5.5166, "step": 1204 }, { "epoch": 0.13680016745248713, "grad_norm": 0.421875, "learning_rate": 0.002, "loss": 5.5217, "step": 1205 }, { "epoch": 0.1369136945624062, "grad_norm": 0.4609375, "learning_rate": 0.002, "loss": 5.5257, "step": 1206 }, { "epoch": 0.13702722167232528, "grad_norm": 0.4140625, "learning_rate": 0.002, "loss": 5.5025, "step": 1207 }, { "epoch": 0.13714074878224436, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.5269, "step": 1208 }, { "epoch": 0.13725427589216344, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.5391, "step": 1209 }, { "epoch": 0.1373678030020825, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.5028, "step": 1210 }, { "epoch": 0.1374813301120016, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.527, "step": 1211 }, { "epoch": 0.13759485722192066, "grad_norm": 0.384765625, "learning_rate": 0.002, "loss": 5.5209, "step": 1212 }, { "epoch": 0.13770838433183974, "grad_norm": 0.447265625, "learning_rate": 0.002, "loss": 5.5134, "step": 1213 }, { "epoch": 0.13782191144175882, "grad_norm": 0.5078125, "learning_rate": 0.002, "loss": 5.5211, "step": 1214 }, { "epoch": 0.1379354385516779, "grad_norm": 0.5234375, "learning_rate": 0.002, "loss": 5.5359, "step": 1215 }, { "epoch": 0.13804896566159697, "grad_norm": 0.453125, "learning_rate": 0.002, "loss": 5.5179, "step": 1216 }, { "epoch": 0.13816249277151604, "grad_norm": 0.421875, "learning_rate": 0.002, "loss": 5.5178, "step": 1217 }, { "epoch": 0.13827601988143512, "grad_norm": 0.41015625, "learning_rate": 0.002, "loss": 5.4937, "step": 1218 }, { "epoch": 0.1383895469913542, "grad_norm": 0.41796875, "learning_rate": 0.002, "loss": 5.5265, "step": 1219 }, { "epoch": 0.13850307410127327, "grad_norm": 0.416015625, "learning_rate": 0.002, "loss": 5.5118, "step": 1220 }, { "epoch": 0.13861660121119235, "grad_norm": 0.3828125, "learning_rate": 0.002, "loss": 5.5181, "step": 1221 }, { "epoch": 0.13873012832111142, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.5385, "step": 1222 }, { "epoch": 0.1388436554310305, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.539, "step": 1223 }, { "epoch": 0.13895718254094958, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.5237, "step": 1224 }, { "epoch": 0.13907070965086865, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.5251, "step": 1225 }, { "epoch": 0.13918423676078773, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.5393, "step": 1226 }, { "epoch": 0.1392977638707068, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.4971, "step": 1227 }, { "epoch": 0.13941129098062588, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.5192, "step": 1228 }, { "epoch": 0.13952481809054496, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.4989, "step": 1229 }, { "epoch": 0.13963834520046403, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.5368, "step": 1230 }, { "epoch": 0.1397518723103831, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.5391, "step": 1231 }, { "epoch": 0.13986539942030218, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.519, "step": 1232 }, { "epoch": 0.13997892653022126, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.5221, "step": 1233 }, { "epoch": 0.14009245364014034, "grad_norm": 0.44921875, "learning_rate": 0.002, "loss": 5.5069, "step": 1234 }, { "epoch": 0.1402059807500594, "grad_norm": 0.439453125, "learning_rate": 0.002, "loss": 5.53, "step": 1235 }, { "epoch": 0.1403195078599785, "grad_norm": 0.421875, "learning_rate": 0.002, "loss": 5.5109, "step": 1236 }, { "epoch": 0.1404330349698976, "grad_norm": 0.396484375, "learning_rate": 0.002, "loss": 5.5188, "step": 1237 }, { "epoch": 0.14054656207981667, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.5151, "step": 1238 }, { "epoch": 0.14066008918973574, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.5115, "step": 1239 }, { "epoch": 0.14077361629965482, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.5396, "step": 1240 }, { "epoch": 0.1408871434095739, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.5101, "step": 1241 }, { "epoch": 0.14100067051949297, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.5293, "step": 1242 }, { "epoch": 0.14111419762941205, "grad_norm": 0.37109375, "learning_rate": 0.002, "loss": 5.507, "step": 1243 }, { "epoch": 0.14122772473933112, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.5201, "step": 1244 }, { "epoch": 0.1413412518492502, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.507, "step": 1245 }, { "epoch": 0.14145477895916928, "grad_norm": 0.43359375, "learning_rate": 0.002, "loss": 5.5111, "step": 1246 }, { "epoch": 0.14156830606908835, "grad_norm": 0.55859375, "learning_rate": 0.002, "loss": 5.5155, "step": 1247 }, { "epoch": 0.14168183317900743, "grad_norm": 0.53125, "learning_rate": 0.002, "loss": 5.5221, "step": 1248 }, { "epoch": 0.1417953602889265, "grad_norm": 0.439453125, "learning_rate": 0.002, "loss": 5.5118, "step": 1249 }, { "epoch": 0.14190888739884558, "grad_norm": 0.431640625, "learning_rate": 0.002, "loss": 5.5088, "step": 1250 }, { "epoch": 0.14202241450876466, "grad_norm": 0.380859375, "learning_rate": 0.002, "loss": 5.5193, "step": 1251 }, { "epoch": 0.14213594161868373, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.4872, "step": 1252 }, { "epoch": 0.1422494687286028, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.5207, "step": 1253 }, { "epoch": 0.14236299583852188, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.5089, "step": 1254 }, { "epoch": 0.14247652294844096, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.4982, "step": 1255 }, { "epoch": 0.14259005005836003, "grad_norm": 0.392578125, "learning_rate": 0.002, "loss": 5.5159, "step": 1256 }, { "epoch": 0.1427035771682791, "grad_norm": 0.50390625, "learning_rate": 0.002, "loss": 5.507, "step": 1257 }, { "epoch": 0.1428171042781982, "grad_norm": 0.546875, "learning_rate": 0.002, "loss": 5.5016, "step": 1258 }, { "epoch": 0.14293063138811726, "grad_norm": 0.52734375, "learning_rate": 0.002, "loss": 5.5108, "step": 1259 }, { "epoch": 0.14304415849803634, "grad_norm": 0.47265625, "learning_rate": 0.002, "loss": 5.5205, "step": 1260 }, { "epoch": 0.14315768560795541, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.5127, "step": 1261 }, { "epoch": 0.1432712127178745, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.4916, "step": 1262 }, { "epoch": 0.14338473982779357, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.5037, "step": 1263 }, { "epoch": 0.14349826693771264, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.518, "step": 1264 }, { "epoch": 0.14361179404763172, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.4998, "step": 1265 }, { "epoch": 0.1437253211575508, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.5086, "step": 1266 }, { "epoch": 0.14383884826746987, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.5099, "step": 1267 }, { "epoch": 0.14395237537738895, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.5156, "step": 1268 }, { "epoch": 0.14406590248730802, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.4937, "step": 1269 }, { "epoch": 0.1441794295972271, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.5108, "step": 1270 }, { "epoch": 0.14429295670714617, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.5208, "step": 1271 }, { "epoch": 0.14440648381706525, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.517, "step": 1272 }, { "epoch": 0.14452001092698433, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.514, "step": 1273 }, { "epoch": 0.1446335380369034, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.5225, "step": 1274 }, { "epoch": 0.14474706514682248, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.5213, "step": 1275 }, { "epoch": 0.14486059225674155, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.5253, "step": 1276 }, { "epoch": 0.14497411936666063, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.4806, "step": 1277 }, { "epoch": 0.1450876464765797, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.4986, "step": 1278 }, { "epoch": 0.14520117358649878, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.4802, "step": 1279 }, { "epoch": 0.14531470069641786, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.509, "step": 1280 }, { "epoch": 0.14542822780633693, "grad_norm": 0.388671875, "learning_rate": 0.002, "loss": 5.5083, "step": 1281 }, { "epoch": 0.145541754916256, "grad_norm": 0.3984375, "learning_rate": 0.002, "loss": 5.4918, "step": 1282 }, { "epoch": 0.1456552820261751, "grad_norm": 0.404296875, "learning_rate": 0.002, "loss": 5.5365, "step": 1283 }, { "epoch": 0.14576880913609416, "grad_norm": 0.388671875, "learning_rate": 0.002, "loss": 5.5178, "step": 1284 }, { "epoch": 0.14588233624601324, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.5097, "step": 1285 }, { "epoch": 0.14599586335593231, "grad_norm": 0.3984375, "learning_rate": 0.002, "loss": 5.5035, "step": 1286 }, { "epoch": 0.1461093904658514, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.513, "step": 1287 }, { "epoch": 0.14622291757577047, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.5109, "step": 1288 }, { "epoch": 0.14633644468568954, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.5049, "step": 1289 }, { "epoch": 0.14644997179560862, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.5105, "step": 1290 }, { "epoch": 0.1465634989055277, "grad_norm": 0.4921875, "learning_rate": 0.002, "loss": 5.5136, "step": 1291 }, { "epoch": 0.14667702601544677, "grad_norm": 0.59375, "learning_rate": 0.002, "loss": 5.5088, "step": 1292 }, { "epoch": 0.14679055312536585, "grad_norm": 0.54296875, "learning_rate": 0.002, "loss": 5.512, "step": 1293 }, { "epoch": 0.14690408023528492, "grad_norm": 0.48046875, "learning_rate": 0.002, "loss": 5.4919, "step": 1294 }, { "epoch": 0.147017607345204, "grad_norm": 0.3984375, "learning_rate": 0.002, "loss": 5.4931, "step": 1295 }, { "epoch": 0.14713113445512307, "grad_norm": 0.427734375, "learning_rate": 0.002, "loss": 5.493, "step": 1296 }, { "epoch": 0.14724466156504215, "grad_norm": 0.416015625, "learning_rate": 0.002, "loss": 5.4839, "step": 1297 }, { "epoch": 0.14735818867496125, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.4777, "step": 1298 }, { "epoch": 0.14747171578488033, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.5059, "step": 1299 }, { "epoch": 0.1475852428947994, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.5141, "step": 1300 }, { "epoch": 0.14769877000471848, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.5259, "step": 1301 }, { "epoch": 0.14781229711463756, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.5155, "step": 1302 }, { "epoch": 0.14792582422455663, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.503, "step": 1303 }, { "epoch": 0.1480393513344757, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.5032, "step": 1304 }, { "epoch": 0.1481528784443948, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.4906, "step": 1305 }, { "epoch": 0.14826640555431386, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.4984, "step": 1306 }, { "epoch": 0.14837993266423294, "grad_norm": 0.412109375, "learning_rate": 0.002, "loss": 5.5264, "step": 1307 }, { "epoch": 0.14849345977415201, "grad_norm": 0.51953125, "learning_rate": 0.002, "loss": 5.5082, "step": 1308 }, { "epoch": 0.1486069868840711, "grad_norm": 0.515625, "learning_rate": 0.002, "loss": 5.4823, "step": 1309 }, { "epoch": 0.14872051399399017, "grad_norm": 0.5234375, "learning_rate": 0.002, "loss": 5.5023, "step": 1310 }, { "epoch": 0.14883404110390924, "grad_norm": 0.46484375, "learning_rate": 0.002, "loss": 5.4978, "step": 1311 }, { "epoch": 0.14894756821382832, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.5204, "step": 1312 }, { "epoch": 0.1490610953237474, "grad_norm": 0.390625, "learning_rate": 0.002, "loss": 5.4882, "step": 1313 }, { "epoch": 0.14917462243366647, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.4701, "step": 1314 }, { "epoch": 0.14928814954358555, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.51, "step": 1315 }, { "epoch": 0.14940167665350462, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.4809, "step": 1316 }, { "epoch": 0.1495152037634237, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.4878, "step": 1317 }, { "epoch": 0.14962873087334277, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.4939, "step": 1318 }, { "epoch": 0.14974225798326185, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.4838, "step": 1319 }, { "epoch": 0.14985578509318093, "grad_norm": 0.384765625, "learning_rate": 0.002, "loss": 5.4949, "step": 1320 }, { "epoch": 0.1499693122031, "grad_norm": 0.41015625, "learning_rate": 0.002, "loss": 5.4901, "step": 1321 }, { "epoch": 0.15008283931301908, "grad_norm": 0.41015625, "learning_rate": 0.002, "loss": 5.4935, "step": 1322 }, { "epoch": 0.15019636642293815, "grad_norm": 0.423828125, "learning_rate": 0.002, "loss": 5.5011, "step": 1323 }, { "epoch": 0.15030989353285723, "grad_norm": 0.482421875, "learning_rate": 0.002, "loss": 5.4881, "step": 1324 }, { "epoch": 0.1504234206427763, "grad_norm": 0.43359375, "learning_rate": 0.002, "loss": 5.4987, "step": 1325 }, { "epoch": 0.15053694775269538, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.4833, "step": 1326 }, { "epoch": 0.15065047486261446, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.5303, "step": 1327 }, { "epoch": 0.15076400197253353, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.5166, "step": 1328 }, { "epoch": 0.1508775290824526, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.4696, "step": 1329 }, { "epoch": 0.1509910561923717, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.4704, "step": 1330 }, { "epoch": 0.15110458330229076, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.4845, "step": 1331 }, { "epoch": 0.15121811041220984, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.4976, "step": 1332 }, { "epoch": 0.15133163752212891, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.4732, "step": 1333 }, { "epoch": 0.151445164632048, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.4948, "step": 1334 }, { "epoch": 0.15155869174196707, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.4892, "step": 1335 }, { "epoch": 0.15167221885188614, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.4983, "step": 1336 }, { "epoch": 0.15178574596180522, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.4859, "step": 1337 }, { "epoch": 0.1518992730717243, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.4646, "step": 1338 }, { "epoch": 0.15201280018164337, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.5193, "step": 1339 }, { "epoch": 0.15212632729156245, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.5059, "step": 1340 }, { "epoch": 0.15223985440148152, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.5149, "step": 1341 }, { "epoch": 0.1523533815114006, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.4976, "step": 1342 }, { "epoch": 0.15246690862131967, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.491, "step": 1343 }, { "epoch": 0.15258043573123875, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.4841, "step": 1344 }, { "epoch": 0.15269396284115783, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.4861, "step": 1345 }, { "epoch": 0.1528074899510769, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.4922, "step": 1346 }, { "epoch": 0.15292101706099598, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.5032, "step": 1347 }, { "epoch": 0.15303454417091505, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.4852, "step": 1348 }, { "epoch": 0.15314807128083413, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.5097, "step": 1349 }, { "epoch": 0.1532615983907532, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.4892, "step": 1350 }, { "epoch": 0.15337512550067228, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.4972, "step": 1351 }, { "epoch": 0.15348865261059136, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.5134, "step": 1352 }, { "epoch": 0.15360217972051043, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.4984, "step": 1353 }, { "epoch": 0.1537157068304295, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.4886, "step": 1354 }, { "epoch": 0.1538292339403486, "grad_norm": 0.46484375, "learning_rate": 0.002, "loss": 5.466, "step": 1355 }, { "epoch": 0.15394276105026766, "grad_norm": 0.51171875, "learning_rate": 0.002, "loss": 5.4912, "step": 1356 }, { "epoch": 0.15405628816018674, "grad_norm": 0.55078125, "learning_rate": 0.002, "loss": 5.5099, "step": 1357 }, { "epoch": 0.15416981527010584, "grad_norm": 0.482421875, "learning_rate": 0.002, "loss": 5.5082, "step": 1358 }, { "epoch": 0.15428334238002492, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.4784, "step": 1359 }, { "epoch": 0.154396869489944, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.5001, "step": 1360 }, { "epoch": 0.15451039659986307, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.4859, "step": 1361 }, { "epoch": 0.15462392370978215, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.4926, "step": 1362 }, { "epoch": 0.15473745081970122, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.4977, "step": 1363 }, { "epoch": 0.1548509779296203, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.4775, "step": 1364 }, { "epoch": 0.15496450503953937, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.5012, "step": 1365 }, { "epoch": 0.15507803214945845, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.5044, "step": 1366 }, { "epoch": 0.15519155925937753, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.4905, "step": 1367 }, { "epoch": 0.1553050863692966, "grad_norm": 0.474609375, "learning_rate": 0.002, "loss": 5.5009, "step": 1368 }, { "epoch": 0.15541861347921568, "grad_norm": 0.44921875, "learning_rate": 0.002, "loss": 5.4886, "step": 1369 }, { "epoch": 0.15553214058913475, "grad_norm": 0.38671875, "learning_rate": 0.002, "loss": 5.5077, "step": 1370 }, { "epoch": 0.15564566769905383, "grad_norm": 0.404296875, "learning_rate": 0.002, "loss": 5.4751, "step": 1371 }, { "epoch": 0.1557591948089729, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.5031, "step": 1372 }, { "epoch": 0.15587272191889198, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.5057, "step": 1373 }, { "epoch": 0.15598624902881106, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.4909, "step": 1374 }, { "epoch": 0.15609977613873013, "grad_norm": 0.37109375, "learning_rate": 0.002, "loss": 5.4869, "step": 1375 }, { "epoch": 0.1562133032486492, "grad_norm": 0.4140625, "learning_rate": 0.002, "loss": 5.5038, "step": 1376 }, { "epoch": 0.1563268303585683, "grad_norm": 0.4140625, "learning_rate": 0.002, "loss": 5.4862, "step": 1377 }, { "epoch": 0.15644035746848736, "grad_norm": 0.453125, "learning_rate": 0.002, "loss": 5.4859, "step": 1378 }, { "epoch": 0.15655388457840644, "grad_norm": 0.4609375, "learning_rate": 0.002, "loss": 5.4768, "step": 1379 }, { "epoch": 0.15666741168832551, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.4793, "step": 1380 }, { "epoch": 0.1567809387982446, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.4765, "step": 1381 }, { "epoch": 0.15689446590816367, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.4869, "step": 1382 }, { "epoch": 0.15700799301808274, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.5028, "step": 1383 }, { "epoch": 0.15712152012800182, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.5071, "step": 1384 }, { "epoch": 0.1572350472379209, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.485, "step": 1385 }, { "epoch": 0.15734857434783997, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.4737, "step": 1386 }, { "epoch": 0.15746210145775905, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.4989, "step": 1387 }, { "epoch": 0.15757562856767812, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.4855, "step": 1388 }, { "epoch": 0.1576891556775972, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.4772, "step": 1389 }, { "epoch": 0.15780268278751627, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.4933, "step": 1390 }, { "epoch": 0.15791620989743535, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.4855, "step": 1391 }, { "epoch": 0.15802973700735443, "grad_norm": 0.3828125, "learning_rate": 0.002, "loss": 5.4806, "step": 1392 }, { "epoch": 0.1581432641172735, "grad_norm": 0.39453125, "learning_rate": 0.002, "loss": 5.4889, "step": 1393 }, { "epoch": 0.15825679122719258, "grad_norm": 0.41015625, "learning_rate": 0.002, "loss": 5.4864, "step": 1394 }, { "epoch": 0.15837031833711165, "grad_norm": 0.40625, "learning_rate": 0.002, "loss": 5.4974, "step": 1395 }, { "epoch": 0.15848384544703073, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.4708, "step": 1396 }, { "epoch": 0.1585973725569498, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.4889, "step": 1397 }, { "epoch": 0.15871089966686888, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.4789, "step": 1398 }, { "epoch": 0.15882442677678796, "grad_norm": 0.416015625, "learning_rate": 0.002, "loss": 5.5028, "step": 1399 }, { "epoch": 0.15893795388670703, "grad_norm": 0.421875, "learning_rate": 0.002, "loss": 5.4884, "step": 1400 }, { "epoch": 0.1590514809966261, "grad_norm": 0.40234375, "learning_rate": 0.002, "loss": 5.4941, "step": 1401 }, { "epoch": 0.15916500810654519, "grad_norm": 0.47265625, "learning_rate": 0.002, "loss": 5.476, "step": 1402 }, { "epoch": 0.15927853521646426, "grad_norm": 0.5078125, "learning_rate": 0.002, "loss": 5.5018, "step": 1403 }, { "epoch": 0.15939206232638334, "grad_norm": 0.453125, "learning_rate": 0.002, "loss": 5.4796, "step": 1404 }, { "epoch": 0.15950558943630241, "grad_norm": 0.44921875, "learning_rate": 0.002, "loss": 5.4904, "step": 1405 }, { "epoch": 0.1596191165462215, "grad_norm": 0.478515625, "learning_rate": 0.002, "loss": 5.4859, "step": 1406 }, { "epoch": 0.15973264365614057, "grad_norm": 0.51953125, "learning_rate": 0.002, "loss": 5.473, "step": 1407 }, { "epoch": 0.15984617076605964, "grad_norm": 0.578125, "learning_rate": 0.002, "loss": 5.4877, "step": 1408 }, { "epoch": 0.15995969787597872, "grad_norm": 0.546875, "learning_rate": 0.002, "loss": 5.495, "step": 1409 }, { "epoch": 0.1600732249858978, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.4757, "step": 1410 }, { "epoch": 0.16018675209581687, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.4813, "step": 1411 }, { "epoch": 0.16030027920573595, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.4831, "step": 1412 }, { "epoch": 0.16041380631565502, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.4812, "step": 1413 }, { "epoch": 0.1605273334255741, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.4656, "step": 1414 }, { "epoch": 0.16064086053549317, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.4795, "step": 1415 }, { "epoch": 0.16075438764541225, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.4759, "step": 1416 }, { "epoch": 0.16086791475533133, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.4771, "step": 1417 }, { "epoch": 0.16098144186525043, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.5008, "step": 1418 }, { "epoch": 0.1610949689751695, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.4532, "step": 1419 }, { "epoch": 0.16120849608508858, "grad_norm": 0.46484375, "learning_rate": 0.002, "loss": 5.4976, "step": 1420 }, { "epoch": 0.16132202319500766, "grad_norm": 0.4765625, "learning_rate": 0.002, "loss": 5.4839, "step": 1421 }, { "epoch": 0.16143555030492673, "grad_norm": 0.51171875, "learning_rate": 0.002, "loss": 5.4972, "step": 1422 }, { "epoch": 0.1615490774148458, "grad_norm": 0.4921875, "learning_rate": 0.002, "loss": 5.4811, "step": 1423 }, { "epoch": 0.16166260452476489, "grad_norm": 0.3984375, "learning_rate": 0.002, "loss": 5.4947, "step": 1424 }, { "epoch": 0.16177613163468396, "grad_norm": 0.40234375, "learning_rate": 0.002, "loss": 5.4983, "step": 1425 }, { "epoch": 0.16188965874460304, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.4758, "step": 1426 }, { "epoch": 0.1620031858545221, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.4805, "step": 1427 }, { "epoch": 0.1621167129644412, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.4665, "step": 1428 }, { "epoch": 0.16223024007436027, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.4805, "step": 1429 }, { "epoch": 0.16234376718427934, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.4692, "step": 1430 }, { "epoch": 0.16245729429419842, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.5048, "step": 1431 }, { "epoch": 0.1625708214041175, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.4789, "step": 1432 }, { "epoch": 0.16268434851403657, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.4673, "step": 1433 }, { "epoch": 0.16279787562395565, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.4844, "step": 1434 }, { "epoch": 0.16291140273387472, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.4937, "step": 1435 }, { "epoch": 0.1630249298437938, "grad_norm": 0.53125, "learning_rate": 0.002, "loss": 5.4909, "step": 1436 }, { "epoch": 0.16313845695371287, "grad_norm": 0.578125, "learning_rate": 0.002, "loss": 5.4605, "step": 1437 }, { "epoch": 0.16325198406363195, "grad_norm": 0.494140625, "learning_rate": 0.002, "loss": 5.4732, "step": 1438 }, { "epoch": 0.16336551117355103, "grad_norm": 0.462890625, "learning_rate": 0.002, "loss": 5.4838, "step": 1439 }, { "epoch": 0.1634790382834701, "grad_norm": 0.53125, "learning_rate": 0.002, "loss": 5.476, "step": 1440 }, { "epoch": 0.16359256539338918, "grad_norm": 0.48046875, "learning_rate": 0.002, "loss": 5.4707, "step": 1441 }, { "epoch": 0.16370609250330825, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.491, "step": 1442 }, { "epoch": 0.16381961961322733, "grad_norm": 0.392578125, "learning_rate": 0.002, "loss": 5.4845, "step": 1443 }, { "epoch": 0.1639331467231464, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.4903, "step": 1444 }, { "epoch": 0.16404667383306548, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.4855, "step": 1445 }, { "epoch": 0.16416020094298456, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.4706, "step": 1446 }, { "epoch": 0.16427372805290363, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.4858, "step": 1447 }, { "epoch": 0.1643872551628227, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.4512, "step": 1448 }, { "epoch": 0.16450078227274179, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.4715, "step": 1449 }, { "epoch": 0.16461430938266086, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.4567, "step": 1450 }, { "epoch": 0.16472783649257994, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.4879, "step": 1451 }, { "epoch": 0.164841363602499, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.4386, "step": 1452 }, { "epoch": 0.1649548907124181, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.4727, "step": 1453 }, { "epoch": 0.16506841782233717, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.4587, "step": 1454 }, { "epoch": 0.16518194493225624, "grad_norm": 0.380859375, "learning_rate": 0.002, "loss": 5.4848, "step": 1455 }, { "epoch": 0.16529547204217532, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.4719, "step": 1456 }, { "epoch": 0.1654089991520944, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.4634, "step": 1457 }, { "epoch": 0.16552252626201347, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.495, "step": 1458 }, { "epoch": 0.16563605337193255, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.4712, "step": 1459 }, { "epoch": 0.16574958048185162, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.4553, "step": 1460 }, { "epoch": 0.1658631075917707, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.4866, "step": 1461 }, { "epoch": 0.16597663470168977, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.4752, "step": 1462 }, { "epoch": 0.16609016181160885, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.4927, "step": 1463 }, { "epoch": 0.16620368892152793, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.4516, "step": 1464 }, { "epoch": 0.166317216031447, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.4769, "step": 1465 }, { "epoch": 0.16643074314136608, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.4526, "step": 1466 }, { "epoch": 0.16654427025128515, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.4606, "step": 1467 }, { "epoch": 0.16665779736120423, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.4622, "step": 1468 }, { "epoch": 0.1667713244711233, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.4751, "step": 1469 }, { "epoch": 0.16688485158104238, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.4731, "step": 1470 }, { "epoch": 0.16699837869096146, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.4632, "step": 1471 }, { "epoch": 0.16711190580088053, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.472, "step": 1472 }, { "epoch": 0.1672254329107996, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.4779, "step": 1473 }, { "epoch": 0.16733896002071869, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.4662, "step": 1474 }, { "epoch": 0.16745248713063776, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.4617, "step": 1475 }, { "epoch": 0.16756601424055684, "grad_norm": 0.470703125, "learning_rate": 0.002, "loss": 5.4877, "step": 1476 }, { "epoch": 0.1676795413504759, "grad_norm": 0.49609375, "learning_rate": 0.002, "loss": 5.4593, "step": 1477 }, { "epoch": 0.167793068460395, "grad_norm": 0.447265625, "learning_rate": 0.002, "loss": 5.4768, "step": 1478 }, { "epoch": 0.1679065955703141, "grad_norm": 0.5, "learning_rate": 0.002, "loss": 5.4696, "step": 1479 }, { "epoch": 0.16802012268023317, "grad_norm": 0.423828125, "learning_rate": 0.002, "loss": 5.4673, "step": 1480 }, { "epoch": 0.16813364979015225, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.4726, "step": 1481 }, { "epoch": 0.16824717690007132, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.4702, "step": 1482 }, { "epoch": 0.1683607040099904, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.485, "step": 1483 }, { "epoch": 0.16847423111990947, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.4598, "step": 1484 }, { "epoch": 0.16858775822982855, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.4861, "step": 1485 }, { "epoch": 0.16870128533974763, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.4659, "step": 1486 }, { "epoch": 0.1688148124496667, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.4681, "step": 1487 }, { "epoch": 0.16892833955958578, "grad_norm": 0.392578125, "learning_rate": 0.002, "loss": 5.4692, "step": 1488 }, { "epoch": 0.16904186666950485, "grad_norm": 0.431640625, "learning_rate": 0.002, "loss": 5.4862, "step": 1489 }, { "epoch": 0.16915539377942393, "grad_norm": 0.515625, "learning_rate": 0.002, "loss": 5.473, "step": 1490 }, { "epoch": 0.169268920889343, "grad_norm": 0.4609375, "learning_rate": 0.002, "loss": 5.476, "step": 1491 }, { "epoch": 0.16938244799926208, "grad_norm": 0.3828125, "learning_rate": 0.002, "loss": 5.4545, "step": 1492 }, { "epoch": 0.16949597510918116, "grad_norm": 0.419921875, "learning_rate": 0.002, "loss": 5.471, "step": 1493 }, { "epoch": 0.16960950221910023, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.4775, "step": 1494 }, { "epoch": 0.1697230293290193, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.4582, "step": 1495 }, { "epoch": 0.16983655643893839, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.4853, "step": 1496 }, { "epoch": 0.16995008354885746, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.4723, "step": 1497 }, { "epoch": 0.17006361065877654, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.4633, "step": 1498 }, { "epoch": 0.1701771377686956, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.4763, "step": 1499 }, { "epoch": 0.1702906648786147, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.453, "step": 1500 }, { "epoch": 0.17040419198853377, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.4781, "step": 1501 }, { "epoch": 0.17051771909845284, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.4801, "step": 1502 }, { "epoch": 0.17063124620837192, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.465, "step": 1503 }, { "epoch": 0.170744773318291, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.4826, "step": 1504 }, { "epoch": 0.17085830042821007, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.452, "step": 1505 }, { "epoch": 0.17097182753812915, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.4603, "step": 1506 }, { "epoch": 0.17108535464804822, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.4688, "step": 1507 }, { "epoch": 0.1711988817579673, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.4663, "step": 1508 }, { "epoch": 0.17131240886788637, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.4648, "step": 1509 }, { "epoch": 0.17142593597780545, "grad_norm": 0.404296875, "learning_rate": 0.002, "loss": 5.4656, "step": 1510 }, { "epoch": 0.17153946308772453, "grad_norm": 0.5078125, "learning_rate": 0.002, "loss": 5.4348, "step": 1511 }, { "epoch": 0.1716529901976436, "grad_norm": 0.486328125, "learning_rate": 0.002, "loss": 5.4604, "step": 1512 }, { "epoch": 0.17176651730756268, "grad_norm": 0.451171875, "learning_rate": 0.002, "loss": 5.4631, "step": 1513 }, { "epoch": 0.17188004441748175, "grad_norm": 0.4296875, "learning_rate": 0.002, "loss": 5.4614, "step": 1514 }, { "epoch": 0.17199357152740083, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.4629, "step": 1515 }, { "epoch": 0.1721070986373199, "grad_norm": 0.390625, "learning_rate": 0.002, "loss": 5.4654, "step": 1516 }, { "epoch": 0.17222062574723898, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.477, "step": 1517 }, { "epoch": 0.17233415285715806, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.4586, "step": 1518 }, { "epoch": 0.17244767996707713, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.4732, "step": 1519 }, { "epoch": 0.1725612070769962, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.4437, "step": 1520 }, { "epoch": 0.17267473418691529, "grad_norm": 0.427734375, "learning_rate": 0.002, "loss": 5.4716, "step": 1521 }, { "epoch": 0.17278826129683436, "grad_norm": 0.419921875, "learning_rate": 0.002, "loss": 5.4551, "step": 1522 }, { "epoch": 0.17290178840675344, "grad_norm": 0.3984375, "learning_rate": 0.002, "loss": 5.4778, "step": 1523 }, { "epoch": 0.1730153155166725, "grad_norm": 0.41015625, "learning_rate": 0.002, "loss": 5.4687, "step": 1524 }, { "epoch": 0.1731288426265916, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.4522, "step": 1525 }, { "epoch": 0.17324236973651067, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.4479, "step": 1526 }, { "epoch": 0.17335589684642974, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.4743, "step": 1527 }, { "epoch": 0.17346942395634882, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.472, "step": 1528 }, { "epoch": 0.1735829510662679, "grad_norm": 0.3828125, "learning_rate": 0.002, "loss": 5.4773, "step": 1529 }, { "epoch": 0.17369647817618697, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.4717, "step": 1530 }, { "epoch": 0.17381000528610605, "grad_norm": 0.416015625, "learning_rate": 0.002, "loss": 5.4617, "step": 1531 }, { "epoch": 0.17392353239602512, "grad_norm": 0.388671875, "learning_rate": 0.002, "loss": 5.4705, "step": 1532 }, { "epoch": 0.1740370595059442, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.4663, "step": 1533 }, { "epoch": 0.17415058661586327, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.4957, "step": 1534 }, { "epoch": 0.17426411372578235, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.4572, "step": 1535 }, { "epoch": 0.17437764083570143, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.4683, "step": 1536 }, { "epoch": 0.1744911679456205, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.4949, "step": 1537 }, { "epoch": 0.17460469505553958, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.4521, "step": 1538 }, { "epoch": 0.17471822216545868, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.4502, "step": 1539 }, { "epoch": 0.17483174927537776, "grad_norm": 0.380859375, "learning_rate": 0.002, "loss": 5.4611, "step": 1540 }, { "epoch": 0.17494527638529683, "grad_norm": 0.5234375, "learning_rate": 0.002, "loss": 5.4778, "step": 1541 }, { "epoch": 0.1750588034952159, "grad_norm": 0.5390625, "learning_rate": 0.002, "loss": 5.4469, "step": 1542 }, { "epoch": 0.17517233060513498, "grad_norm": 0.486328125, "learning_rate": 0.002, "loss": 5.4497, "step": 1543 }, { "epoch": 0.17528585771505406, "grad_norm": 0.447265625, "learning_rate": 0.002, "loss": 5.4443, "step": 1544 }, { "epoch": 0.17539938482497314, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.4545, "step": 1545 }, { "epoch": 0.1755129119348922, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.4699, "step": 1546 }, { "epoch": 0.1756264390448113, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.4678, "step": 1547 }, { "epoch": 0.17573996615473036, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.4652, "step": 1548 }, { "epoch": 0.17585349326464944, "grad_norm": 0.388671875, "learning_rate": 0.002, "loss": 5.464, "step": 1549 }, { "epoch": 0.17596702037456852, "grad_norm": 0.482421875, "learning_rate": 0.002, "loss": 5.483, "step": 1550 }, { "epoch": 0.1760805474844876, "grad_norm": 0.4375, "learning_rate": 0.002, "loss": 5.4593, "step": 1551 }, { "epoch": 0.17619407459440667, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.4614, "step": 1552 }, { "epoch": 0.17630760170432574, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.4653, "step": 1553 }, { "epoch": 0.17642112881424482, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.4821, "step": 1554 }, { "epoch": 0.1765346559241639, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.4356, "step": 1555 }, { "epoch": 0.17664818303408297, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.4494, "step": 1556 }, { "epoch": 0.17676171014400205, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.4647, "step": 1557 }, { "epoch": 0.17687523725392112, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.4265, "step": 1558 }, { "epoch": 0.1769887643638402, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.4432, "step": 1559 }, { "epoch": 0.17710229147375928, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.4406, "step": 1560 }, { "epoch": 0.17721581858367835, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.4446, "step": 1561 }, { "epoch": 0.17732934569359743, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.467, "step": 1562 }, { "epoch": 0.1774428728035165, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.4731, "step": 1563 }, { "epoch": 0.17755639991343558, "grad_norm": 0.384765625, "learning_rate": 0.002, "loss": 5.4614, "step": 1564 }, { "epoch": 0.17766992702335466, "grad_norm": 0.421875, "learning_rate": 0.002, "loss": 5.456, "step": 1565 }, { "epoch": 0.17778345413327373, "grad_norm": 0.498046875, "learning_rate": 0.002, "loss": 5.4389, "step": 1566 }, { "epoch": 0.1778969812431928, "grad_norm": 0.431640625, "learning_rate": 0.002, "loss": 5.4437, "step": 1567 }, { "epoch": 0.17801050835311188, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.4411, "step": 1568 }, { "epoch": 0.17812403546303096, "grad_norm": 0.388671875, "learning_rate": 0.002, "loss": 5.4746, "step": 1569 }, { "epoch": 0.17823756257295004, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.4455, "step": 1570 }, { "epoch": 0.1783510896828691, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.4468, "step": 1571 }, { "epoch": 0.1784646167927882, "grad_norm": 0.3984375, "learning_rate": 0.002, "loss": 5.4619, "step": 1572 }, { "epoch": 0.17857814390270726, "grad_norm": 0.38671875, "learning_rate": 0.002, "loss": 5.4728, "step": 1573 }, { "epoch": 0.17869167101262634, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.4654, "step": 1574 }, { "epoch": 0.17880519812254542, "grad_norm": 0.392578125, "learning_rate": 0.002, "loss": 5.4495, "step": 1575 }, { "epoch": 0.1789187252324645, "grad_norm": 0.39453125, "learning_rate": 0.002, "loss": 5.4541, "step": 1576 }, { "epoch": 0.17903225234238357, "grad_norm": 0.3828125, "learning_rate": 0.002, "loss": 5.4461, "step": 1577 }, { "epoch": 0.17914577945230264, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.4525, "step": 1578 }, { "epoch": 0.17925930656222172, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.4405, "step": 1579 }, { "epoch": 0.1793728336721408, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.45, "step": 1580 }, { "epoch": 0.17948636078205987, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.4674, "step": 1581 }, { "epoch": 0.17959988789197895, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.4532, "step": 1582 }, { "epoch": 0.17971341500189802, "grad_norm": 0.4453125, "learning_rate": 0.002, "loss": 5.4626, "step": 1583 }, { "epoch": 0.1798269421118171, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.4286, "step": 1584 }, { "epoch": 0.17994046922173618, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.4414, "step": 1585 }, { "epoch": 0.18005399633165525, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.4642, "step": 1586 }, { "epoch": 0.18016752344157433, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.4289, "step": 1587 }, { "epoch": 0.1802810505514934, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.4624, "step": 1588 }, { "epoch": 0.18039457766141248, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.4389, "step": 1589 }, { "epoch": 0.18050810477133156, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.4468, "step": 1590 }, { "epoch": 0.18062163188125063, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.4455, "step": 1591 }, { "epoch": 0.1807351589911697, "grad_norm": 0.396484375, "learning_rate": 0.002, "loss": 5.4618, "step": 1592 }, { "epoch": 0.18084868610108878, "grad_norm": 0.427734375, "learning_rate": 0.002, "loss": 5.4449, "step": 1593 }, { "epoch": 0.18096221321100786, "grad_norm": 0.392578125, "learning_rate": 0.002, "loss": 5.4244, "step": 1594 }, { "epoch": 0.18107574032092694, "grad_norm": 0.392578125, "learning_rate": 0.002, "loss": 5.4657, "step": 1595 }, { "epoch": 0.181189267430846, "grad_norm": 0.380859375, "learning_rate": 0.002, "loss": 5.4504, "step": 1596 }, { "epoch": 0.1813027945407651, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.4436, "step": 1597 }, { "epoch": 0.18141632165068416, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.4427, "step": 1598 }, { "epoch": 0.18152984876060327, "grad_norm": 0.38671875, "learning_rate": 0.002, "loss": 5.4517, "step": 1599 }, { "epoch": 0.18164337587052234, "grad_norm": 0.396484375, "learning_rate": 0.002, "loss": 5.4587, "step": 1600 }, { "epoch": 0.18175690298044142, "grad_norm": 0.443359375, "learning_rate": 0.002, "loss": 5.4272, "step": 1601 }, { "epoch": 0.1818704300903605, "grad_norm": 0.41015625, "learning_rate": 0.002, "loss": 5.4805, "step": 1602 }, { "epoch": 0.18198395720027957, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.4365, "step": 1603 }, { "epoch": 0.18209748431019865, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.4694, "step": 1604 }, { "epoch": 0.18221101142011772, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.4463, "step": 1605 }, { "epoch": 0.1823245385300368, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.4463, "step": 1606 }, { "epoch": 0.18243806563995588, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.4485, "step": 1607 }, { "epoch": 0.18255159274987495, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.485, "step": 1608 }, { "epoch": 0.18266511985979403, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.453, "step": 1609 }, { "epoch": 0.1827786469697131, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.4696, "step": 1610 }, { "epoch": 0.18289217407963218, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.4293, "step": 1611 }, { "epoch": 0.18300570118955126, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.4609, "step": 1612 }, { "epoch": 0.18311922829947033, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.4205, "step": 1613 }, { "epoch": 0.1832327554093894, "grad_norm": 0.52734375, "learning_rate": 0.002, "loss": 5.4594, "step": 1614 }, { "epoch": 0.18334628251930848, "grad_norm": 0.56640625, "learning_rate": 0.002, "loss": 5.4291, "step": 1615 }, { "epoch": 0.18345980962922756, "grad_norm": 0.5078125, "learning_rate": 0.002, "loss": 5.4399, "step": 1616 }, { "epoch": 0.18357333673914664, "grad_norm": 0.494140625, "learning_rate": 0.002, "loss": 5.4586, "step": 1617 }, { "epoch": 0.1836868638490657, "grad_norm": 0.388671875, "learning_rate": 0.002, "loss": 5.4633, "step": 1618 }, { "epoch": 0.1838003909589848, "grad_norm": 0.482421875, "learning_rate": 0.002, "loss": 5.4631, "step": 1619 }, { "epoch": 0.18391391806890386, "grad_norm": 0.4140625, "learning_rate": 0.002, "loss": 5.445, "step": 1620 }, { "epoch": 0.18402744517882294, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.4526, "step": 1621 }, { "epoch": 0.18414097228874202, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.4417, "step": 1622 }, { "epoch": 0.1842544993986611, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.454, "step": 1623 }, { "epoch": 0.18436802650858017, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.453, "step": 1624 }, { "epoch": 0.18448155361849924, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.4207, "step": 1625 }, { "epoch": 0.18459508072841832, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.4373, "step": 1626 }, { "epoch": 0.1847086078383374, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.4356, "step": 1627 }, { "epoch": 0.18482213494825647, "grad_norm": 0.408203125, "learning_rate": 0.002, "loss": 5.4404, "step": 1628 }, { "epoch": 0.18493566205817555, "grad_norm": 0.4296875, "learning_rate": 0.002, "loss": 5.4603, "step": 1629 }, { "epoch": 0.18504918916809462, "grad_norm": 0.4609375, "learning_rate": 0.002, "loss": 5.4375, "step": 1630 }, { "epoch": 0.1851627162780137, "grad_norm": 0.41796875, "learning_rate": 0.002, "loss": 5.4427, "step": 1631 }, { "epoch": 0.18527624338793278, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.4439, "step": 1632 }, { "epoch": 0.18538977049785185, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.4491, "step": 1633 }, { "epoch": 0.18550329760777093, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.4486, "step": 1634 }, { "epoch": 0.18561682471769, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.4597, "step": 1635 }, { "epoch": 0.18573035182760908, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.4445, "step": 1636 }, { "epoch": 0.18584387893752816, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.4322, "step": 1637 }, { "epoch": 0.18595740604744723, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.4408, "step": 1638 }, { "epoch": 0.1860709331573663, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.4643, "step": 1639 }, { "epoch": 0.18618446026728538, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.4536, "step": 1640 }, { "epoch": 0.18629798737720446, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.4587, "step": 1641 }, { "epoch": 0.18641151448712354, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.4356, "step": 1642 }, { "epoch": 0.1865250415970426, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.4437, "step": 1643 }, { "epoch": 0.1866385687069617, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.4518, "step": 1644 }, { "epoch": 0.18675209581688076, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.4505, "step": 1645 }, { "epoch": 0.18686562292679984, "grad_norm": 0.4296875, "learning_rate": 0.002, "loss": 5.4595, "step": 1646 }, { "epoch": 0.18697915003671892, "grad_norm": 0.412109375, "learning_rate": 0.002, "loss": 5.432, "step": 1647 }, { "epoch": 0.187092677146638, "grad_norm": 0.39453125, "learning_rate": 0.002, "loss": 5.4474, "step": 1648 }, { "epoch": 0.18720620425655707, "grad_norm": 0.38671875, "learning_rate": 0.002, "loss": 5.4421, "step": 1649 }, { "epoch": 0.18731973136647614, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.4705, "step": 1650 }, { "epoch": 0.18743325847639522, "grad_norm": 0.431640625, "learning_rate": 0.002, "loss": 5.4448, "step": 1651 }, { "epoch": 0.1875467855863143, "grad_norm": 0.43359375, "learning_rate": 0.002, "loss": 5.4486, "step": 1652 }, { "epoch": 0.18766031269623337, "grad_norm": 0.5390625, "learning_rate": 0.002, "loss": 5.4351, "step": 1653 }, { "epoch": 0.18777383980615245, "grad_norm": 0.5, "learning_rate": 0.002, "loss": 5.4437, "step": 1654 }, { "epoch": 0.18788736691607152, "grad_norm": 0.396484375, "learning_rate": 0.002, "loss": 5.4236, "step": 1655 }, { "epoch": 0.1880008940259906, "grad_norm": 0.42578125, "learning_rate": 0.002, "loss": 5.4349, "step": 1656 }, { "epoch": 0.18811442113590968, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.4658, "step": 1657 }, { "epoch": 0.18822794824582875, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.4347, "step": 1658 }, { "epoch": 0.18834147535574783, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.4195, "step": 1659 }, { "epoch": 0.18845500246566693, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.4737, "step": 1660 }, { "epoch": 0.188568529575586, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.4287, "step": 1661 }, { "epoch": 0.18868205668550508, "grad_norm": 0.46484375, "learning_rate": 0.002, "loss": 5.457, "step": 1662 }, { "epoch": 0.18879558379542416, "grad_norm": 0.408203125, "learning_rate": 0.002, "loss": 5.4479, "step": 1663 }, { "epoch": 0.18890911090534324, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.4372, "step": 1664 }, { "epoch": 0.1890226380152623, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.4425, "step": 1665 }, { "epoch": 0.1891361651251814, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.428, "step": 1666 }, { "epoch": 0.18924969223510046, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.4491, "step": 1667 }, { "epoch": 0.18936321934501954, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.4045, "step": 1668 }, { "epoch": 0.18947674645493862, "grad_norm": 0.384765625, "learning_rate": 0.002, "loss": 5.4429, "step": 1669 }, { "epoch": 0.1895902735648577, "grad_norm": 0.484375, "learning_rate": 0.002, "loss": 5.4346, "step": 1670 }, { "epoch": 0.18970380067477677, "grad_norm": 0.45703125, "learning_rate": 0.002, "loss": 5.4292, "step": 1671 }, { "epoch": 0.18981732778469584, "grad_norm": 0.39453125, "learning_rate": 0.002, "loss": 5.4513, "step": 1672 }, { "epoch": 0.18993085489461492, "grad_norm": 0.41015625, "learning_rate": 0.002, "loss": 5.4545, "step": 1673 }, { "epoch": 0.190044382004534, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.4336, "step": 1674 }, { "epoch": 0.19015790911445307, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.4561, "step": 1675 }, { "epoch": 0.19027143622437215, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.4442, "step": 1676 }, { "epoch": 0.19038496333429122, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.4376, "step": 1677 }, { "epoch": 0.1904984904442103, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.4509, "step": 1678 }, { "epoch": 0.19061201755412938, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.4609, "step": 1679 }, { "epoch": 0.19072554466404845, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.434, "step": 1680 }, { "epoch": 0.19083907177396753, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.4381, "step": 1681 }, { "epoch": 0.1909525988838866, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.4327, "step": 1682 }, { "epoch": 0.19106612599380568, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.4649, "step": 1683 }, { "epoch": 0.19117965310372476, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.4252, "step": 1684 }, { "epoch": 0.19129318021364383, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.425, "step": 1685 }, { "epoch": 0.1914067073235629, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.4499, "step": 1686 }, { "epoch": 0.19152023443348198, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.4485, "step": 1687 }, { "epoch": 0.19163376154340106, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.4428, "step": 1688 }, { "epoch": 0.19174728865332014, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.4555, "step": 1689 }, { "epoch": 0.1918608157632392, "grad_norm": 0.4296875, "learning_rate": 0.002, "loss": 5.4447, "step": 1690 }, { "epoch": 0.1919743428731583, "grad_norm": 0.462890625, "learning_rate": 0.002, "loss": 5.4394, "step": 1691 }, { "epoch": 0.19208786998307736, "grad_norm": 0.38671875, "learning_rate": 0.002, "loss": 5.4614, "step": 1692 }, { "epoch": 0.19220139709299644, "grad_norm": 0.423828125, "learning_rate": 0.002, "loss": 5.4278, "step": 1693 }, { "epoch": 0.19231492420291552, "grad_norm": 0.412109375, "learning_rate": 0.002, "loss": 5.4706, "step": 1694 }, { "epoch": 0.1924284513128346, "grad_norm": 0.384765625, "learning_rate": 0.002, "loss": 5.4458, "step": 1695 }, { "epoch": 0.19254197842275367, "grad_norm": 0.40625, "learning_rate": 0.002, "loss": 5.4402, "step": 1696 }, { "epoch": 0.19265550553267274, "grad_norm": 0.423828125, "learning_rate": 0.002, "loss": 5.4508, "step": 1697 }, { "epoch": 0.19276903264259182, "grad_norm": 0.38671875, "learning_rate": 0.002, "loss": 5.4455, "step": 1698 }, { "epoch": 0.1928825597525109, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.4254, "step": 1699 }, { "epoch": 0.19299608686242997, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.44, "step": 1700 }, { "epoch": 0.19310961397234905, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.451, "step": 1701 }, { "epoch": 0.19322314108226812, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.436, "step": 1702 }, { "epoch": 0.1933366681921872, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.4104, "step": 1703 }, { "epoch": 0.19345019530210628, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.4251, "step": 1704 }, { "epoch": 0.19356372241202535, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.4337, "step": 1705 }, { "epoch": 0.19367724952194443, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.4406, "step": 1706 }, { "epoch": 0.1937907766318635, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.439, "step": 1707 }, { "epoch": 0.19390430374178258, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.4343, "step": 1708 }, { "epoch": 0.19401783085170166, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.4254, "step": 1709 }, { "epoch": 0.19413135796162073, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.4301, "step": 1710 }, { "epoch": 0.1942448850715398, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.4377, "step": 1711 }, { "epoch": 0.19435841218145888, "grad_norm": 0.470703125, "learning_rate": 0.002, "loss": 5.4302, "step": 1712 }, { "epoch": 0.19447193929137796, "grad_norm": 0.419921875, "learning_rate": 0.002, "loss": 5.4458, "step": 1713 }, { "epoch": 0.19458546640129704, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.4197, "step": 1714 }, { "epoch": 0.1946989935112161, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.4456, "step": 1715 }, { "epoch": 0.1948125206211352, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.4335, "step": 1716 }, { "epoch": 0.19492604773105426, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.4399, "step": 1717 }, { "epoch": 0.19503957484097334, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.4432, "step": 1718 }, { "epoch": 0.19515310195089242, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.4308, "step": 1719 }, { "epoch": 0.19526662906081152, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.4433, "step": 1720 }, { "epoch": 0.1953801561707306, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.4424, "step": 1721 }, { "epoch": 0.19549368328064967, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.4281, "step": 1722 }, { "epoch": 0.19560721039056875, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.4521, "step": 1723 }, { "epoch": 0.19572073750048782, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.4382, "step": 1724 }, { "epoch": 0.1958342646104069, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.4436, "step": 1725 }, { "epoch": 0.19594779172032598, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.4182, "step": 1726 }, { "epoch": 0.19606131883024505, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.4215, "step": 1727 }, { "epoch": 0.19617484594016413, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.425, "step": 1728 }, { "epoch": 0.1962883730500832, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.4334, "step": 1729 }, { "epoch": 0.19640190016000228, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.4219, "step": 1730 }, { "epoch": 0.19651542726992136, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.4228, "step": 1731 }, { "epoch": 0.19662895437984043, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.4405, "step": 1732 }, { "epoch": 0.1967424814897595, "grad_norm": 0.384765625, "learning_rate": 0.002, "loss": 5.4157, "step": 1733 }, { "epoch": 0.19685600859967858, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.4272, "step": 1734 }, { "epoch": 0.19696953570959766, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.426, "step": 1735 }, { "epoch": 0.19708306281951674, "grad_norm": 0.38671875, "learning_rate": 0.002, "loss": 5.4255, "step": 1736 }, { "epoch": 0.1971965899294358, "grad_norm": 0.40234375, "learning_rate": 0.002, "loss": 5.44, "step": 1737 }, { "epoch": 0.1973101170393549, "grad_norm": 0.4765625, "learning_rate": 0.002, "loss": 5.4355, "step": 1738 }, { "epoch": 0.19742364414927396, "grad_norm": 0.50390625, "learning_rate": 0.002, "loss": 5.4303, "step": 1739 }, { "epoch": 0.19753717125919304, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.4257, "step": 1740 }, { "epoch": 0.19765069836911212, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.4174, "step": 1741 }, { "epoch": 0.1977642254790312, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.45, "step": 1742 }, { "epoch": 0.19787775258895027, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.4156, "step": 1743 }, { "epoch": 0.19799127969886934, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.4177, "step": 1744 }, { "epoch": 0.19810480680878842, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.4242, "step": 1745 }, { "epoch": 0.1982183339187075, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.416, "step": 1746 }, { "epoch": 0.19833186102862657, "grad_norm": 0.384765625, "learning_rate": 0.002, "loss": 5.4304, "step": 1747 }, { "epoch": 0.19844538813854565, "grad_norm": 0.4453125, "learning_rate": 0.002, "loss": 5.4186, "step": 1748 }, { "epoch": 0.19855891524846472, "grad_norm": 0.435546875, "learning_rate": 0.002, "loss": 5.4159, "step": 1749 }, { "epoch": 0.1986724423583838, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.4203, "step": 1750 }, { "epoch": 0.19878596946830288, "grad_norm": 0.3984375, "learning_rate": 0.002, "loss": 5.4175, "step": 1751 }, { "epoch": 0.19889949657822195, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.4461, "step": 1752 }, { "epoch": 0.19901302368814103, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.4222, "step": 1753 }, { "epoch": 0.1991265507980601, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.4224, "step": 1754 }, { "epoch": 0.19924007790797918, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.4232, "step": 1755 }, { "epoch": 0.19935360501789826, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.4121, "step": 1756 }, { "epoch": 0.19946713212781733, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.4381, "step": 1757 }, { "epoch": 0.1995806592377364, "grad_norm": 0.40625, "learning_rate": 0.002, "loss": 5.4171, "step": 1758 }, { "epoch": 0.19969418634765548, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.4242, "step": 1759 }, { "epoch": 0.19980771345757456, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.4195, "step": 1760 }, { "epoch": 0.19992124056749364, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.4407, "step": 1761 }, { "epoch": 0.2000347676774127, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.4333, "step": 1762 }, { "epoch": 0.2001482947873318, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.3916, "step": 1763 }, { "epoch": 0.20026182189725086, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.4317, "step": 1764 }, { "epoch": 0.20037534900716994, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.4223, "step": 1765 }, { "epoch": 0.20048887611708902, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.4211, "step": 1766 }, { "epoch": 0.2006024032270081, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.443, "step": 1767 }, { "epoch": 0.20071593033692717, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.4414, "step": 1768 }, { "epoch": 0.20082945744684624, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.4441, "step": 1769 }, { "epoch": 0.20094298455676532, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.4197, "step": 1770 }, { "epoch": 0.2010565116666844, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.4335, "step": 1771 }, { "epoch": 0.20117003877660347, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.4135, "step": 1772 }, { "epoch": 0.20128356588652255, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.415, "step": 1773 }, { "epoch": 0.20139709299644162, "grad_norm": 0.37109375, "learning_rate": 0.002, "loss": 5.4429, "step": 1774 }, { "epoch": 0.2015106201063607, "grad_norm": 0.404296875, "learning_rate": 0.002, "loss": 5.4239, "step": 1775 }, { "epoch": 0.20162414721627978, "grad_norm": 0.412109375, "learning_rate": 0.002, "loss": 5.4177, "step": 1776 }, { "epoch": 0.20173767432619885, "grad_norm": 0.4453125, "learning_rate": 0.002, "loss": 5.4501, "step": 1777 }, { "epoch": 0.20185120143611793, "grad_norm": 0.48828125, "learning_rate": 0.002, "loss": 5.4213, "step": 1778 }, { "epoch": 0.201964728546037, "grad_norm": 0.435546875, "learning_rate": 0.002, "loss": 5.4293, "step": 1779 }, { "epoch": 0.2020782556559561, "grad_norm": 0.423828125, "learning_rate": 0.002, "loss": 5.4367, "step": 1780 }, { "epoch": 0.20219178276587518, "grad_norm": 0.396484375, "learning_rate": 0.002, "loss": 5.4392, "step": 1781 }, { "epoch": 0.20230530987579426, "grad_norm": 0.380859375, "learning_rate": 0.002, "loss": 5.4157, "step": 1782 }, { "epoch": 0.20241883698571334, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.4281, "step": 1783 }, { "epoch": 0.2025323640956324, "grad_norm": 0.400390625, "learning_rate": 0.002, "loss": 5.3975, "step": 1784 }, { "epoch": 0.2026458912055515, "grad_norm": 0.470703125, "learning_rate": 0.002, "loss": 5.4414, "step": 1785 }, { "epoch": 0.20275941831547056, "grad_norm": 0.458984375, "learning_rate": 0.002, "loss": 5.4281, "step": 1786 }, { "epoch": 0.20287294542538964, "grad_norm": 0.396484375, "learning_rate": 0.002, "loss": 5.4183, "step": 1787 }, { "epoch": 0.20298647253530872, "grad_norm": 0.37109375, "learning_rate": 0.002, "loss": 5.4122, "step": 1788 }, { "epoch": 0.2030999996452278, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.4049, "step": 1789 }, { "epoch": 0.20321352675514687, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.4121, "step": 1790 }, { "epoch": 0.20332705386506594, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.4204, "step": 1791 }, { "epoch": 0.20344058097498502, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.4223, "step": 1792 }, { "epoch": 0.2035541080849041, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.4206, "step": 1793 }, { "epoch": 0.20366763519482317, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.4167, "step": 1794 }, { "epoch": 0.20378116230474225, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.4226, "step": 1795 }, { "epoch": 0.20389468941466132, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.4224, "step": 1796 }, { "epoch": 0.2040082165245804, "grad_norm": 0.419921875, "learning_rate": 0.002, "loss": 5.4074, "step": 1797 }, { "epoch": 0.20412174363449948, "grad_norm": 0.388671875, "learning_rate": 0.002, "loss": 5.4349, "step": 1798 }, { "epoch": 0.20423527074441855, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.3955, "step": 1799 }, { "epoch": 0.20434879785433763, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.4145, "step": 1800 }, { "epoch": 0.2044623249642567, "grad_norm": 0.439453125, "learning_rate": 0.002, "loss": 5.4225, "step": 1801 }, { "epoch": 0.20457585207417578, "grad_norm": 0.40234375, "learning_rate": 0.002, "loss": 5.3982, "step": 1802 }, { "epoch": 0.20468937918409486, "grad_norm": 0.3828125, "learning_rate": 0.002, "loss": 5.4133, "step": 1803 }, { "epoch": 0.20480290629401393, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.4301, "step": 1804 }, { "epoch": 0.204916433403933, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.4216, "step": 1805 }, { "epoch": 0.20502996051385208, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.4176, "step": 1806 }, { "epoch": 0.20514348762377116, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.4278, "step": 1807 }, { "epoch": 0.20525701473369024, "grad_norm": 0.25, "learning_rate": 0.002, "loss": 5.4071, "step": 1808 }, { "epoch": 0.2053705418436093, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.4412, "step": 1809 }, { "epoch": 0.2054840689535284, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.4154, "step": 1810 }, { "epoch": 0.20559759606344746, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.4196, "step": 1811 }, { "epoch": 0.20571112317336654, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.4096, "step": 1812 }, { "epoch": 0.20582465028328562, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.411, "step": 1813 }, { "epoch": 0.2059381773932047, "grad_norm": 0.396484375, "learning_rate": 0.002, "loss": 5.4079, "step": 1814 }, { "epoch": 0.20605170450312377, "grad_norm": 0.5078125, "learning_rate": 0.002, "loss": 5.4057, "step": 1815 }, { "epoch": 0.20616523161304284, "grad_norm": 0.458984375, "learning_rate": 0.002, "loss": 5.4242, "step": 1816 }, { "epoch": 0.20627875872296192, "grad_norm": 0.40234375, "learning_rate": 0.002, "loss": 5.429, "step": 1817 }, { "epoch": 0.206392285832881, "grad_norm": 0.400390625, "learning_rate": 0.002, "loss": 5.4308, "step": 1818 }, { "epoch": 0.20650581294280007, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.4171, "step": 1819 }, { "epoch": 0.20661934005271915, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.4254, "step": 1820 }, { "epoch": 0.20673286716263822, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.4287, "step": 1821 }, { "epoch": 0.2068463942725573, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.4344, "step": 1822 }, { "epoch": 0.20695992138247638, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.4188, "step": 1823 }, { "epoch": 0.20707344849239545, "grad_norm": 0.2412109375, "learning_rate": 0.002, "loss": 5.4039, "step": 1824 }, { "epoch": 0.20718697560231453, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.4286, "step": 1825 }, { "epoch": 0.2073005027122336, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.4135, "step": 1826 }, { "epoch": 0.20741402982215268, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.4281, "step": 1827 }, { "epoch": 0.20752755693207176, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.4046, "step": 1828 }, { "epoch": 0.20764108404199083, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.4162, "step": 1829 }, { "epoch": 0.2077546111519099, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.421, "step": 1830 }, { "epoch": 0.20786813826182898, "grad_norm": 0.498046875, "learning_rate": 0.002, "loss": 5.4429, "step": 1831 }, { "epoch": 0.20798166537174806, "grad_norm": 0.455078125, "learning_rate": 0.002, "loss": 5.418, "step": 1832 }, { "epoch": 0.20809519248166714, "grad_norm": 0.408203125, "learning_rate": 0.002, "loss": 5.412, "step": 1833 }, { "epoch": 0.2082087195915862, "grad_norm": 0.412109375, "learning_rate": 0.002, "loss": 5.4358, "step": 1834 }, { "epoch": 0.2083222467015053, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.4257, "step": 1835 }, { "epoch": 0.20843577381142436, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.4159, "step": 1836 }, { "epoch": 0.20854930092134344, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.4374, "step": 1837 }, { "epoch": 0.20866282803126251, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.4029, "step": 1838 }, { "epoch": 0.2087763551411816, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.3989, "step": 1839 }, { "epoch": 0.20888988225110067, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.4171, "step": 1840 }, { "epoch": 0.20900340936101977, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.422, "step": 1841 }, { "epoch": 0.20911693647093885, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.4012, "step": 1842 }, { "epoch": 0.20923046358085792, "grad_norm": 0.46875, "learning_rate": 0.002, "loss": 5.4251, "step": 1843 }, { "epoch": 0.209343990690777, "grad_norm": 0.47265625, "learning_rate": 0.002, "loss": 5.416, "step": 1844 }, { "epoch": 0.20945751780069607, "grad_norm": 0.443359375, "learning_rate": 0.002, "loss": 5.4226, "step": 1845 }, { "epoch": 0.20957104491061515, "grad_norm": 0.421875, "learning_rate": 0.002, "loss": 5.427, "step": 1846 }, { "epoch": 0.20968457202053423, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.4247, "step": 1847 }, { "epoch": 0.2097980991304533, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.4358, "step": 1848 }, { "epoch": 0.20991162624037238, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.391, "step": 1849 }, { "epoch": 0.21002515335029145, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.419, "step": 1850 }, { "epoch": 0.21013868046021053, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.4086, "step": 1851 }, { "epoch": 0.2102522075701296, "grad_norm": 0.384765625, "learning_rate": 0.002, "loss": 5.4202, "step": 1852 }, { "epoch": 0.21036573468004868, "grad_norm": 0.40234375, "learning_rate": 0.002, "loss": 5.4064, "step": 1853 }, { "epoch": 0.21047926178996776, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.4271, "step": 1854 }, { "epoch": 0.21059278889988683, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.4143, "step": 1855 }, { "epoch": 0.2107063160098059, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.3868, "step": 1856 }, { "epoch": 0.210819843119725, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.4158, "step": 1857 }, { "epoch": 0.21093337022964406, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.4217, "step": 1858 }, { "epoch": 0.21104689733956314, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.4094, "step": 1859 }, { "epoch": 0.21116042444948221, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.4318, "step": 1860 }, { "epoch": 0.2112739515594013, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.4094, "step": 1861 }, { "epoch": 0.21138747866932037, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.4279, "step": 1862 }, { "epoch": 0.21150100577923944, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.408, "step": 1863 }, { "epoch": 0.21161453288915852, "grad_norm": 0.419921875, "learning_rate": 0.002, "loss": 5.4187, "step": 1864 }, { "epoch": 0.2117280599990776, "grad_norm": 0.41796875, "learning_rate": 0.002, "loss": 5.4209, "step": 1865 }, { "epoch": 0.21184158710899667, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.412, "step": 1866 }, { "epoch": 0.21195511421891575, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.4038, "step": 1867 }, { "epoch": 0.21206864132883482, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.415, "step": 1868 }, { "epoch": 0.2121821684387539, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.4166, "step": 1869 }, { "epoch": 0.21229569554867297, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.4355, "step": 1870 }, { "epoch": 0.21240922265859205, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.423, "step": 1871 }, { "epoch": 0.21252274976851113, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.4043, "step": 1872 }, { "epoch": 0.2126362768784302, "grad_norm": 0.3828125, "learning_rate": 0.002, "loss": 5.4141, "step": 1873 }, { "epoch": 0.21274980398834928, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.4215, "step": 1874 }, { "epoch": 0.21286333109826835, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.4197, "step": 1875 }, { "epoch": 0.21297685820818743, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.4246, "step": 1876 }, { "epoch": 0.2130903853181065, "grad_norm": 0.4140625, "learning_rate": 0.002, "loss": 5.4259, "step": 1877 }, { "epoch": 0.21320391242802558, "grad_norm": 0.4296875, "learning_rate": 0.002, "loss": 5.428, "step": 1878 }, { "epoch": 0.21331743953794466, "grad_norm": 0.42578125, "learning_rate": 0.002, "loss": 5.407, "step": 1879 }, { "epoch": 0.21343096664786373, "grad_norm": 0.462890625, "learning_rate": 0.002, "loss": 5.4029, "step": 1880 }, { "epoch": 0.2135444937577828, "grad_norm": 0.447265625, "learning_rate": 0.002, "loss": 5.4101, "step": 1881 }, { "epoch": 0.2136580208677019, "grad_norm": 0.4296875, "learning_rate": 0.002, "loss": 5.4078, "step": 1882 }, { "epoch": 0.21377154797762096, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.441, "step": 1883 }, { "epoch": 0.21388507508754004, "grad_norm": 0.388671875, "learning_rate": 0.002, "loss": 5.4151, "step": 1884 }, { "epoch": 0.21399860219745911, "grad_norm": 0.400390625, "learning_rate": 0.002, "loss": 5.4206, "step": 1885 }, { "epoch": 0.2141121293073782, "grad_norm": 0.39453125, "learning_rate": 0.002, "loss": 5.4111, "step": 1886 }, { "epoch": 0.21422565641729727, "grad_norm": 0.400390625, "learning_rate": 0.002, "loss": 5.4172, "step": 1887 }, { "epoch": 0.21433918352721634, "grad_norm": 0.392578125, "learning_rate": 0.002, "loss": 5.401, "step": 1888 }, { "epoch": 0.21445271063713542, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.4182, "step": 1889 }, { "epoch": 0.2145662377470545, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.3957, "step": 1890 }, { "epoch": 0.21467976485697357, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.3993, "step": 1891 }, { "epoch": 0.21479329196689265, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.4031, "step": 1892 }, { "epoch": 0.21490681907681172, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.4166, "step": 1893 }, { "epoch": 0.2150203461867308, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.4179, "step": 1894 }, { "epoch": 0.21513387329664987, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.424, "step": 1895 }, { "epoch": 0.21524740040656895, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.4048, "step": 1896 }, { "epoch": 0.21536092751648803, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.4076, "step": 1897 }, { "epoch": 0.2154744546264071, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.4134, "step": 1898 }, { "epoch": 0.21558798173632618, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.4067, "step": 1899 }, { "epoch": 0.21570150884624525, "grad_norm": 0.3984375, "learning_rate": 0.002, "loss": 5.3943, "step": 1900 }, { "epoch": 0.21581503595616436, "grad_norm": 0.390625, "learning_rate": 0.002, "loss": 5.406, "step": 1901 }, { "epoch": 0.21592856306608343, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.3992, "step": 1902 }, { "epoch": 0.2160420901760025, "grad_norm": 0.41015625, "learning_rate": 0.002, "loss": 5.4058, "step": 1903 }, { "epoch": 0.2161556172859216, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.4055, "step": 1904 }, { "epoch": 0.21626914439584066, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.3939, "step": 1905 }, { "epoch": 0.21638267150575974, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.4179, "step": 1906 }, { "epoch": 0.21649619861567881, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.4217, "step": 1907 }, { "epoch": 0.2166097257255979, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.4194, "step": 1908 }, { "epoch": 0.21672325283551697, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.3921, "step": 1909 }, { "epoch": 0.21683677994543604, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.4091, "step": 1910 }, { "epoch": 0.21695030705535512, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.41, "step": 1911 }, { "epoch": 0.2170638341652742, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.394, "step": 1912 }, { "epoch": 0.21717736127519327, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.4143, "step": 1913 }, { "epoch": 0.21729088838511235, "grad_norm": 0.48828125, "learning_rate": 0.002, "loss": 5.4048, "step": 1914 }, { "epoch": 0.21740441549503142, "grad_norm": 0.4453125, "learning_rate": 0.002, "loss": 5.4296, "step": 1915 }, { "epoch": 0.2175179426049505, "grad_norm": 0.400390625, "learning_rate": 0.002, "loss": 5.3821, "step": 1916 }, { "epoch": 0.21763146971486957, "grad_norm": 0.41796875, "learning_rate": 0.002, "loss": 5.4009, "step": 1917 }, { "epoch": 0.21774499682478865, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.4232, "step": 1918 }, { "epoch": 0.21785852393470773, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.4186, "step": 1919 }, { "epoch": 0.2179720510446268, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.4007, "step": 1920 }, { "epoch": 0.21808557815454588, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.4072, "step": 1921 }, { "epoch": 0.21819910526446495, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.375, "step": 1922 }, { "epoch": 0.21831263237438403, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.4234, "step": 1923 }, { "epoch": 0.2184261594843031, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.3999, "step": 1924 }, { "epoch": 0.21853968659422218, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.3978, "step": 1925 }, { "epoch": 0.21865321370414126, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.4143, "step": 1926 }, { "epoch": 0.21876674081406033, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.4105, "step": 1927 }, { "epoch": 0.2188802679239794, "grad_norm": 0.46484375, "learning_rate": 0.002, "loss": 5.4058, "step": 1928 }, { "epoch": 0.2189937950338985, "grad_norm": 0.41015625, "learning_rate": 0.002, "loss": 5.4075, "step": 1929 }, { "epoch": 0.21910732214381756, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.4129, "step": 1930 }, { "epoch": 0.21922084925373664, "grad_norm": 0.388671875, "learning_rate": 0.002, "loss": 5.4163, "step": 1931 }, { "epoch": 0.21933437636365571, "grad_norm": 0.37109375, "learning_rate": 0.002, "loss": 5.4088, "step": 1932 }, { "epoch": 0.2194479034735748, "grad_norm": 0.478515625, "learning_rate": 0.002, "loss": 5.3952, "step": 1933 }, { "epoch": 0.21956143058349387, "grad_norm": 0.41015625, "learning_rate": 0.002, "loss": 5.382, "step": 1934 }, { "epoch": 0.21967495769341294, "grad_norm": 0.400390625, "learning_rate": 0.002, "loss": 5.4053, "step": 1935 }, { "epoch": 0.21978848480333202, "grad_norm": 0.3984375, "learning_rate": 0.002, "loss": 5.3779, "step": 1936 }, { "epoch": 0.2199020119132511, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.3973, "step": 1937 }, { "epoch": 0.22001553902317017, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.4245, "step": 1938 }, { "epoch": 0.22012906613308925, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.4045, "step": 1939 }, { "epoch": 0.22024259324300832, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.3981, "step": 1940 }, { "epoch": 0.2203561203529274, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.412, "step": 1941 }, { "epoch": 0.22046964746284647, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.4019, "step": 1942 }, { "epoch": 0.22058317457276555, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.4214, "step": 1943 }, { "epoch": 0.22069670168268463, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.3993, "step": 1944 }, { "epoch": 0.2208102287926037, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.4013, "step": 1945 }, { "epoch": 0.22092375590252278, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.4114, "step": 1946 }, { "epoch": 0.22103728301244185, "grad_norm": 0.392578125, "learning_rate": 0.002, "loss": 5.4081, "step": 1947 }, { "epoch": 0.22115081012236093, "grad_norm": 0.4296875, "learning_rate": 0.002, "loss": 5.4156, "step": 1948 }, { "epoch": 0.22126433723228, "grad_norm": 0.48046875, "learning_rate": 0.002, "loss": 5.4123, "step": 1949 }, { "epoch": 0.22137786434219908, "grad_norm": 0.439453125, "learning_rate": 0.002, "loss": 5.4018, "step": 1950 }, { "epoch": 0.22149139145211816, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.387, "step": 1951 }, { "epoch": 0.22160491856203723, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.3916, "step": 1952 }, { "epoch": 0.2217184456719563, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.4162, "step": 1953 }, { "epoch": 0.2218319727818754, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.377, "step": 1954 }, { "epoch": 0.22194549989179446, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.3958, "step": 1955 }, { "epoch": 0.22205902700171354, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.4014, "step": 1956 }, { "epoch": 0.22217255411163261, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.4109, "step": 1957 }, { "epoch": 0.2222860812215517, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.4088, "step": 1958 }, { "epoch": 0.22239960833147077, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.4198, "step": 1959 }, { "epoch": 0.22251313544138984, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.4139, "step": 1960 }, { "epoch": 0.22262666255130895, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.3946, "step": 1961 }, { "epoch": 0.22274018966122802, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.4308, "step": 1962 }, { "epoch": 0.2228537167711471, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.4132, "step": 1963 }, { "epoch": 0.22296724388106617, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.4049, "step": 1964 }, { "epoch": 0.22308077099098525, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.4155, "step": 1965 }, { "epoch": 0.22319429810090433, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.3751, "step": 1966 }, { "epoch": 0.2233078252108234, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.3814, "step": 1967 }, { "epoch": 0.22342135232074248, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.4234, "step": 1968 }, { "epoch": 0.22353487943066155, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.3833, "step": 1969 }, { "epoch": 0.22364840654058063, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.4179, "step": 1970 }, { "epoch": 0.2237619336504997, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.388, "step": 1971 }, { "epoch": 0.22387546076041878, "grad_norm": 0.4375, "learning_rate": 0.002, "loss": 5.4069, "step": 1972 }, { "epoch": 0.22398898787033786, "grad_norm": 0.412109375, "learning_rate": 0.002, "loss": 5.3993, "step": 1973 }, { "epoch": 0.22410251498025693, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.4097, "step": 1974 }, { "epoch": 0.224216042090176, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.4118, "step": 1975 }, { "epoch": 0.22432956920009509, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.4064, "step": 1976 }, { "epoch": 0.22444309631001416, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.4124, "step": 1977 }, { "epoch": 0.22455662341993324, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.4048, "step": 1978 }, { "epoch": 0.2246701505298523, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.4064, "step": 1979 }, { "epoch": 0.2247836776397714, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.3858, "step": 1980 }, { "epoch": 0.22489720474969047, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.4007, "step": 1981 }, { "epoch": 0.22501073185960954, "grad_norm": 0.38671875, "learning_rate": 0.002, "loss": 5.4041, "step": 1982 }, { "epoch": 0.22512425896952862, "grad_norm": 0.37109375, "learning_rate": 0.002, "loss": 5.3858, "step": 1983 }, { "epoch": 0.2252377860794477, "grad_norm": 0.412109375, "learning_rate": 0.002, "loss": 5.4045, "step": 1984 }, { "epoch": 0.22535131318936677, "grad_norm": 0.3984375, "learning_rate": 0.002, "loss": 5.403, "step": 1985 }, { "epoch": 0.22546484029928585, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.3755, "step": 1986 }, { "epoch": 0.22557836740920492, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.3899, "step": 1987 }, { "epoch": 0.225691894519124, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.4022, "step": 1988 }, { "epoch": 0.22580542162904307, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.389, "step": 1989 }, { "epoch": 0.22591894873896215, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.4022, "step": 1990 }, { "epoch": 0.22603247584888123, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.3991, "step": 1991 }, { "epoch": 0.2261460029588003, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.3909, "step": 1992 }, { "epoch": 0.22625953006871938, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.3888, "step": 1993 }, { "epoch": 0.22637305717863845, "grad_norm": 0.390625, "learning_rate": 0.002, "loss": 5.4074, "step": 1994 }, { "epoch": 0.22648658428855753, "grad_norm": 0.482421875, "learning_rate": 0.002, "loss": 5.4051, "step": 1995 }, { "epoch": 0.2266001113984766, "grad_norm": 0.40625, "learning_rate": 0.002, "loss": 5.3952, "step": 1996 }, { "epoch": 0.22671363850839568, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.4027, "step": 1997 }, { "epoch": 0.22682716561831476, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.4056, "step": 1998 }, { "epoch": 0.22694069272823383, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.4077, "step": 1999 }, { "epoch": 0.2270542198381529, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.4049, "step": 2000 }, { "epoch": 0.22716774694807199, "grad_norm": 0.2451171875, "learning_rate": 0.002, "loss": 5.4031, "step": 2001 }, { "epoch": 0.22728127405799106, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.4046, "step": 2002 }, { "epoch": 0.22739480116791014, "grad_norm": 0.2451171875, "learning_rate": 0.002, "loss": 5.3963, "step": 2003 }, { "epoch": 0.2275083282778292, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.3966, "step": 2004 }, { "epoch": 0.2276218553877483, "grad_norm": 0.244140625, "learning_rate": 0.002, "loss": 5.3972, "step": 2005 }, { "epoch": 0.22773538249766737, "grad_norm": 0.24609375, "learning_rate": 0.002, "loss": 5.4023, "step": 2006 }, { "epoch": 0.22784890960758644, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.3935, "step": 2007 }, { "epoch": 0.22796243671750552, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.4097, "step": 2008 }, { "epoch": 0.2280759638274246, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.3915, "step": 2009 }, { "epoch": 0.22818949093734367, "grad_norm": 0.470703125, "learning_rate": 0.002, "loss": 5.4088, "step": 2010 }, { "epoch": 0.22830301804726275, "grad_norm": 0.44921875, "learning_rate": 0.002, "loss": 5.3922, "step": 2011 }, { "epoch": 0.22841654515718182, "grad_norm": 0.451171875, "learning_rate": 0.002, "loss": 5.3775, "step": 2012 }, { "epoch": 0.2285300722671009, "grad_norm": 0.4609375, "learning_rate": 0.002, "loss": 5.401, "step": 2013 }, { "epoch": 0.22864359937701997, "grad_norm": 0.41796875, "learning_rate": 0.002, "loss": 5.41, "step": 2014 }, { "epoch": 0.22875712648693905, "grad_norm": 0.41015625, "learning_rate": 0.002, "loss": 5.4001, "step": 2015 }, { "epoch": 0.22887065359685813, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.3902, "step": 2016 }, { "epoch": 0.2289841807067772, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.4054, "step": 2017 }, { "epoch": 0.22909770781669628, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.3748, "step": 2018 }, { "epoch": 0.22921123492661535, "grad_norm": 0.39453125, "learning_rate": 0.002, "loss": 5.3984, "step": 2019 }, { "epoch": 0.22932476203653443, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.3988, "step": 2020 }, { "epoch": 0.2294382891464535, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.3763, "step": 2021 }, { "epoch": 0.2295518162563726, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.385, "step": 2022 }, { "epoch": 0.22966534336629169, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.4149, "step": 2023 }, { "epoch": 0.22977887047621076, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.4083, "step": 2024 }, { "epoch": 0.22989239758612984, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.3961, "step": 2025 }, { "epoch": 0.2300059246960489, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.3852, "step": 2026 }, { "epoch": 0.230119451805968, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.385, "step": 2027 }, { "epoch": 0.23023297891588707, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.4009, "step": 2028 }, { "epoch": 0.23034650602580614, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.4018, "step": 2029 }, { "epoch": 0.23046003313572522, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.4244, "step": 2030 }, { "epoch": 0.2305735602456443, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.4061, "step": 2031 }, { "epoch": 0.23068708735556337, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.3881, "step": 2032 }, { "epoch": 0.23080061446548245, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.4068, "step": 2033 }, { "epoch": 0.23091414157540152, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.3866, "step": 2034 }, { "epoch": 0.2310276686853206, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.385, "step": 2035 }, { "epoch": 0.23114119579523967, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.4174, "step": 2036 }, { "epoch": 0.23125472290515875, "grad_norm": 0.458984375, "learning_rate": 0.002, "loss": 5.411, "step": 2037 }, { "epoch": 0.23136825001507783, "grad_norm": 0.447265625, "learning_rate": 0.002, "loss": 5.3917, "step": 2038 }, { "epoch": 0.2314817771249969, "grad_norm": 0.41796875, "learning_rate": 0.002, "loss": 5.4043, "step": 2039 }, { "epoch": 0.23159530423491598, "grad_norm": 0.44140625, "learning_rate": 0.002, "loss": 5.4053, "step": 2040 }, { "epoch": 0.23170883134483505, "grad_norm": 0.38671875, "learning_rate": 0.002, "loss": 5.4052, "step": 2041 }, { "epoch": 0.23182235845475413, "grad_norm": 0.390625, "learning_rate": 0.002, "loss": 5.4109, "step": 2042 }, { "epoch": 0.2319358855646732, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.3953, "step": 2043 }, { "epoch": 0.23204941267459228, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.3913, "step": 2044 }, { "epoch": 0.23216293978451136, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.3846, "step": 2045 }, { "epoch": 0.23227646689443043, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.4065, "step": 2046 }, { "epoch": 0.2323899940043495, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.3722, "step": 2047 }, { "epoch": 0.23250352111426859, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.402, "step": 2048 }, { "epoch": 0.23261704822418766, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.3825, "step": 2049 }, { "epoch": 0.23273057533410674, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.3987, "step": 2050 }, { "epoch": 0.2328441024440258, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.4239, "step": 2051 }, { "epoch": 0.2329576295539449, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.3948, "step": 2052 }, { "epoch": 0.23307115666386397, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.3854, "step": 2053 }, { "epoch": 0.23318468377378304, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.3738, "step": 2054 }, { "epoch": 0.23329821088370212, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.3851, "step": 2055 }, { "epoch": 0.2334117379936212, "grad_norm": 0.380859375, "learning_rate": 0.002, "loss": 5.4038, "step": 2056 }, { "epoch": 0.23352526510354027, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.401, "step": 2057 }, { "epoch": 0.23363879221345935, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.3975, "step": 2058 }, { "epoch": 0.23375231932337842, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.3579, "step": 2059 }, { "epoch": 0.2338658464332975, "grad_norm": 0.38671875, "learning_rate": 0.002, "loss": 5.3699, "step": 2060 }, { "epoch": 0.23397937354321657, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.3939, "step": 2061 }, { "epoch": 0.23409290065313565, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.4082, "step": 2062 }, { "epoch": 0.23420642776305473, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.3757, "step": 2063 }, { "epoch": 0.2343199548729738, "grad_norm": 0.23828125, "learning_rate": 0.002, "loss": 5.3791, "step": 2064 }, { "epoch": 0.23443348198289288, "grad_norm": 0.2412109375, "learning_rate": 0.002, "loss": 5.4254, "step": 2065 }, { "epoch": 0.23454700909281195, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.4129, "step": 2066 }, { "epoch": 0.23466053620273103, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.4029, "step": 2067 }, { "epoch": 0.2347740633126501, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.3978, "step": 2068 }, { "epoch": 0.23488759042256918, "grad_norm": 0.41015625, "learning_rate": 0.002, "loss": 5.3829, "step": 2069 }, { "epoch": 0.23500111753248826, "grad_norm": 0.396484375, "learning_rate": 0.002, "loss": 5.419, "step": 2070 }, { "epoch": 0.23511464464240733, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.3789, "step": 2071 }, { "epoch": 0.2352281717523264, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.4004, "step": 2072 }, { "epoch": 0.23534169886224549, "grad_norm": 0.421875, "learning_rate": 0.002, "loss": 5.4018, "step": 2073 }, { "epoch": 0.23545522597216456, "grad_norm": 0.3984375, "learning_rate": 0.002, "loss": 5.4067, "step": 2074 }, { "epoch": 0.23556875308208364, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.4032, "step": 2075 }, { "epoch": 0.2356822801920027, "grad_norm": 0.38671875, "learning_rate": 0.002, "loss": 5.4079, "step": 2076 }, { "epoch": 0.2357958073019218, "grad_norm": 0.37109375, "learning_rate": 0.002, "loss": 5.3909, "step": 2077 }, { "epoch": 0.23590933441184087, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.3813, "step": 2078 }, { "epoch": 0.23602286152175994, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.3918, "step": 2079 }, { "epoch": 0.23613638863167902, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.3936, "step": 2080 }, { "epoch": 0.2362499157415981, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.3989, "step": 2081 }, { "epoch": 0.2363634428515172, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.4042, "step": 2082 }, { "epoch": 0.23647696996143627, "grad_norm": 0.3828125, "learning_rate": 0.002, "loss": 5.3853, "step": 2083 }, { "epoch": 0.23659049707135535, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.3921, "step": 2084 }, { "epoch": 0.23670402418127443, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.4004, "step": 2085 }, { "epoch": 0.2368175512911935, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.3765, "step": 2086 }, { "epoch": 0.23693107840111258, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.4073, "step": 2087 }, { "epoch": 0.23704460551103165, "grad_norm": 0.2431640625, "learning_rate": 0.002, "loss": 5.393, "step": 2088 }, { "epoch": 0.23715813262095073, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.4015, "step": 2089 }, { "epoch": 0.2372716597308698, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.3903, "step": 2090 }, { "epoch": 0.23738518684078888, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.4097, "step": 2091 }, { "epoch": 0.23749871395070796, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.3742, "step": 2092 }, { "epoch": 0.23761224106062703, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.3701, "step": 2093 }, { "epoch": 0.2377257681705461, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.3913, "step": 2094 }, { "epoch": 0.23783929528046518, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.3918, "step": 2095 }, { "epoch": 0.23795282239038426, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.3823, "step": 2096 }, { "epoch": 0.23806634950030334, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.3811, "step": 2097 }, { "epoch": 0.2381798766102224, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.3858, "step": 2098 }, { "epoch": 0.2382934037201415, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.3962, "step": 2099 }, { "epoch": 0.23840693083006056, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.3937, "step": 2100 }, { "epoch": 0.23852045793997964, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.4131, "step": 2101 }, { "epoch": 0.23863398504989872, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.3794, "step": 2102 }, { "epoch": 0.2387475121598178, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.3989, "step": 2103 }, { "epoch": 0.23886103926973687, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.3994, "step": 2104 }, { "epoch": 0.23897456637965594, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.3824, "step": 2105 }, { "epoch": 0.23908809348957502, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.397, "step": 2106 }, { "epoch": 0.2392016205994941, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.4024, "step": 2107 }, { "epoch": 0.23931514770941317, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.4112, "step": 2108 }, { "epoch": 0.23942867481933225, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.3826, "step": 2109 }, { "epoch": 0.23954220192925132, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.376, "step": 2110 }, { "epoch": 0.2396557290391704, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.3851, "step": 2111 }, { "epoch": 0.23976925614908948, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.3876, "step": 2112 }, { "epoch": 0.23988278325900855, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.3778, "step": 2113 }, { "epoch": 0.23999631036892763, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.3819, "step": 2114 }, { "epoch": 0.2401098374788467, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.392, "step": 2115 }, { "epoch": 0.24022336458876578, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.3932, "step": 2116 }, { "epoch": 0.24033689169868486, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.3922, "step": 2117 }, { "epoch": 0.24045041880860393, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.408, "step": 2118 }, { "epoch": 0.240563945918523, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.3798, "step": 2119 }, { "epoch": 0.24067747302844208, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.3892, "step": 2120 }, { "epoch": 0.24079100013836116, "grad_norm": 0.412109375, "learning_rate": 0.002, "loss": 5.3991, "step": 2121 }, { "epoch": 0.24090452724828024, "grad_norm": 0.416015625, "learning_rate": 0.002, "loss": 5.3892, "step": 2122 }, { "epoch": 0.2410180543581993, "grad_norm": 0.400390625, "learning_rate": 0.002, "loss": 5.3834, "step": 2123 }, { "epoch": 0.2411315814681184, "grad_norm": 0.380859375, "learning_rate": 0.002, "loss": 5.398, "step": 2124 }, { "epoch": 0.24124510857803746, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.3822, "step": 2125 }, { "epoch": 0.24135863568795654, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.3883, "step": 2126 }, { "epoch": 0.24147216279787562, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.3846, "step": 2127 }, { "epoch": 0.2415856899077947, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.3829, "step": 2128 }, { "epoch": 0.24169921701771377, "grad_norm": 0.4765625, "learning_rate": 0.002, "loss": 5.3777, "step": 2129 }, { "epoch": 0.24181274412763284, "grad_norm": 0.4296875, "learning_rate": 0.002, "loss": 5.4042, "step": 2130 }, { "epoch": 0.24192627123755192, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.3933, "step": 2131 }, { "epoch": 0.242039798347471, "grad_norm": 0.3984375, "learning_rate": 0.002, "loss": 5.3824, "step": 2132 }, { "epoch": 0.24215332545739007, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.3863, "step": 2133 }, { "epoch": 0.24226685256730915, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.3743, "step": 2134 }, { "epoch": 0.24238037967722822, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.382, "step": 2135 }, { "epoch": 0.2424939067871473, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.3797, "step": 2136 }, { "epoch": 0.24260743389706638, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.3915, "step": 2137 }, { "epoch": 0.24272096100698545, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.3877, "step": 2138 }, { "epoch": 0.24283448811690453, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.3841, "step": 2139 }, { "epoch": 0.2429480152268236, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.3565, "step": 2140 }, { "epoch": 0.24306154233674268, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.3835, "step": 2141 }, { "epoch": 0.24317506944666178, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.3726, "step": 2142 }, { "epoch": 0.24328859655658086, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.3656, "step": 2143 }, { "epoch": 0.24340212366649994, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.3759, "step": 2144 }, { "epoch": 0.243515650776419, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.3909, "step": 2145 }, { "epoch": 0.2436291778863381, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.3735, "step": 2146 }, { "epoch": 0.24374270499625716, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.3757, "step": 2147 }, { "epoch": 0.24385623210617624, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.3903, "step": 2148 }, { "epoch": 0.24396975921609532, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.3924, "step": 2149 }, { "epoch": 0.2440832863260144, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.3989, "step": 2150 }, { "epoch": 0.24419681343593347, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.3725, "step": 2151 }, { "epoch": 0.24431034054585254, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.3858, "step": 2152 }, { "epoch": 0.24442386765577162, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.3765, "step": 2153 }, { "epoch": 0.2445373947656907, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.3717, "step": 2154 }, { "epoch": 0.24465092187560977, "grad_norm": 0.416015625, "learning_rate": 0.002, "loss": 5.3982, "step": 2155 }, { "epoch": 0.24476444898552885, "grad_norm": 0.396484375, "learning_rate": 0.002, "loss": 5.3693, "step": 2156 }, { "epoch": 0.24487797609544792, "grad_norm": 0.392578125, "learning_rate": 0.002, "loss": 5.3722, "step": 2157 }, { "epoch": 0.244991503205367, "grad_norm": 0.408203125, "learning_rate": 0.002, "loss": 5.3723, "step": 2158 }, { "epoch": 0.24510503031528608, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.4009, "step": 2159 }, { "epoch": 0.24521855742520515, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.3717, "step": 2160 }, { "epoch": 0.24533208453512423, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.378, "step": 2161 }, { "epoch": 0.2454456116450433, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.3653, "step": 2162 }, { "epoch": 0.24555913875496238, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.3694, "step": 2163 }, { "epoch": 0.24567266586488146, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.3713, "step": 2164 }, { "epoch": 0.24578619297480053, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.3894, "step": 2165 }, { "epoch": 0.2458997200847196, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.3828, "step": 2166 }, { "epoch": 0.24601324719463868, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.3866, "step": 2167 }, { "epoch": 0.24612677430455776, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.3773, "step": 2168 }, { "epoch": 0.24624030141447684, "grad_norm": 0.388671875, "learning_rate": 0.002, "loss": 5.3887, "step": 2169 }, { "epoch": 0.2463538285243959, "grad_norm": 0.40625, "learning_rate": 0.002, "loss": 5.3628, "step": 2170 }, { "epoch": 0.246467355634315, "grad_norm": 0.48828125, "learning_rate": 0.002, "loss": 5.3883, "step": 2171 }, { "epoch": 0.24658088274423406, "grad_norm": 0.490234375, "learning_rate": 0.002, "loss": 5.3939, "step": 2172 }, { "epoch": 0.24669440985415314, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.3899, "step": 2173 }, { "epoch": 0.24680793696407222, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.3814, "step": 2174 }, { "epoch": 0.2469214640739913, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.3869, "step": 2175 }, { "epoch": 0.24703499118391037, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.3761, "step": 2176 }, { "epoch": 0.24714851829382944, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.4045, "step": 2177 }, { "epoch": 0.24726204540374852, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.359, "step": 2178 }, { "epoch": 0.2473755725136676, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.3757, "step": 2179 }, { "epoch": 0.24748909962358667, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.3918, "step": 2180 }, { "epoch": 0.24760262673350575, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.3606, "step": 2181 }, { "epoch": 0.24771615384342482, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.3714, "step": 2182 }, { "epoch": 0.2478296809533439, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.3626, "step": 2183 }, { "epoch": 0.24794320806326298, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.3666, "step": 2184 }, { "epoch": 0.24805673517318205, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.3706, "step": 2185 }, { "epoch": 0.24817026228310113, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.3722, "step": 2186 }, { "epoch": 0.2482837893930202, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.3818, "step": 2187 }, { "epoch": 0.24839731650293928, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.3888, "step": 2188 }, { "epoch": 0.24851084361285836, "grad_norm": 0.41015625, "learning_rate": 0.002, "loss": 5.3918, "step": 2189 }, { "epoch": 0.24862437072277743, "grad_norm": 0.41015625, "learning_rate": 0.002, "loss": 5.3825, "step": 2190 }, { "epoch": 0.2487378978326965, "grad_norm": 0.427734375, "learning_rate": 0.002, "loss": 5.3804, "step": 2191 }, { "epoch": 0.24885142494261558, "grad_norm": 0.37109375, "learning_rate": 0.002, "loss": 5.3776, "step": 2192 }, { "epoch": 0.24896495205253466, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.3857, "step": 2193 }, { "epoch": 0.24907847916245374, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.3733, "step": 2194 }, { "epoch": 0.2491920062723728, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.3628, "step": 2195 }, { "epoch": 0.2493055333822919, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.3954, "step": 2196 }, { "epoch": 0.24941906049221096, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.38, "step": 2197 }, { "epoch": 0.24953258760213004, "grad_norm": 0.40625, "learning_rate": 0.002, "loss": 5.3786, "step": 2198 }, { "epoch": 0.24964611471204912, "grad_norm": 0.40234375, "learning_rate": 0.002, "loss": 5.3641, "step": 2199 }, { "epoch": 0.2497596418219682, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.3888, "step": 2200 }, { "epoch": 0.24987316893188727, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.3921, "step": 2201 }, { "epoch": 0.24998669604180634, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.3643, "step": 2202 }, { "epoch": 0.2501002231517254, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.3762, "step": 2203 }, { "epoch": 0.2502137502616445, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.3679, "step": 2204 }, { "epoch": 0.2503272773715636, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.358, "step": 2205 }, { "epoch": 0.25044080448148265, "grad_norm": 0.3984375, "learning_rate": 0.002, "loss": 5.3759, "step": 2206 }, { "epoch": 0.2505543315914017, "grad_norm": 0.38671875, "learning_rate": 0.002, "loss": 5.3565, "step": 2207 }, { "epoch": 0.2506678587013208, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.3774, "step": 2208 }, { "epoch": 0.2507813858112399, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.3796, "step": 2209 }, { "epoch": 0.25089491292115895, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.3725, "step": 2210 }, { "epoch": 0.25100844003107803, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.3791, "step": 2211 }, { "epoch": 0.2511219671409971, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.3773, "step": 2212 }, { "epoch": 0.2512354942509162, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.3767, "step": 2213 }, { "epoch": 0.25134902136083526, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.3853, "step": 2214 }, { "epoch": 0.25146254847075433, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.388, "step": 2215 }, { "epoch": 0.2515760755806734, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.3691, "step": 2216 }, { "epoch": 0.2516896026905925, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.3731, "step": 2217 }, { "epoch": 0.25180312980051156, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.4074, "step": 2218 }, { "epoch": 0.25191665691043064, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.3913, "step": 2219 }, { "epoch": 0.2520301840203497, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.3942, "step": 2220 }, { "epoch": 0.2521437111302688, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.3758, "step": 2221 }, { "epoch": 0.25225723824018786, "grad_norm": 0.3828125, "learning_rate": 0.002, "loss": 5.372, "step": 2222 }, { "epoch": 0.25237076535010694, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.3535, "step": 2223 }, { "epoch": 0.252484292460026, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.385, "step": 2224 }, { "epoch": 0.2525978195699451, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.3681, "step": 2225 }, { "epoch": 0.25271134667986417, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.3894, "step": 2226 }, { "epoch": 0.25282487378978324, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.3829, "step": 2227 }, { "epoch": 0.2529384008997023, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.3662, "step": 2228 }, { "epoch": 0.2530519280096214, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.3852, "step": 2229 }, { "epoch": 0.2531654551195405, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.3736, "step": 2230 }, { "epoch": 0.25327898222945955, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.3798, "step": 2231 }, { "epoch": 0.2533925093393786, "grad_norm": 0.392578125, "learning_rate": 0.002, "loss": 5.3794, "step": 2232 }, { "epoch": 0.25350603644929776, "grad_norm": 0.380859375, "learning_rate": 0.002, "loss": 5.3789, "step": 2233 }, { "epoch": 0.25361956355921683, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.3801, "step": 2234 }, { "epoch": 0.2537330906691359, "grad_norm": 0.3984375, "learning_rate": 0.002, "loss": 5.3562, "step": 2235 }, { "epoch": 0.253846617779055, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.3646, "step": 2236 }, { "epoch": 0.25396014488897406, "grad_norm": 0.3828125, "learning_rate": 0.002, "loss": 5.3798, "step": 2237 }, { "epoch": 0.25407367199889314, "grad_norm": 0.416015625, "learning_rate": 0.002, "loss": 5.3857, "step": 2238 }, { "epoch": 0.2541871991088122, "grad_norm": 0.427734375, "learning_rate": 0.002, "loss": 5.3707, "step": 2239 }, { "epoch": 0.2543007262187313, "grad_norm": 0.447265625, "learning_rate": 0.002, "loss": 5.3886, "step": 2240 }, { "epoch": 0.25441425332865036, "grad_norm": 0.4453125, "learning_rate": 0.002, "loss": 5.3673, "step": 2241 }, { "epoch": 0.25452778043856944, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.3728, "step": 2242 }, { "epoch": 0.2546413075484885, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.4026, "step": 2243 }, { "epoch": 0.2547548346584076, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.3839, "step": 2244 }, { "epoch": 0.25486836176832667, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.3835, "step": 2245 }, { "epoch": 0.25498188887824574, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.3616, "step": 2246 }, { "epoch": 0.2550954159881648, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.3724, "step": 2247 }, { "epoch": 0.2552089430980839, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.3684, "step": 2248 }, { "epoch": 0.25532247020800297, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.3784, "step": 2249 }, { "epoch": 0.25543599731792205, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.3811, "step": 2250 }, { "epoch": 0.2555495244278411, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.3619, "step": 2251 }, { "epoch": 0.2556630515377602, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.3966, "step": 2252 }, { "epoch": 0.2557765786476793, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.3597, "step": 2253 }, { "epoch": 0.25589010575759835, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.3571, "step": 2254 }, { "epoch": 0.2560036328675174, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.3814, "step": 2255 }, { "epoch": 0.2561171599774365, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.3683, "step": 2256 }, { "epoch": 0.2562306870873556, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.367, "step": 2257 }, { "epoch": 0.25634421419727466, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.3781, "step": 2258 }, { "epoch": 0.25645774130719373, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.3797, "step": 2259 }, { "epoch": 0.2565712684171128, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.3751, "step": 2260 }, { "epoch": 0.2566847955270319, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.3896, "step": 2261 }, { "epoch": 0.25679832263695096, "grad_norm": 0.384765625, "learning_rate": 0.002, "loss": 5.3775, "step": 2262 }, { "epoch": 0.25691184974687004, "grad_norm": 0.43359375, "learning_rate": 0.002, "loss": 5.3752, "step": 2263 }, { "epoch": 0.2570253768567891, "grad_norm": 0.384765625, "learning_rate": 0.002, "loss": 5.3723, "step": 2264 }, { "epoch": 0.2571389039667082, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.3591, "step": 2265 }, { "epoch": 0.25725243107662726, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.3745, "step": 2266 }, { "epoch": 0.25736595818654634, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.3634, "step": 2267 }, { "epoch": 0.2574794852964654, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.3927, "step": 2268 }, { "epoch": 0.2575930124063845, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.3695, "step": 2269 }, { "epoch": 0.25770653951630357, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.3797, "step": 2270 }, { "epoch": 0.25782006662622264, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.3603, "step": 2271 }, { "epoch": 0.2579335937361417, "grad_norm": 0.2490234375, "learning_rate": 0.002, "loss": 5.3724, "step": 2272 }, { "epoch": 0.2580471208460608, "grad_norm": 0.244140625, "learning_rate": 0.002, "loss": 5.3726, "step": 2273 }, { "epoch": 0.25816064795597987, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.3601, "step": 2274 }, { "epoch": 0.25827417506589895, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.3731, "step": 2275 }, { "epoch": 0.258387702175818, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.3855, "step": 2276 }, { "epoch": 0.2585012292857371, "grad_norm": 0.427734375, "learning_rate": 0.002, "loss": 5.3519, "step": 2277 }, { "epoch": 0.2586147563956562, "grad_norm": 0.419921875, "learning_rate": 0.002, "loss": 5.3801, "step": 2278 }, { "epoch": 0.25872828350557525, "grad_norm": 0.4609375, "learning_rate": 0.002, "loss": 5.3835, "step": 2279 }, { "epoch": 0.2588418106154943, "grad_norm": 0.466796875, "learning_rate": 0.002, "loss": 5.3921, "step": 2280 }, { "epoch": 0.2589553377254134, "grad_norm": 0.3828125, "learning_rate": 0.002, "loss": 5.3824, "step": 2281 }, { "epoch": 0.2590688648353325, "grad_norm": 0.392578125, "learning_rate": 0.002, "loss": 5.3546, "step": 2282 }, { "epoch": 0.25918239194525156, "grad_norm": 0.408203125, "learning_rate": 0.002, "loss": 5.3792, "step": 2283 }, { "epoch": 0.25929591905517063, "grad_norm": 0.423828125, "learning_rate": 0.002, "loss": 5.3717, "step": 2284 }, { "epoch": 0.2594094461650897, "grad_norm": 0.419921875, "learning_rate": 0.002, "loss": 5.3755, "step": 2285 }, { "epoch": 0.2595229732750088, "grad_norm": 0.41796875, "learning_rate": 0.002, "loss": 5.3605, "step": 2286 }, { "epoch": 0.25963650038492786, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.3559, "step": 2287 }, { "epoch": 0.25975002749484694, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.3695, "step": 2288 }, { "epoch": 0.259863554604766, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.3625, "step": 2289 }, { "epoch": 0.2599770817146851, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.3741, "step": 2290 }, { "epoch": 0.26009060882460416, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.3482, "step": 2291 }, { "epoch": 0.26020413593452324, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.3818, "step": 2292 }, { "epoch": 0.2603176630444423, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.3687, "step": 2293 }, { "epoch": 0.2604311901543614, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.3785, "step": 2294 }, { "epoch": 0.26054471726428047, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.3809, "step": 2295 }, { "epoch": 0.26065824437419954, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.3827, "step": 2296 }, { "epoch": 0.2607717714841186, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.3752, "step": 2297 }, { "epoch": 0.2608852985940377, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.3729, "step": 2298 }, { "epoch": 0.26099882570395677, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.3809, "step": 2299 }, { "epoch": 0.26111235281387585, "grad_norm": 0.392578125, "learning_rate": 0.002, "loss": 5.3625, "step": 2300 }, { "epoch": 0.2612258799237949, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.3655, "step": 2301 }, { "epoch": 0.261339407033714, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.3651, "step": 2302 }, { "epoch": 0.2614529341436331, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.3579, "step": 2303 }, { "epoch": 0.26156646125355215, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.3849, "step": 2304 }, { "epoch": 0.2616799883634712, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.3786, "step": 2305 }, { "epoch": 0.2617935154733903, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.3629, "step": 2306 }, { "epoch": 0.2619070425833094, "grad_norm": 0.423828125, "learning_rate": 0.002, "loss": 5.3586, "step": 2307 }, { "epoch": 0.26202056969322846, "grad_norm": 0.396484375, "learning_rate": 0.002, "loss": 5.381, "step": 2308 }, { "epoch": 0.26213409680314753, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.3641, "step": 2309 }, { "epoch": 0.2622476239130666, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.37, "step": 2310 }, { "epoch": 0.2623611510229857, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.393, "step": 2311 }, { "epoch": 0.26247467813290476, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.3671, "step": 2312 }, { "epoch": 0.26258820524282384, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.3687, "step": 2313 }, { "epoch": 0.2627017323527429, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.3809, "step": 2314 }, { "epoch": 0.262815259462662, "grad_norm": 0.388671875, "learning_rate": 0.002, "loss": 5.3712, "step": 2315 }, { "epoch": 0.26292878657258106, "grad_norm": 0.396484375, "learning_rate": 0.002, "loss": 5.3449, "step": 2316 }, { "epoch": 0.26304231368250014, "grad_norm": 0.41015625, "learning_rate": 0.002, "loss": 5.3721, "step": 2317 }, { "epoch": 0.2631558407924192, "grad_norm": 0.40625, "learning_rate": 0.002, "loss": 5.3897, "step": 2318 }, { "epoch": 0.2632693679023383, "grad_norm": 0.37109375, "learning_rate": 0.002, "loss": 5.3531, "step": 2319 }, { "epoch": 0.26338289501225737, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.3663, "step": 2320 }, { "epoch": 0.26349642212217644, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.3661, "step": 2321 }, { "epoch": 0.2636099492320955, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.3477, "step": 2322 }, { "epoch": 0.2637234763420146, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.3773, "step": 2323 }, { "epoch": 0.26383700345193367, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.3576, "step": 2324 }, { "epoch": 0.26395053056185275, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.3664, "step": 2325 }, { "epoch": 0.2640640576717718, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.3741, "step": 2326 }, { "epoch": 0.2641775847816909, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.3485, "step": 2327 }, { "epoch": 0.26429111189161, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.3624, "step": 2328 }, { "epoch": 0.26440463900152905, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.3582, "step": 2329 }, { "epoch": 0.2645181661114481, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.3683, "step": 2330 }, { "epoch": 0.2646316932213672, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.3427, "step": 2331 }, { "epoch": 0.2647452203312863, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.3618, "step": 2332 }, { "epoch": 0.26485874744120536, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.3724, "step": 2333 }, { "epoch": 0.26497227455112443, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.3807, "step": 2334 }, { "epoch": 0.2650858016610435, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.3608, "step": 2335 }, { "epoch": 0.2651993287709626, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.3692, "step": 2336 }, { "epoch": 0.26531285588088166, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.3816, "step": 2337 }, { "epoch": 0.26542638299080074, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.3728, "step": 2338 }, { "epoch": 0.2655399101007198, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.3493, "step": 2339 }, { "epoch": 0.2656534372106389, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.365, "step": 2340 }, { "epoch": 0.26576696432055796, "grad_norm": 0.41015625, "learning_rate": 0.002, "loss": 5.3621, "step": 2341 }, { "epoch": 0.26588049143047704, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.3872, "step": 2342 }, { "epoch": 0.2659940185403961, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.356, "step": 2343 }, { "epoch": 0.2661075456503152, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.3688, "step": 2344 }, { "epoch": 0.26622107276023427, "grad_norm": 0.2490234375, "learning_rate": 0.002, "loss": 5.3744, "step": 2345 }, { "epoch": 0.26633459987015334, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.3645, "step": 2346 }, { "epoch": 0.2664481269800724, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.3512, "step": 2347 }, { "epoch": 0.2665616540899915, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.3718, "step": 2348 }, { "epoch": 0.26667518119991057, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.3796, "step": 2349 }, { "epoch": 0.26678870830982965, "grad_norm": 0.2353515625, "learning_rate": 0.002, "loss": 5.3652, "step": 2350 }, { "epoch": 0.2669022354197487, "grad_norm": 0.236328125, "learning_rate": 0.002, "loss": 5.3977, "step": 2351 }, { "epoch": 0.2670157625296678, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.3615, "step": 2352 }, { "epoch": 0.26712928963958693, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.3442, "step": 2353 }, { "epoch": 0.267242816749506, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.3815, "step": 2354 }, { "epoch": 0.2673563438594251, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.3595, "step": 2355 }, { "epoch": 0.26746987096934416, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.3547, "step": 2356 }, { "epoch": 0.26758339807926323, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.3679, "step": 2357 }, { "epoch": 0.2676969251891823, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.355, "step": 2358 }, { "epoch": 0.2678104522991014, "grad_norm": 0.443359375, "learning_rate": 0.002, "loss": 5.3619, "step": 2359 }, { "epoch": 0.26792397940902046, "grad_norm": 0.466796875, "learning_rate": 0.002, "loss": 5.3802, "step": 2360 }, { "epoch": 0.26803750651893954, "grad_norm": 0.43359375, "learning_rate": 0.002, "loss": 5.3428, "step": 2361 }, { "epoch": 0.2681510336288586, "grad_norm": 0.421875, "learning_rate": 0.002, "loss": 5.3605, "step": 2362 }, { "epoch": 0.2682645607387777, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.3847, "step": 2363 }, { "epoch": 0.26837808784869677, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.3636, "step": 2364 }, { "epoch": 0.26849161495861584, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.3649, "step": 2365 }, { "epoch": 0.2686051420685349, "grad_norm": 0.427734375, "learning_rate": 0.002, "loss": 5.3644, "step": 2366 }, { "epoch": 0.268718669178454, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.3542, "step": 2367 }, { "epoch": 0.26883219628837307, "grad_norm": 0.38671875, "learning_rate": 0.002, "loss": 5.3625, "step": 2368 }, { "epoch": 0.26894572339829215, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.3676, "step": 2369 }, { "epoch": 0.2690592505082112, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.3512, "step": 2370 }, { "epoch": 0.2691727776181303, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.3529, "step": 2371 }, { "epoch": 0.2692863047280494, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.3475, "step": 2372 }, { "epoch": 0.26939983183796845, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.3737, "step": 2373 }, { "epoch": 0.2695133589478875, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.3523, "step": 2374 }, { "epoch": 0.2696268860578066, "grad_norm": 0.404296875, "learning_rate": 0.002, "loss": 5.3521, "step": 2375 }, { "epoch": 0.2697404131677257, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.3541, "step": 2376 }, { "epoch": 0.26985394027764475, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.3778, "step": 2377 }, { "epoch": 0.26996746738756383, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.3716, "step": 2378 }, { "epoch": 0.2700809944974829, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.3657, "step": 2379 }, { "epoch": 0.270194521607402, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.3762, "step": 2380 }, { "epoch": 0.27030804871732106, "grad_norm": 0.25, "learning_rate": 0.002, "loss": 5.3495, "step": 2381 }, { "epoch": 0.27042157582724013, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.3728, "step": 2382 }, { "epoch": 0.2705351029371592, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.3674, "step": 2383 }, { "epoch": 0.2706486300470783, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.3564, "step": 2384 }, { "epoch": 0.27076215715699736, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.3791, "step": 2385 }, { "epoch": 0.27087568426691644, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.3709, "step": 2386 }, { "epoch": 0.2709892113768355, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.3542, "step": 2387 }, { "epoch": 0.2711027384867546, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.3665, "step": 2388 }, { "epoch": 0.27121626559667367, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.3555, "step": 2389 }, { "epoch": 0.27132979270659274, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.3479, "step": 2390 }, { "epoch": 0.2714433198165118, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.3534, "step": 2391 }, { "epoch": 0.2715568469264309, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.3619, "step": 2392 }, { "epoch": 0.27167037403634997, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.368, "step": 2393 }, { "epoch": 0.27178390114626905, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.3835, "step": 2394 }, { "epoch": 0.2718974282561881, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.3651, "step": 2395 }, { "epoch": 0.2720109553661072, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.3643, "step": 2396 }, { "epoch": 0.2721244824760263, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.3777, "step": 2397 }, { "epoch": 0.27223800958594535, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.3618, "step": 2398 }, { "epoch": 0.2723515366958644, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.3679, "step": 2399 }, { "epoch": 0.2724650638057835, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.3504, "step": 2400 }, { "epoch": 0.2725785909157026, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.3545, "step": 2401 }, { "epoch": 0.27269211802562165, "grad_norm": 0.390625, "learning_rate": 0.002, "loss": 5.326, "step": 2402 }, { "epoch": 0.27280564513554073, "grad_norm": 0.400390625, "learning_rate": 0.002, "loss": 5.3659, "step": 2403 }, { "epoch": 0.2729191722454598, "grad_norm": 0.43359375, "learning_rate": 0.002, "loss": 5.3483, "step": 2404 }, { "epoch": 0.2730326993553789, "grad_norm": 0.4375, "learning_rate": 0.002, "loss": 5.3616, "step": 2405 }, { "epoch": 0.27314622646529796, "grad_norm": 0.443359375, "learning_rate": 0.002, "loss": 5.3567, "step": 2406 }, { "epoch": 0.27325975357521703, "grad_norm": 0.435546875, "learning_rate": 0.002, "loss": 5.3584, "step": 2407 }, { "epoch": 0.2733732806851361, "grad_norm": 0.38671875, "learning_rate": 0.002, "loss": 5.3457, "step": 2408 }, { "epoch": 0.2734868077950552, "grad_norm": 0.392578125, "learning_rate": 0.002, "loss": 5.3489, "step": 2409 }, { "epoch": 0.27360033490497426, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.3529, "step": 2410 }, { "epoch": 0.27371386201489334, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.3451, "step": 2411 }, { "epoch": 0.2738273891248124, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.3569, "step": 2412 }, { "epoch": 0.2739409162347315, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.3644, "step": 2413 }, { "epoch": 0.27405444334465057, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.3699, "step": 2414 }, { "epoch": 0.27416797045456964, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.335, "step": 2415 }, { "epoch": 0.2742814975644887, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.3639, "step": 2416 }, { "epoch": 0.2743950246744078, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.3572, "step": 2417 }, { "epoch": 0.27450855178432687, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.3565, "step": 2418 }, { "epoch": 0.27462207889424595, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.3561, "step": 2419 }, { "epoch": 0.274735606004165, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.3848, "step": 2420 }, { "epoch": 0.2748491331140841, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.3925, "step": 2421 }, { "epoch": 0.2749626602240032, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.3607, "step": 2422 }, { "epoch": 0.27507618733392225, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.378, "step": 2423 }, { "epoch": 0.2751897144438413, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.3582, "step": 2424 }, { "epoch": 0.2753032415537604, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.3538, "step": 2425 }, { "epoch": 0.2754167686636795, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.3657, "step": 2426 }, { "epoch": 0.27553029577359855, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.3457, "step": 2427 }, { "epoch": 0.27564382288351763, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.3748, "step": 2428 }, { "epoch": 0.2757573499934367, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.3652, "step": 2429 }, { "epoch": 0.2758708771033558, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.3376, "step": 2430 }, { "epoch": 0.27598440421327486, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.3514, "step": 2431 }, { "epoch": 0.27609793132319393, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.3831, "step": 2432 }, { "epoch": 0.276211458433113, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.3605, "step": 2433 }, { "epoch": 0.2763249855430321, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.3646, "step": 2434 }, { "epoch": 0.27643851265295116, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.3773, "step": 2435 }, { "epoch": 0.27655203976287024, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.3634, "step": 2436 }, { "epoch": 0.2766655668727893, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.3605, "step": 2437 }, { "epoch": 0.2767790939827084, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.3524, "step": 2438 }, { "epoch": 0.27689262109262747, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.3584, "step": 2439 }, { "epoch": 0.27700614820254654, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.3535, "step": 2440 }, { "epoch": 0.2771196753124656, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.3707, "step": 2441 }, { "epoch": 0.2772332024223847, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.3441, "step": 2442 }, { "epoch": 0.27734672953230377, "grad_norm": 0.42578125, "learning_rate": 0.002, "loss": 5.3677, "step": 2443 }, { "epoch": 0.27746025664222285, "grad_norm": 0.423828125, "learning_rate": 0.002, "loss": 5.3463, "step": 2444 }, { "epoch": 0.2775737837521419, "grad_norm": 0.390625, "learning_rate": 0.002, "loss": 5.3485, "step": 2445 }, { "epoch": 0.277687310862061, "grad_norm": 0.427734375, "learning_rate": 0.002, "loss": 5.3489, "step": 2446 }, { "epoch": 0.2778008379719801, "grad_norm": 0.396484375, "learning_rate": 0.002, "loss": 5.3562, "step": 2447 }, { "epoch": 0.27791436508189915, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.3534, "step": 2448 }, { "epoch": 0.2780278921918182, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.339, "step": 2449 }, { "epoch": 0.2781414193017373, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.3937, "step": 2450 }, { "epoch": 0.2782549464116564, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.3663, "step": 2451 }, { "epoch": 0.27836847352157545, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.355, "step": 2452 }, { "epoch": 0.27848200063149453, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.3379, "step": 2453 }, { "epoch": 0.2785955277414136, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.3444, "step": 2454 }, { "epoch": 0.2787090548513327, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.3576, "step": 2455 }, { "epoch": 0.27882258196125176, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.3529, "step": 2456 }, { "epoch": 0.27893610907117083, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.3495, "step": 2457 }, { "epoch": 0.2790496361810899, "grad_norm": 0.40625, "learning_rate": 0.002, "loss": 5.3736, "step": 2458 }, { "epoch": 0.279163163291009, "grad_norm": 0.388671875, "learning_rate": 0.002, "loss": 5.3537, "step": 2459 }, { "epoch": 0.27927669040092806, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.3623, "step": 2460 }, { "epoch": 0.27939021751084714, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.3488, "step": 2461 }, { "epoch": 0.2795037446207662, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.3732, "step": 2462 }, { "epoch": 0.2796172717306853, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.3481, "step": 2463 }, { "epoch": 0.27973079884060437, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.3714, "step": 2464 }, { "epoch": 0.27984432595052344, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.3587, "step": 2465 }, { "epoch": 0.2799578530604425, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.3615, "step": 2466 }, { "epoch": 0.2800713801703616, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.3612, "step": 2467 }, { "epoch": 0.28018490728028067, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.3399, "step": 2468 }, { "epoch": 0.28029843439019975, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.3491, "step": 2469 }, { "epoch": 0.2804119615001188, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.3483, "step": 2470 }, { "epoch": 0.2805254886100379, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.3832, "step": 2471 }, { "epoch": 0.280639015719957, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.3651, "step": 2472 }, { "epoch": 0.28075254282987605, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.3485, "step": 2473 }, { "epoch": 0.2808660699397952, "grad_norm": 0.25, "learning_rate": 0.002, "loss": 5.3514, "step": 2474 }, { "epoch": 0.28097959704971426, "grad_norm": 0.2314453125, "learning_rate": 0.002, "loss": 5.3549, "step": 2475 }, { "epoch": 0.28109312415963333, "grad_norm": 0.25, "learning_rate": 0.002, "loss": 5.3659, "step": 2476 }, { "epoch": 0.2812066512695524, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.3705, "step": 2477 }, { "epoch": 0.2813201783794715, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.3244, "step": 2478 }, { "epoch": 0.28143370548939056, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.3508, "step": 2479 }, { "epoch": 0.28154723259930964, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.3657, "step": 2480 }, { "epoch": 0.2816607597092287, "grad_norm": 0.408203125, "learning_rate": 0.002, "loss": 5.3507, "step": 2481 }, { "epoch": 0.2817742868191478, "grad_norm": 0.408203125, "learning_rate": 0.002, "loss": 5.3579, "step": 2482 }, { "epoch": 0.28188781392906687, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.3825, "step": 2483 }, { "epoch": 0.28200134103898594, "grad_norm": 0.388671875, "learning_rate": 0.002, "loss": 5.3445, "step": 2484 }, { "epoch": 0.282114868148905, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.3444, "step": 2485 }, { "epoch": 0.2822283952588241, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.3659, "step": 2486 }, { "epoch": 0.28234192236874317, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.3711, "step": 2487 }, { "epoch": 0.28245544947866225, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.3682, "step": 2488 }, { "epoch": 0.2825689765885813, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.36, "step": 2489 }, { "epoch": 0.2826825036985004, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.3365, "step": 2490 }, { "epoch": 0.2827960308084195, "grad_norm": 0.388671875, "learning_rate": 0.002, "loss": 5.3482, "step": 2491 }, { "epoch": 0.28290955791833855, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.3279, "step": 2492 }, { "epoch": 0.2830230850282576, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.3502, "step": 2493 }, { "epoch": 0.2831366121381767, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.3568, "step": 2494 }, { "epoch": 0.2832501392480958, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.3691, "step": 2495 }, { "epoch": 0.28336366635801485, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.3665, "step": 2496 }, { "epoch": 0.28347719346793393, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.3511, "step": 2497 }, { "epoch": 0.283590720577853, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.3583, "step": 2498 }, { "epoch": 0.2837042476877721, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.356, "step": 2499 }, { "epoch": 0.28381777479769116, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.3577, "step": 2500 }, { "epoch": 0.28393130190761023, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.3445, "step": 2501 }, { "epoch": 0.2840448290175293, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.3587, "step": 2502 }, { "epoch": 0.2841583561274484, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.3454, "step": 2503 }, { "epoch": 0.28427188323736746, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.3489, "step": 2504 }, { "epoch": 0.28438541034728654, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.3634, "step": 2505 }, { "epoch": 0.2844989374572056, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.3582, "step": 2506 }, { "epoch": 0.2846124645671247, "grad_norm": 0.240234375, "learning_rate": 0.002, "loss": 5.3541, "step": 2507 }, { "epoch": 0.28472599167704377, "grad_norm": 0.2421875, "learning_rate": 0.002, "loss": 5.3565, "step": 2508 }, { "epoch": 0.28483951878696284, "grad_norm": 0.2353515625, "learning_rate": 0.002, "loss": 5.3445, "step": 2509 }, { "epoch": 0.2849530458968819, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.3612, "step": 2510 }, { "epoch": 0.285066573006801, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.3661, "step": 2511 }, { "epoch": 0.28518010011672007, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.3323, "step": 2512 }, { "epoch": 0.28529362722663915, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.3676, "step": 2513 }, { "epoch": 0.2854071543365582, "grad_norm": 0.41015625, "learning_rate": 0.002, "loss": 5.3548, "step": 2514 }, { "epoch": 0.2855206814464773, "grad_norm": 0.412109375, "learning_rate": 0.002, "loss": 5.3656, "step": 2515 }, { "epoch": 0.2856342085563964, "grad_norm": 0.39453125, "learning_rate": 0.002, "loss": 5.3388, "step": 2516 }, { "epoch": 0.28574773566631545, "grad_norm": 0.396484375, "learning_rate": 0.002, "loss": 5.3548, "step": 2517 }, { "epoch": 0.2858612627762345, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.37, "step": 2518 }, { "epoch": 0.2859747898861536, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.3515, "step": 2519 }, { "epoch": 0.2860883169960727, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.3598, "step": 2520 }, { "epoch": 0.28620184410599175, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.3482, "step": 2521 }, { "epoch": 0.28631537121591083, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.3534, "step": 2522 }, { "epoch": 0.2864288983258299, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.3576, "step": 2523 }, { "epoch": 0.286542425435749, "grad_norm": 0.2470703125, "learning_rate": 0.002, "loss": 5.353, "step": 2524 }, { "epoch": 0.28665595254566806, "grad_norm": 0.2490234375, "learning_rate": 0.002, "loss": 5.359, "step": 2525 }, { "epoch": 0.28676947965558713, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.3438, "step": 2526 }, { "epoch": 0.2868830067655062, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.3538, "step": 2527 }, { "epoch": 0.2869965338754253, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.3563, "step": 2528 }, { "epoch": 0.28711006098534436, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.3587, "step": 2529 }, { "epoch": 0.28722358809526344, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.3341, "step": 2530 }, { "epoch": 0.2873371152051825, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.3527, "step": 2531 }, { "epoch": 0.2874506423151016, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.3506, "step": 2532 }, { "epoch": 0.28756416942502067, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.3655, "step": 2533 }, { "epoch": 0.28767769653493974, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.3326, "step": 2534 }, { "epoch": 0.2877912236448588, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.3591, "step": 2535 }, { "epoch": 0.2879047507547779, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.3472, "step": 2536 }, { "epoch": 0.28801827786469697, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.3472, "step": 2537 }, { "epoch": 0.28813180497461605, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.3494, "step": 2538 }, { "epoch": 0.2882453320845351, "grad_norm": 0.40234375, "learning_rate": 0.002, "loss": 5.3477, "step": 2539 }, { "epoch": 0.2883588591944542, "grad_norm": 0.482421875, "learning_rate": 0.002, "loss": 5.3513, "step": 2540 }, { "epoch": 0.2884723863043733, "grad_norm": 0.43359375, "learning_rate": 0.002, "loss": 5.3572, "step": 2541 }, { "epoch": 0.28858591341429235, "grad_norm": 0.37109375, "learning_rate": 0.002, "loss": 5.3518, "step": 2542 }, { "epoch": 0.2886994405242114, "grad_norm": 0.380859375, "learning_rate": 0.002, "loss": 5.3464, "step": 2543 }, { "epoch": 0.2888129676341305, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.3441, "step": 2544 }, { "epoch": 0.2889264947440496, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.3555, "step": 2545 }, { "epoch": 0.28904002185396865, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.3647, "step": 2546 }, { "epoch": 0.28915354896388773, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.3614, "step": 2547 }, { "epoch": 0.2892670760738068, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.3384, "step": 2548 }, { "epoch": 0.2893806031837259, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.3661, "step": 2549 }, { "epoch": 0.28949413029364496, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.3477, "step": 2550 }, { "epoch": 0.28960765740356403, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.3668, "step": 2551 }, { "epoch": 0.2897211845134831, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.3419, "step": 2552 }, { "epoch": 0.2898347116234022, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.3398, "step": 2553 }, { "epoch": 0.28994823873332126, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.3422, "step": 2554 }, { "epoch": 0.29006176584324034, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.3485, "step": 2555 }, { "epoch": 0.2901752929531594, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.3351, "step": 2556 }, { "epoch": 0.2902888200630785, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.3459, "step": 2557 }, { "epoch": 0.29040234717299757, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.3607, "step": 2558 }, { "epoch": 0.29051587428291664, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.3386, "step": 2559 }, { "epoch": 0.2906294013928357, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.361, "step": 2560 }, { "epoch": 0.2907429285027548, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.3316, "step": 2561 }, { "epoch": 0.29085645561267387, "grad_norm": 0.25, "learning_rate": 0.002, "loss": 5.3541, "step": 2562 }, { "epoch": 0.29096998272259295, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.3345, "step": 2563 }, { "epoch": 0.291083509832512, "grad_norm": 0.25, "learning_rate": 0.002, "loss": 5.3556, "step": 2564 }, { "epoch": 0.2911970369424311, "grad_norm": 0.24609375, "learning_rate": 0.002, "loss": 5.3455, "step": 2565 }, { "epoch": 0.2913105640523502, "grad_norm": 0.244140625, "learning_rate": 0.002, "loss": 5.3541, "step": 2566 }, { "epoch": 0.29142409116226925, "grad_norm": 0.2490234375, "learning_rate": 0.002, "loss": 5.3269, "step": 2567 }, { "epoch": 0.2915376182721883, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.3498, "step": 2568 }, { "epoch": 0.2916511453821074, "grad_norm": 0.380859375, "learning_rate": 0.002, "loss": 5.361, "step": 2569 }, { "epoch": 0.2917646724920265, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.3471, "step": 2570 }, { "epoch": 0.29187819960194555, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.3524, "step": 2571 }, { "epoch": 0.29199172671186463, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.3462, "step": 2572 }, { "epoch": 0.2921052538217837, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.3409, "step": 2573 }, { "epoch": 0.2922187809317028, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.3553, "step": 2574 }, { "epoch": 0.29233230804162186, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.3301, "step": 2575 }, { "epoch": 0.29244583515154093, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.3471, "step": 2576 }, { "epoch": 0.29255936226146, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.3513, "step": 2577 }, { "epoch": 0.2926728893713791, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.356, "step": 2578 }, { "epoch": 0.29278641648129816, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.373, "step": 2579 }, { "epoch": 0.29289994359121724, "grad_norm": 0.396484375, "learning_rate": 0.002, "loss": 5.342, "step": 2580 }, { "epoch": 0.2930134707011363, "grad_norm": 0.400390625, "learning_rate": 0.002, "loss": 5.3498, "step": 2581 }, { "epoch": 0.2931269978110554, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.3701, "step": 2582 }, { "epoch": 0.29324052492097447, "grad_norm": 0.40234375, "learning_rate": 0.002, "loss": 5.3586, "step": 2583 }, { "epoch": 0.29335405203089354, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.3188, "step": 2584 }, { "epoch": 0.2934675791408126, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.3679, "step": 2585 }, { "epoch": 0.2935811062507317, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.3458, "step": 2586 }, { "epoch": 0.29369463336065077, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.3558, "step": 2587 }, { "epoch": 0.29380816047056985, "grad_norm": 0.400390625, "learning_rate": 0.002, "loss": 5.331, "step": 2588 }, { "epoch": 0.2939216875804889, "grad_norm": 0.41796875, "learning_rate": 0.002, "loss": 5.3614, "step": 2589 }, { "epoch": 0.294035214690408, "grad_norm": 0.416015625, "learning_rate": 0.002, "loss": 5.3502, "step": 2590 }, { "epoch": 0.2941487418003271, "grad_norm": 0.388671875, "learning_rate": 0.002, "loss": 5.3417, "step": 2591 }, { "epoch": 0.29426226891024615, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.3353, "step": 2592 }, { "epoch": 0.2943757960201652, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.3257, "step": 2593 }, { "epoch": 0.2944893231300843, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.3525, "step": 2594 }, { "epoch": 0.29460285024000343, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.3291, "step": 2595 }, { "epoch": 0.2947163773499225, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.349, "step": 2596 }, { "epoch": 0.2948299044598416, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.345, "step": 2597 }, { "epoch": 0.29494343156976066, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.3508, "step": 2598 }, { "epoch": 0.29505695867967974, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.3662, "step": 2599 }, { "epoch": 0.2951704857895988, "grad_norm": 0.400390625, "learning_rate": 0.002, "loss": 5.3447, "step": 2600 }, { "epoch": 0.2952840128995179, "grad_norm": 0.396484375, "learning_rate": 0.002, "loss": 5.3273, "step": 2601 }, { "epoch": 0.29539754000943697, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.3454, "step": 2602 }, { "epoch": 0.29551106711935604, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.3529, "step": 2603 }, { "epoch": 0.2956245942292751, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.3538, "step": 2604 }, { "epoch": 0.2957381213391942, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.359, "step": 2605 }, { "epoch": 0.29585164844911327, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.3371, "step": 2606 }, { "epoch": 0.29596517555903235, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.3317, "step": 2607 }, { "epoch": 0.2960787026689514, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.339, "step": 2608 }, { "epoch": 0.2961922297788705, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.3311, "step": 2609 }, { "epoch": 0.2963057568887896, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.3362, "step": 2610 }, { "epoch": 0.29641928399870865, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.3661, "step": 2611 }, { "epoch": 0.2965328111086277, "grad_norm": 0.23046875, "learning_rate": 0.002, "loss": 5.3678, "step": 2612 }, { "epoch": 0.2966463382185468, "grad_norm": 0.2314453125, "learning_rate": 0.002, "loss": 5.3438, "step": 2613 }, { "epoch": 0.2967598653284659, "grad_norm": 0.25, "learning_rate": 0.002, "loss": 5.3374, "step": 2614 }, { "epoch": 0.29687339243838495, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.3207, "step": 2615 }, { "epoch": 0.29698691954830403, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.327, "step": 2616 }, { "epoch": 0.2971004466582231, "grad_norm": 0.392578125, "learning_rate": 0.002, "loss": 5.3606, "step": 2617 }, { "epoch": 0.2972139737681422, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.3338, "step": 2618 }, { "epoch": 0.29732750087806126, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.3313, "step": 2619 }, { "epoch": 0.29744102798798033, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.3456, "step": 2620 }, { "epoch": 0.2975545550978994, "grad_norm": 0.2470703125, "learning_rate": 0.002, "loss": 5.33, "step": 2621 }, { "epoch": 0.2976680822078185, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.3694, "step": 2622 }, { "epoch": 0.29778160931773756, "grad_norm": 0.396484375, "learning_rate": 0.002, "loss": 5.3583, "step": 2623 }, { "epoch": 0.29789513642765664, "grad_norm": 0.39453125, "learning_rate": 0.002, "loss": 5.3354, "step": 2624 }, { "epoch": 0.2980086635375757, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.3299, "step": 2625 }, { "epoch": 0.2981221906474948, "grad_norm": 0.39453125, "learning_rate": 0.002, "loss": 5.3533, "step": 2626 }, { "epoch": 0.29823571775741387, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.3374, "step": 2627 }, { "epoch": 0.29834924486733294, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.3585, "step": 2628 }, { "epoch": 0.298462771977252, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.341, "step": 2629 }, { "epoch": 0.2985762990871711, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.3304, "step": 2630 }, { "epoch": 0.29868982619709017, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.3343, "step": 2631 }, { "epoch": 0.29880335330700925, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.3329, "step": 2632 }, { "epoch": 0.2989168804169283, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.3447, "step": 2633 }, { "epoch": 0.2990304075268474, "grad_norm": 0.41015625, "learning_rate": 0.002, "loss": 5.3568, "step": 2634 }, { "epoch": 0.2991439346367665, "grad_norm": 0.412109375, "learning_rate": 0.002, "loss": 5.3096, "step": 2635 }, { "epoch": 0.29925746174668555, "grad_norm": 0.37109375, "learning_rate": 0.002, "loss": 5.3422, "step": 2636 }, { "epoch": 0.2993709888566046, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.3535, "step": 2637 }, { "epoch": 0.2994845159665237, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.3548, "step": 2638 }, { "epoch": 0.2995980430764428, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.3577, "step": 2639 }, { "epoch": 0.29971157018636185, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.3489, "step": 2640 }, { "epoch": 0.29982509729628093, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.3419, "step": 2641 }, { "epoch": 0.2999386244062, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.324, "step": 2642 }, { "epoch": 0.3000521515161191, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.3465, "step": 2643 }, { "epoch": 0.30016567862603816, "grad_norm": 0.2294921875, "learning_rate": 0.002, "loss": 5.3479, "step": 2644 }, { "epoch": 0.30027920573595723, "grad_norm": 0.224609375, "learning_rate": 0.002, "loss": 5.3225, "step": 2645 }, { "epoch": 0.3003927328458763, "grad_norm": 0.21875, "learning_rate": 0.002, "loss": 5.3508, "step": 2646 }, { "epoch": 0.3005062599557954, "grad_norm": 0.2353515625, "learning_rate": 0.002, "loss": 5.3504, "step": 2647 }, { "epoch": 0.30061978706571446, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.336, "step": 2648 }, { "epoch": 0.30073331417563354, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.3425, "step": 2649 }, { "epoch": 0.3008468412855526, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.3459, "step": 2650 }, { "epoch": 0.3009603683954717, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.3418, "step": 2651 }, { "epoch": 0.30107389550539077, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.3524, "step": 2652 }, { "epoch": 0.30118742261530984, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.3242, "step": 2653 }, { "epoch": 0.3013009497252289, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.3563, "step": 2654 }, { "epoch": 0.301414476835148, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.3462, "step": 2655 }, { "epoch": 0.30152800394506707, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.3461, "step": 2656 }, { "epoch": 0.30164153105498615, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.345, "step": 2657 }, { "epoch": 0.3017550581649052, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.3436, "step": 2658 }, { "epoch": 0.3018685852748243, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.3532, "step": 2659 }, { "epoch": 0.3019821123847434, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.3572, "step": 2660 }, { "epoch": 0.30209563949466245, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.3438, "step": 2661 }, { "epoch": 0.3022091666045815, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.3724, "step": 2662 }, { "epoch": 0.3023226937145006, "grad_norm": 0.2314453125, "learning_rate": 0.002, "loss": 5.3409, "step": 2663 }, { "epoch": 0.3024362208244197, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.339, "step": 2664 }, { "epoch": 0.30254974793433875, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.3297, "step": 2665 }, { "epoch": 0.30266327504425783, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.3289, "step": 2666 }, { "epoch": 0.3027768021541769, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.3585, "step": 2667 }, { "epoch": 0.302890329264096, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.3271, "step": 2668 }, { "epoch": 0.30300385637401506, "grad_norm": 0.40234375, "learning_rate": 0.002, "loss": 5.3465, "step": 2669 }, { "epoch": 0.30311738348393413, "grad_norm": 0.392578125, "learning_rate": 0.002, "loss": 5.3523, "step": 2670 }, { "epoch": 0.3032309105938532, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.3516, "step": 2671 }, { "epoch": 0.3033444377037723, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.3454, "step": 2672 }, { "epoch": 0.30345796481369136, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.339, "step": 2673 }, { "epoch": 0.30357149192361044, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.3395, "step": 2674 }, { "epoch": 0.3036850190335295, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.3396, "step": 2675 }, { "epoch": 0.3037985461434486, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.3409, "step": 2676 }, { "epoch": 0.30391207325336766, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.3437, "step": 2677 }, { "epoch": 0.30402560036328674, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.3397, "step": 2678 }, { "epoch": 0.3041391274732058, "grad_norm": 0.400390625, "learning_rate": 0.002, "loss": 5.3367, "step": 2679 }, { "epoch": 0.3042526545831249, "grad_norm": 0.453125, "learning_rate": 0.002, "loss": 5.3408, "step": 2680 }, { "epoch": 0.30436618169304397, "grad_norm": 0.42578125, "learning_rate": 0.002, "loss": 5.342, "step": 2681 }, { "epoch": 0.30447970880296304, "grad_norm": 0.392578125, "learning_rate": 0.002, "loss": 5.351, "step": 2682 }, { "epoch": 0.3045932359128821, "grad_norm": 0.404296875, "learning_rate": 0.002, "loss": 5.3537, "step": 2683 }, { "epoch": 0.3047067630228012, "grad_norm": 0.37109375, "learning_rate": 0.002, "loss": 5.3009, "step": 2684 }, { "epoch": 0.3048202901327203, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.3364, "step": 2685 }, { "epoch": 0.30493381724263935, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.3383, "step": 2686 }, { "epoch": 0.3050473443525584, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.3537, "step": 2687 }, { "epoch": 0.3051608714624775, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.353, "step": 2688 }, { "epoch": 0.3052743985723966, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.3661, "step": 2689 }, { "epoch": 0.30538792568231565, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.3359, "step": 2690 }, { "epoch": 0.30550145279223473, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.3512, "step": 2691 }, { "epoch": 0.3056149799021538, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.345, "step": 2692 }, { "epoch": 0.3057285070120729, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.343, "step": 2693 }, { "epoch": 0.30584203412199196, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.3313, "step": 2694 }, { "epoch": 0.30595556123191103, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.3352, "step": 2695 }, { "epoch": 0.3060690883418301, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.3463, "step": 2696 }, { "epoch": 0.3061826154517492, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.365, "step": 2697 }, { "epoch": 0.30629614256166826, "grad_norm": 0.2412109375, "learning_rate": 0.002, "loss": 5.3402, "step": 2698 }, { "epoch": 0.30640966967158734, "grad_norm": 0.2158203125, "learning_rate": 0.002, "loss": 5.3273, "step": 2699 }, { "epoch": 0.3065231967815064, "grad_norm": 0.2158203125, "learning_rate": 0.002, "loss": 5.3307, "step": 2700 }, { "epoch": 0.3066367238914255, "grad_norm": 0.24609375, "learning_rate": 0.002, "loss": 5.3397, "step": 2701 }, { "epoch": 0.30675025100134456, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.3251, "step": 2702 }, { "epoch": 0.30686377811126364, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.3497, "step": 2703 }, { "epoch": 0.3069773052211827, "grad_norm": 0.412109375, "learning_rate": 0.002, "loss": 5.3403, "step": 2704 }, { "epoch": 0.3070908323311018, "grad_norm": 0.40234375, "learning_rate": 0.002, "loss": 5.3256, "step": 2705 }, { "epoch": 0.30720435944102087, "grad_norm": 0.380859375, "learning_rate": 0.002, "loss": 5.3561, "step": 2706 }, { "epoch": 0.30731788655093994, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.3235, "step": 2707 }, { "epoch": 0.307431413660859, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.3304, "step": 2708 }, { "epoch": 0.3075449407707781, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.342, "step": 2709 }, { "epoch": 0.3076584678806972, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.3503, "step": 2710 }, { "epoch": 0.30777199499061625, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.3484, "step": 2711 }, { "epoch": 0.3078855221005353, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.3268, "step": 2712 }, { "epoch": 0.3079990492104544, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.3628, "step": 2713 }, { "epoch": 0.3081125763203735, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.3616, "step": 2714 }, { "epoch": 0.3082261034302926, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.3459, "step": 2715 }, { "epoch": 0.3083396305402117, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.3393, "step": 2716 }, { "epoch": 0.30845315765013076, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.3488, "step": 2717 }, { "epoch": 0.30856668476004984, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.3409, "step": 2718 }, { "epoch": 0.3086802118699689, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.3536, "step": 2719 }, { "epoch": 0.308793738979888, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.3303, "step": 2720 }, { "epoch": 0.30890726608980706, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.3454, "step": 2721 }, { "epoch": 0.30902079319972614, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.3333, "step": 2722 }, { "epoch": 0.3091343203096452, "grad_norm": 0.396484375, "learning_rate": 0.002, "loss": 5.3361, "step": 2723 }, { "epoch": 0.3092478474195643, "grad_norm": 0.38671875, "learning_rate": 0.002, "loss": 5.3314, "step": 2724 }, { "epoch": 0.30936137452948337, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.3335, "step": 2725 }, { "epoch": 0.30947490163940244, "grad_norm": 0.380859375, "learning_rate": 0.002, "loss": 5.3457, "step": 2726 }, { "epoch": 0.3095884287493215, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.3325, "step": 2727 }, { "epoch": 0.3097019558592406, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.323, "step": 2728 }, { "epoch": 0.3098154829691597, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.3496, "step": 2729 }, { "epoch": 0.30992901007907875, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.3345, "step": 2730 }, { "epoch": 0.3100425371889978, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.3401, "step": 2731 }, { "epoch": 0.3101560642989169, "grad_norm": 0.2490234375, "learning_rate": 0.002, "loss": 5.348, "step": 2732 }, { "epoch": 0.310269591408836, "grad_norm": 0.2314453125, "learning_rate": 0.002, "loss": 5.3617, "step": 2733 }, { "epoch": 0.31038311851875505, "grad_norm": 0.2294921875, "learning_rate": 0.002, "loss": 5.3244, "step": 2734 }, { "epoch": 0.31049664562867413, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.3384, "step": 2735 }, { "epoch": 0.3106101727385932, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.3517, "step": 2736 }, { "epoch": 0.3107236998485123, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.3469, "step": 2737 }, { "epoch": 0.31083722695843136, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.326, "step": 2738 }, { "epoch": 0.31095075406835043, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.3233, "step": 2739 }, { "epoch": 0.3110642811782695, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.3316, "step": 2740 }, { "epoch": 0.3111778082881886, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.3433, "step": 2741 }, { "epoch": 0.31129133539810766, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.3288, "step": 2742 }, { "epoch": 0.31140486250802674, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.3583, "step": 2743 }, { "epoch": 0.3115183896179458, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.3372, "step": 2744 }, { "epoch": 0.3116319167278649, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.3423, "step": 2745 }, { "epoch": 0.31174544383778396, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.3474, "step": 2746 }, { "epoch": 0.31185897094770304, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.345, "step": 2747 }, { "epoch": 0.3119724980576221, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.3454, "step": 2748 }, { "epoch": 0.3120860251675412, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.3448, "step": 2749 }, { "epoch": 0.31219955227746027, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.322, "step": 2750 }, { "epoch": 0.31231307938737934, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.325, "step": 2751 }, { "epoch": 0.3124266064972984, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.3631, "step": 2752 }, { "epoch": 0.3125401336072175, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.3105, "step": 2753 }, { "epoch": 0.3126536607171366, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.339, "step": 2754 }, { "epoch": 0.31276718782705565, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.3593, "step": 2755 }, { "epoch": 0.3128807149369747, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.354, "step": 2756 }, { "epoch": 0.3129942420468938, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.3314, "step": 2757 }, { "epoch": 0.3131077691568129, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.3324, "step": 2758 }, { "epoch": 0.31322129626673195, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.335, "step": 2759 }, { "epoch": 0.31333482337665103, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.3499, "step": 2760 }, { "epoch": 0.3134483504865701, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.3374, "step": 2761 }, { "epoch": 0.3135618775964892, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.3346, "step": 2762 }, { "epoch": 0.31367540470640826, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.3417, "step": 2763 }, { "epoch": 0.31378893181632733, "grad_norm": 0.3828125, "learning_rate": 0.002, "loss": 5.337, "step": 2764 }, { "epoch": 0.3139024589262464, "grad_norm": 0.41796875, "learning_rate": 0.002, "loss": 5.3368, "step": 2765 }, { "epoch": 0.3140159860361655, "grad_norm": 0.404296875, "learning_rate": 0.002, "loss": 5.3337, "step": 2766 }, { "epoch": 0.31412951314608456, "grad_norm": 0.3984375, "learning_rate": 0.002, "loss": 5.3574, "step": 2767 }, { "epoch": 0.31424304025600364, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.3211, "step": 2768 }, { "epoch": 0.3143565673659227, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.3289, "step": 2769 }, { "epoch": 0.3144700944758418, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.3371, "step": 2770 }, { "epoch": 0.31458362158576086, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.3471, "step": 2771 }, { "epoch": 0.31469714869567994, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.3428, "step": 2772 }, { "epoch": 0.314810675805599, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.3197, "step": 2773 }, { "epoch": 0.3149242029155181, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.3241, "step": 2774 }, { "epoch": 0.31503773002543717, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.3403, "step": 2775 }, { "epoch": 0.31515125713535624, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.3214, "step": 2776 }, { "epoch": 0.3152647842452753, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.32, "step": 2777 }, { "epoch": 0.3153783113551944, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.3383, "step": 2778 }, { "epoch": 0.31549183846511347, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.3028, "step": 2779 }, { "epoch": 0.31560536557503255, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.3259, "step": 2780 }, { "epoch": 0.3157188926849516, "grad_norm": 0.384765625, "learning_rate": 0.002, "loss": 5.339, "step": 2781 }, { "epoch": 0.3158324197948707, "grad_norm": 0.3828125, "learning_rate": 0.002, "loss": 5.3177, "step": 2782 }, { "epoch": 0.3159459469047898, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.3406, "step": 2783 }, { "epoch": 0.31605947401470885, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.3191, "step": 2784 }, { "epoch": 0.31617300112462793, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.3351, "step": 2785 }, { "epoch": 0.316286528234547, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.3275, "step": 2786 }, { "epoch": 0.3164000553444661, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.3255, "step": 2787 }, { "epoch": 0.31651358245438516, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.3377, "step": 2788 }, { "epoch": 0.31662710956430423, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.3617, "step": 2789 }, { "epoch": 0.3167406366742233, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2955, "step": 2790 }, { "epoch": 0.3168541637841424, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.3278, "step": 2791 }, { "epoch": 0.31696769089406146, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.349, "step": 2792 }, { "epoch": 0.31708121800398054, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.342, "step": 2793 }, { "epoch": 0.3171947451138996, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.3315, "step": 2794 }, { "epoch": 0.3173082722238187, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.3461, "step": 2795 }, { "epoch": 0.31742179933373776, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.3226, "step": 2796 }, { "epoch": 0.31753532644365684, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.3329, "step": 2797 }, { "epoch": 0.3176488535535759, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.3572, "step": 2798 }, { "epoch": 0.317762380663495, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.3387, "step": 2799 }, { "epoch": 0.31787590777341407, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.3468, "step": 2800 }, { "epoch": 0.31798943488333314, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.3465, "step": 2801 }, { "epoch": 0.3181029619932522, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.3266, "step": 2802 }, { "epoch": 0.3182164891031713, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.3318, "step": 2803 }, { "epoch": 0.31833001621309037, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.3378, "step": 2804 }, { "epoch": 0.31844354332300945, "grad_norm": 0.380859375, "learning_rate": 0.002, "loss": 5.3332, "step": 2805 }, { "epoch": 0.3185570704329285, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.3283, "step": 2806 }, { "epoch": 0.3186705975428476, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.329, "step": 2807 }, { "epoch": 0.3187841246527667, "grad_norm": 0.39453125, "learning_rate": 0.002, "loss": 5.3282, "step": 2808 }, { "epoch": 0.31889765176268575, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.3157, "step": 2809 }, { "epoch": 0.31901117887260483, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.3247, "step": 2810 }, { "epoch": 0.3191247059825239, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.3271, "step": 2811 }, { "epoch": 0.319238233092443, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.3559, "step": 2812 }, { "epoch": 0.31935176020236206, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.316, "step": 2813 }, { "epoch": 0.31946528731228113, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.324, "step": 2814 }, { "epoch": 0.3195788144222002, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.3461, "step": 2815 }, { "epoch": 0.3196923415321193, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.3474, "step": 2816 }, { "epoch": 0.31980586864203836, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.3344, "step": 2817 }, { "epoch": 0.31991939575195744, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.3186, "step": 2818 }, { "epoch": 0.3200329228618765, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.3265, "step": 2819 }, { "epoch": 0.3201464499717956, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.3307, "step": 2820 }, { "epoch": 0.32025997708171466, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.3419, "step": 2821 }, { "epoch": 0.32037350419163374, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.341, "step": 2822 }, { "epoch": 0.3204870313015528, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.3275, "step": 2823 }, { "epoch": 0.3206005584114719, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.3393, "step": 2824 }, { "epoch": 0.32071408552139097, "grad_norm": 0.2373046875, "learning_rate": 0.002, "loss": 5.3532, "step": 2825 }, { "epoch": 0.32082761263131004, "grad_norm": 0.240234375, "learning_rate": 0.002, "loss": 5.3255, "step": 2826 }, { "epoch": 0.3209411397412291, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.3202, "step": 2827 }, { "epoch": 0.3210546668511482, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.3522, "step": 2828 }, { "epoch": 0.32116819396106727, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.3643, "step": 2829 }, { "epoch": 0.32128172107098635, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.342, "step": 2830 }, { "epoch": 0.3213952481809054, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.3307, "step": 2831 }, { "epoch": 0.3215087752908245, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.3492, "step": 2832 }, { "epoch": 0.3216223024007436, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.3297, "step": 2833 }, { "epoch": 0.32173582951066265, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.3318, "step": 2834 }, { "epoch": 0.32184935662058173, "grad_norm": 0.3828125, "learning_rate": 0.002, "loss": 5.3228, "step": 2835 }, { "epoch": 0.32196288373050086, "grad_norm": 0.38671875, "learning_rate": 0.002, "loss": 5.321, "step": 2836 }, { "epoch": 0.32207641084041994, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.3407, "step": 2837 }, { "epoch": 0.322189937950339, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.2928, "step": 2838 }, { "epoch": 0.3223034650602581, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.3478, "step": 2839 }, { "epoch": 0.32241699217017716, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.3466, "step": 2840 }, { "epoch": 0.32253051928009624, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.3429, "step": 2841 }, { "epoch": 0.3226440463900153, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.342, "step": 2842 }, { "epoch": 0.3227575734999344, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.3264, "step": 2843 }, { "epoch": 0.32287110060985347, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.3213, "step": 2844 }, { "epoch": 0.32298462771977254, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.3204, "step": 2845 }, { "epoch": 0.3230981548296916, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.3334, "step": 2846 }, { "epoch": 0.3232116819396107, "grad_norm": 0.234375, "learning_rate": 0.002, "loss": 5.3355, "step": 2847 }, { "epoch": 0.32332520904952977, "grad_norm": 0.23828125, "learning_rate": 0.002, "loss": 5.3395, "step": 2848 }, { "epoch": 0.32343873615944885, "grad_norm": 0.234375, "learning_rate": 0.002, "loss": 5.3408, "step": 2849 }, { "epoch": 0.3235522632693679, "grad_norm": 0.2412109375, "learning_rate": 0.002, "loss": 5.3461, "step": 2850 }, { "epoch": 0.323665790379287, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.3486, "step": 2851 }, { "epoch": 0.3237793174892061, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.3219, "step": 2852 }, { "epoch": 0.32389284459912515, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.3319, "step": 2853 }, { "epoch": 0.3240063717090442, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.3161, "step": 2854 }, { "epoch": 0.3241198988189633, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.3457, "step": 2855 }, { "epoch": 0.3242334259288824, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.3243, "step": 2856 }, { "epoch": 0.32434695303880146, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.3149, "step": 2857 }, { "epoch": 0.32446048014872053, "grad_norm": 0.38671875, "learning_rate": 0.002, "loss": 5.3407, "step": 2858 }, { "epoch": 0.3245740072586396, "grad_norm": 0.380859375, "learning_rate": 0.002, "loss": 5.3279, "step": 2859 }, { "epoch": 0.3246875343685587, "grad_norm": 0.38671875, "learning_rate": 0.002, "loss": 5.3447, "step": 2860 }, { "epoch": 0.32480106147847776, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.3372, "step": 2861 }, { "epoch": 0.32491458858839684, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.3345, "step": 2862 }, { "epoch": 0.3250281156983159, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.3291, "step": 2863 }, { "epoch": 0.325141642808235, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.325, "step": 2864 }, { "epoch": 0.32525516991815406, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.3521, "step": 2865 }, { "epoch": 0.32536869702807314, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.3367, "step": 2866 }, { "epoch": 0.3254822241379922, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.33, "step": 2867 }, { "epoch": 0.3255957512479113, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.3272, "step": 2868 }, { "epoch": 0.32570927835783037, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.3302, "step": 2869 }, { "epoch": 0.32582280546774944, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.3327, "step": 2870 }, { "epoch": 0.3259363325776685, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.3452, "step": 2871 }, { "epoch": 0.3260498596875876, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.3306, "step": 2872 }, { "epoch": 0.32616338679750667, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.3186, "step": 2873 }, { "epoch": 0.32627691390742575, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.3402, "step": 2874 }, { "epoch": 0.3263904410173448, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.3481, "step": 2875 }, { "epoch": 0.3265039681272639, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.3264, "step": 2876 }, { "epoch": 0.326617495237183, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.3282, "step": 2877 }, { "epoch": 0.32673102234710205, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.3183, "step": 2878 }, { "epoch": 0.3268445494570211, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.3151, "step": 2879 }, { "epoch": 0.3269580765669402, "grad_norm": 0.248046875, "learning_rate": 0.002, "loss": 5.3318, "step": 2880 }, { "epoch": 0.3270716036768593, "grad_norm": 0.2470703125, "learning_rate": 0.002, "loss": 5.3209, "step": 2881 }, { "epoch": 0.32718513078677836, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.337, "step": 2882 }, { "epoch": 0.32729865789669743, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.3284, "step": 2883 }, { "epoch": 0.3274121850066165, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.3222, "step": 2884 }, { "epoch": 0.3275257121165356, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.3146, "step": 2885 }, { "epoch": 0.32763923922645466, "grad_norm": 0.3984375, "learning_rate": 0.002, "loss": 5.3317, "step": 2886 }, { "epoch": 0.32775276633637374, "grad_norm": 0.392578125, "learning_rate": 0.002, "loss": 5.3342, "step": 2887 }, { "epoch": 0.3278662934462928, "grad_norm": 0.400390625, "learning_rate": 0.002, "loss": 5.3026, "step": 2888 }, { "epoch": 0.3279798205562119, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.3248, "step": 2889 }, { "epoch": 0.32809334766613096, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.3417, "step": 2890 }, { "epoch": 0.32820687477605004, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.3284, "step": 2891 }, { "epoch": 0.3283204018859691, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.3465, "step": 2892 }, { "epoch": 0.3284339289958882, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.3252, "step": 2893 }, { "epoch": 0.32854745610580727, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.3226, "step": 2894 }, { "epoch": 0.32866098321572634, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.3254, "step": 2895 }, { "epoch": 0.3287745103256454, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.3207, "step": 2896 }, { "epoch": 0.3288880374355645, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.3287, "step": 2897 }, { "epoch": 0.32900156454548357, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.3232, "step": 2898 }, { "epoch": 0.32911509165540265, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.319, "step": 2899 }, { "epoch": 0.3292286187653217, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.3463, "step": 2900 }, { "epoch": 0.3293421458752408, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.3265, "step": 2901 }, { "epoch": 0.3294556729851599, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.3294, "step": 2902 }, { "epoch": 0.32956920009507895, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.3316, "step": 2903 }, { "epoch": 0.329682727204998, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.3094, "step": 2904 }, { "epoch": 0.3297962543149171, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.3288, "step": 2905 }, { "epoch": 0.3299097814248362, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.317, "step": 2906 }, { "epoch": 0.33002330853475526, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.3368, "step": 2907 }, { "epoch": 0.33013683564467433, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.352, "step": 2908 }, { "epoch": 0.3302503627545934, "grad_norm": 0.400390625, "learning_rate": 0.002, "loss": 5.336, "step": 2909 }, { "epoch": 0.3303638898645125, "grad_norm": 0.408203125, "learning_rate": 0.002, "loss": 5.3242, "step": 2910 }, { "epoch": 0.33047741697443156, "grad_norm": 0.39453125, "learning_rate": 0.002, "loss": 5.3347, "step": 2911 }, { "epoch": 0.33059094408435064, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.3348, "step": 2912 }, { "epoch": 0.3307044711942697, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.3109, "step": 2913 }, { "epoch": 0.3308179983041888, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.3434, "step": 2914 }, { "epoch": 0.33093152541410786, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.3241, "step": 2915 }, { "epoch": 0.33104505252402694, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.3201, "step": 2916 }, { "epoch": 0.331158579633946, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.3388, "step": 2917 }, { "epoch": 0.3312721067438651, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.3363, "step": 2918 }, { "epoch": 0.33138563385378417, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.3154, "step": 2919 }, { "epoch": 0.33149916096370324, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.3413, "step": 2920 }, { "epoch": 0.3316126880736223, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.3314, "step": 2921 }, { "epoch": 0.3317262151835414, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.3209, "step": 2922 }, { "epoch": 0.33183974229346047, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.3384, "step": 2923 }, { "epoch": 0.33195326940337955, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.3117, "step": 2924 }, { "epoch": 0.3320667965132986, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.331, "step": 2925 }, { "epoch": 0.3321803236232177, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.314, "step": 2926 }, { "epoch": 0.3322938507331368, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.3115, "step": 2927 }, { "epoch": 0.33240737784305585, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.3048, "step": 2928 }, { "epoch": 0.3325209049529749, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.3258, "step": 2929 }, { "epoch": 0.332634432062894, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.3133, "step": 2930 }, { "epoch": 0.3327479591728131, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.3123, "step": 2931 }, { "epoch": 0.33286148628273216, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.3258, "step": 2932 }, { "epoch": 0.33297501339265123, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.3353, "step": 2933 }, { "epoch": 0.3330885405025703, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.3366, "step": 2934 }, { "epoch": 0.3332020676124894, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.3252, "step": 2935 }, { "epoch": 0.33331559472240846, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.3358, "step": 2936 }, { "epoch": 0.33342912183232754, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.3205, "step": 2937 }, { "epoch": 0.3335426489422466, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.3479, "step": 2938 }, { "epoch": 0.3336561760521657, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.3189, "step": 2939 }, { "epoch": 0.33376970316208476, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.3274, "step": 2940 }, { "epoch": 0.33388323027200384, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.3057, "step": 2941 }, { "epoch": 0.3339967573819229, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.3098, "step": 2942 }, { "epoch": 0.334110284491842, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.3237, "step": 2943 }, { "epoch": 0.33422381160176107, "grad_norm": 0.396484375, "learning_rate": 0.002, "loss": 5.3071, "step": 2944 }, { "epoch": 0.33433733871168014, "grad_norm": 0.396484375, "learning_rate": 0.002, "loss": 5.336, "step": 2945 }, { "epoch": 0.3344508658215992, "grad_norm": 0.390625, "learning_rate": 0.002, "loss": 5.3172, "step": 2946 }, { "epoch": 0.3345643929315183, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.3317, "step": 2947 }, { "epoch": 0.33467792004143737, "grad_norm": 0.38671875, "learning_rate": 0.002, "loss": 5.3274, "step": 2948 }, { "epoch": 0.33479144715135645, "grad_norm": 0.388671875, "learning_rate": 0.002, "loss": 5.342, "step": 2949 }, { "epoch": 0.3349049742612755, "grad_norm": 0.38671875, "learning_rate": 0.002, "loss": 5.3324, "step": 2950 }, { "epoch": 0.3350185013711946, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.3296, "step": 2951 }, { "epoch": 0.3351320284811137, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.3554, "step": 2952 }, { "epoch": 0.33524555559103275, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.3294, "step": 2953 }, { "epoch": 0.3353590827009518, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.314, "step": 2954 }, { "epoch": 0.3354726098108709, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.3497, "step": 2955 }, { "epoch": 0.33558613692079, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.3047, "step": 2956 }, { "epoch": 0.3356996640307091, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.3393, "step": 2957 }, { "epoch": 0.3358131911406282, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.3344, "step": 2958 }, { "epoch": 0.33592671825054726, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.3252, "step": 2959 }, { "epoch": 0.33604024536046634, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.3176, "step": 2960 }, { "epoch": 0.3361537724703854, "grad_norm": 0.39453125, "learning_rate": 0.002, "loss": 5.335, "step": 2961 }, { "epoch": 0.3362672995803045, "grad_norm": 0.431640625, "learning_rate": 0.002, "loss": 5.3354, "step": 2962 }, { "epoch": 0.33638082669022357, "grad_norm": 0.3984375, "learning_rate": 0.002, "loss": 5.3451, "step": 2963 }, { "epoch": 0.33649435380014264, "grad_norm": 0.380859375, "learning_rate": 0.002, "loss": 5.3318, "step": 2964 }, { "epoch": 0.3366078809100617, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.3326, "step": 2965 }, { "epoch": 0.3367214080199808, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.3261, "step": 2966 }, { "epoch": 0.33683493512989987, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.342, "step": 2967 }, { "epoch": 0.33694846223981895, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.3323, "step": 2968 }, { "epoch": 0.337061989349738, "grad_norm": 0.24609375, "learning_rate": 0.002, "loss": 5.3338, "step": 2969 }, { "epoch": 0.3371755164596571, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.3543, "step": 2970 }, { "epoch": 0.3372890435695762, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.3273, "step": 2971 }, { "epoch": 0.33740257067949525, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.3193, "step": 2972 }, { "epoch": 0.3375160977894143, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.3292, "step": 2973 }, { "epoch": 0.3376296248993334, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.3019, "step": 2974 }, { "epoch": 0.3377431520092525, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.3022, "step": 2975 }, { "epoch": 0.33785667911917155, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.3382, "step": 2976 }, { "epoch": 0.33797020622909063, "grad_norm": 0.25, "learning_rate": 0.002, "loss": 5.3395, "step": 2977 }, { "epoch": 0.3380837333390097, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.3156, "step": 2978 }, { "epoch": 0.3381972604489288, "grad_norm": 0.234375, "learning_rate": 0.002, "loss": 5.3168, "step": 2979 }, { "epoch": 0.33831078755884786, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.3191, "step": 2980 }, { "epoch": 0.33842431466876693, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.3107, "step": 2981 }, { "epoch": 0.338537841778686, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.3145, "step": 2982 }, { "epoch": 0.3386513688886051, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.3414, "step": 2983 }, { "epoch": 0.33876489599852416, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.3291, "step": 2984 }, { "epoch": 0.33887842310844324, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.3253, "step": 2985 }, { "epoch": 0.3389919502183623, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.3406, "step": 2986 }, { "epoch": 0.3391054773282814, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.3289, "step": 2987 }, { "epoch": 0.33921900443820047, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.3112, "step": 2988 }, { "epoch": 0.33933253154811954, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.3254, "step": 2989 }, { "epoch": 0.3394460586580386, "grad_norm": 0.25, "learning_rate": 0.002, "loss": 5.3211, "step": 2990 }, { "epoch": 0.3395595857679577, "grad_norm": 0.2197265625, "learning_rate": 0.002, "loss": 5.3269, "step": 2991 }, { "epoch": 0.33967311287787677, "grad_norm": 0.2314453125, "learning_rate": 0.002, "loss": 5.3512, "step": 2992 }, { "epoch": 0.33978663998779585, "grad_norm": 0.2265625, "learning_rate": 0.002, "loss": 5.3346, "step": 2993 }, { "epoch": 0.3399001670977149, "grad_norm": 0.2431640625, "learning_rate": 0.002, "loss": 5.3354, "step": 2994 }, { "epoch": 0.340013694207634, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.3292, "step": 2995 }, { "epoch": 0.3401272213175531, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.3184, "step": 2996 }, { "epoch": 0.34024074842747215, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.3216, "step": 2997 }, { "epoch": 0.3403542755373912, "grad_norm": 0.392578125, "learning_rate": 0.002, "loss": 5.3234, "step": 2998 }, { "epoch": 0.3404678026473103, "grad_norm": 0.447265625, "learning_rate": 0.002, "loss": 5.3348, "step": 2999 }, { "epoch": 0.3405813297572294, "grad_norm": 0.439453125, "learning_rate": 0.002, "loss": 5.3241, "step": 3000 }, { "epoch": 0.34069485686714845, "grad_norm": 0.41015625, "learning_rate": 0.002, "loss": 5.3281, "step": 3001 }, { "epoch": 0.34080838397706753, "grad_norm": 0.37109375, "learning_rate": 0.002, "loss": 5.3485, "step": 3002 }, { "epoch": 0.3409219110869866, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.338, "step": 3003 }, { "epoch": 0.3410354381969057, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.3191, "step": 3004 }, { "epoch": 0.34114896530682476, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.3209, "step": 3005 }, { "epoch": 0.34126249241674383, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.308, "step": 3006 }, { "epoch": 0.3413760195266629, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.3311, "step": 3007 }, { "epoch": 0.341489546636582, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.3338, "step": 3008 }, { "epoch": 0.34160307374650106, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.3379, "step": 3009 }, { "epoch": 0.34171660085642014, "grad_norm": 0.244140625, "learning_rate": 0.002, "loss": 5.3112, "step": 3010 }, { "epoch": 0.3418301279663392, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.3238, "step": 3011 }, { "epoch": 0.3419436550762583, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.3304, "step": 3012 }, { "epoch": 0.34205718218617737, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.3081, "step": 3013 }, { "epoch": 0.34217070929609644, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.3116, "step": 3014 }, { "epoch": 0.3422842364060155, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.3105, "step": 3015 }, { "epoch": 0.3423977635159346, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.3233, "step": 3016 }, { "epoch": 0.34251129062585367, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2972, "step": 3017 }, { "epoch": 0.34262481773577275, "grad_norm": 0.2470703125, "learning_rate": 0.002, "loss": 5.3195, "step": 3018 }, { "epoch": 0.3427383448456918, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.3048, "step": 3019 }, { "epoch": 0.3428518719556109, "grad_norm": 0.2490234375, "learning_rate": 0.002, "loss": 5.3248, "step": 3020 }, { "epoch": 0.34296539906553, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.3049, "step": 3021 }, { "epoch": 0.34307892617544905, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.3356, "step": 3022 }, { "epoch": 0.3431924532853681, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.3357, "step": 3023 }, { "epoch": 0.3433059803952872, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.3218, "step": 3024 }, { "epoch": 0.3434195075052063, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.3314, "step": 3025 }, { "epoch": 0.34353303461512535, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.3361, "step": 3026 }, { "epoch": 0.34364656172504443, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.3372, "step": 3027 }, { "epoch": 0.3437600888349635, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.3272, "step": 3028 }, { "epoch": 0.3438736159448826, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.3178, "step": 3029 }, { "epoch": 0.34398714305480166, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.3228, "step": 3030 }, { "epoch": 0.34410067016472073, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.3156, "step": 3031 }, { "epoch": 0.3442141972746398, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.3386, "step": 3032 }, { "epoch": 0.3443277243845589, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.3128, "step": 3033 }, { "epoch": 0.34444125149447796, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.309, "step": 3034 }, { "epoch": 0.34455477860439704, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.3168, "step": 3035 }, { "epoch": 0.3446683057143161, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.3249, "step": 3036 }, { "epoch": 0.3447818328242352, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.3292, "step": 3037 }, { "epoch": 0.34489535993415427, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.3285, "step": 3038 }, { "epoch": 0.34500888704407334, "grad_norm": 0.37109375, "learning_rate": 0.002, "loss": 5.3247, "step": 3039 }, { "epoch": 0.3451224141539924, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.3333, "step": 3040 }, { "epoch": 0.3452359412639115, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.3243, "step": 3041 }, { "epoch": 0.34534946837383057, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.3349, "step": 3042 }, { "epoch": 0.34546299548374965, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.3014, "step": 3043 }, { "epoch": 0.3455765225936687, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.3158, "step": 3044 }, { "epoch": 0.3456900497035878, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.34, "step": 3045 }, { "epoch": 0.3458035768135069, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.3326, "step": 3046 }, { "epoch": 0.34591710392342595, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.3482, "step": 3047 }, { "epoch": 0.346030631033345, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.3207, "step": 3048 }, { "epoch": 0.3461441581432641, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.3131, "step": 3049 }, { "epoch": 0.3462576852531832, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.3176, "step": 3050 }, { "epoch": 0.34637121236310225, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.3297, "step": 3051 }, { "epoch": 0.34648473947302133, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.3321, "step": 3052 }, { "epoch": 0.3465982665829404, "grad_norm": 0.24609375, "learning_rate": 0.002, "loss": 5.319, "step": 3053 }, { "epoch": 0.3467117936928595, "grad_norm": 0.240234375, "learning_rate": 0.002, "loss": 5.3066, "step": 3054 }, { "epoch": 0.34682532080277856, "grad_norm": 0.2353515625, "learning_rate": 0.002, "loss": 5.3165, "step": 3055 }, { "epoch": 0.34693884791269763, "grad_norm": 0.2470703125, "learning_rate": 0.002, "loss": 5.3123, "step": 3056 }, { "epoch": 0.3470523750226167, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.3185, "step": 3057 }, { "epoch": 0.3471659021325358, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.3288, "step": 3058 }, { "epoch": 0.34727942924245486, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.3089, "step": 3059 }, { "epoch": 0.34739295635237394, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.314, "step": 3060 }, { "epoch": 0.347506483462293, "grad_norm": 0.38671875, "learning_rate": 0.002, "loss": 5.314, "step": 3061 }, { "epoch": 0.3476200105722121, "grad_norm": 0.38671875, "learning_rate": 0.002, "loss": 5.3109, "step": 3062 }, { "epoch": 0.34773353768213117, "grad_norm": 0.390625, "learning_rate": 0.002, "loss": 5.3109, "step": 3063 }, { "epoch": 0.34784706479205024, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.3166, "step": 3064 }, { "epoch": 0.3479605919019693, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.3213, "step": 3065 }, { "epoch": 0.3480741190118884, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.3529, "step": 3066 }, { "epoch": 0.34818764612180747, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.3441, "step": 3067 }, { "epoch": 0.34830117323172655, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.3203, "step": 3068 }, { "epoch": 0.3484147003416456, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.3232, "step": 3069 }, { "epoch": 0.3485282274515647, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.3089, "step": 3070 }, { "epoch": 0.3486417545614838, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.3129, "step": 3071 }, { "epoch": 0.34875528167140285, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.3029, "step": 3072 }, { "epoch": 0.3488688087813219, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.3018, "step": 3073 }, { "epoch": 0.348982335891241, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.316, "step": 3074 }, { "epoch": 0.3490958630011601, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.2986, "step": 3075 }, { "epoch": 0.34920939011107915, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.312, "step": 3076 }, { "epoch": 0.3493229172209983, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.3352, "step": 3077 }, { "epoch": 0.34943644433091736, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.3344, "step": 3078 }, { "epoch": 0.34954997144083644, "grad_norm": 0.384765625, "learning_rate": 0.002, "loss": 5.329, "step": 3079 }, { "epoch": 0.3496634985507555, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.311, "step": 3080 }, { "epoch": 0.3497770256606746, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.3275, "step": 3081 }, { "epoch": 0.34989055277059367, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.3306, "step": 3082 }, { "epoch": 0.35000407988051274, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.3277, "step": 3083 }, { "epoch": 0.3501176069904318, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.342, "step": 3084 }, { "epoch": 0.3502311341003509, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.3292, "step": 3085 }, { "epoch": 0.35034466121026997, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.3075, "step": 3086 }, { "epoch": 0.35045818832018905, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.315, "step": 3087 }, { "epoch": 0.3505717154301081, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.3349, "step": 3088 }, { "epoch": 0.3506852425400272, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.3212, "step": 3089 }, { "epoch": 0.3507987696499463, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.3096, "step": 3090 }, { "epoch": 0.35091229675986535, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.322, "step": 3091 }, { "epoch": 0.3510258238697844, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.3226, "step": 3092 }, { "epoch": 0.3511393509797035, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.3051, "step": 3093 }, { "epoch": 0.3512528780896226, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.3034, "step": 3094 }, { "epoch": 0.35136640519954165, "grad_norm": 0.25, "learning_rate": 0.002, "loss": 5.3082, "step": 3095 }, { "epoch": 0.35147993230946073, "grad_norm": 0.25, "learning_rate": 0.002, "loss": 5.3186, "step": 3096 }, { "epoch": 0.3515934594193798, "grad_norm": 0.240234375, "learning_rate": 0.002, "loss": 5.3335, "step": 3097 }, { "epoch": 0.3517069865292989, "grad_norm": 0.2421875, "learning_rate": 0.002, "loss": 5.2968, "step": 3098 }, { "epoch": 0.35182051363921796, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.3212, "step": 3099 }, { "epoch": 0.35193404074913703, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.3047, "step": 3100 }, { "epoch": 0.3520475678590561, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.3045, "step": 3101 }, { "epoch": 0.3521610949689752, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.3172, "step": 3102 }, { "epoch": 0.35227462207889426, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.3127, "step": 3103 }, { "epoch": 0.35238814918881334, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.3219, "step": 3104 }, { "epoch": 0.3525016762987324, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.3156, "step": 3105 }, { "epoch": 0.3526152034086515, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.331, "step": 3106 }, { "epoch": 0.35272873051857057, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.3147, "step": 3107 }, { "epoch": 0.35284225762848964, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.306, "step": 3108 }, { "epoch": 0.3529557847384087, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.3189, "step": 3109 }, { "epoch": 0.3530693118483278, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.328, "step": 3110 }, { "epoch": 0.35318283895824687, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.3258, "step": 3111 }, { "epoch": 0.35329636606816595, "grad_norm": 0.380859375, "learning_rate": 0.002, "loss": 5.3152, "step": 3112 }, { "epoch": 0.353409893178085, "grad_norm": 0.40234375, "learning_rate": 0.002, "loss": 5.3245, "step": 3113 }, { "epoch": 0.3535234202880041, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.3179, "step": 3114 }, { "epoch": 0.3536369473979232, "grad_norm": 0.380859375, "learning_rate": 0.002, "loss": 5.3127, "step": 3115 }, { "epoch": 0.35375047450784225, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.3291, "step": 3116 }, { "epoch": 0.3538640016177613, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.3073, "step": 3117 }, { "epoch": 0.3539775287276804, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.3408, "step": 3118 }, { "epoch": 0.3540910558375995, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.3225, "step": 3119 }, { "epoch": 0.35420458294751855, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.3249, "step": 3120 }, { "epoch": 0.35431811005743763, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.3279, "step": 3121 }, { "epoch": 0.3544316371673567, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.3102, "step": 3122 }, { "epoch": 0.3545451642772758, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.3192, "step": 3123 }, { "epoch": 0.35465869138719486, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.3057, "step": 3124 }, { "epoch": 0.35477221849711393, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.309, "step": 3125 }, { "epoch": 0.354885745607033, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.3038, "step": 3126 }, { "epoch": 0.3549992727169521, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.3342, "step": 3127 }, { "epoch": 0.35511279982687116, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.3238, "step": 3128 }, { "epoch": 0.35522632693679024, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.3152, "step": 3129 }, { "epoch": 0.3553398540467093, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.3117, "step": 3130 }, { "epoch": 0.3554533811566284, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.3244, "step": 3131 }, { "epoch": 0.35556690826654747, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.2969, "step": 3132 }, { "epoch": 0.35568043537646654, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.3235, "step": 3133 }, { "epoch": 0.3557939624863856, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.3051, "step": 3134 }, { "epoch": 0.3559074895963047, "grad_norm": 0.2451171875, "learning_rate": 0.002, "loss": 5.2841, "step": 3135 }, { "epoch": 0.35602101670622377, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.3125, "step": 3136 }, { "epoch": 0.35613454381614285, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.3048, "step": 3137 }, { "epoch": 0.3562480709260619, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.3234, "step": 3138 }, { "epoch": 0.356361598035981, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.3029, "step": 3139 }, { "epoch": 0.3564751251459001, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.3099, "step": 3140 }, { "epoch": 0.35658865225581915, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.3199, "step": 3141 }, { "epoch": 0.3567021793657382, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.3249, "step": 3142 }, { "epoch": 0.3568157064756573, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.3361, "step": 3143 }, { "epoch": 0.3569292335855764, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.317, "step": 3144 }, { "epoch": 0.35704276069549545, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.3049, "step": 3145 }, { "epoch": 0.35715628780541453, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.3286, "step": 3146 }, { "epoch": 0.3572698149153336, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.3296, "step": 3147 }, { "epoch": 0.3573833420252527, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.3186, "step": 3148 }, { "epoch": 0.35749686913517176, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2932, "step": 3149 }, { "epoch": 0.35761039624509083, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.2966, "step": 3150 }, { "epoch": 0.3577239233550099, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2995, "step": 3151 }, { "epoch": 0.357837450464929, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.3147, "step": 3152 }, { "epoch": 0.35795097757484806, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.3156, "step": 3153 }, { "epoch": 0.35806450468476714, "grad_norm": 0.25, "learning_rate": 0.002, "loss": 5.2979, "step": 3154 }, { "epoch": 0.3581780317946862, "grad_norm": 0.2255859375, "learning_rate": 0.002, "loss": 5.3416, "step": 3155 }, { "epoch": 0.3582915589046053, "grad_norm": 0.2431640625, "learning_rate": 0.002, "loss": 5.3202, "step": 3156 }, { "epoch": 0.35840508601452437, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.3069, "step": 3157 }, { "epoch": 0.35851861312444344, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.3093, "step": 3158 }, { "epoch": 0.3586321402343625, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.3125, "step": 3159 }, { "epoch": 0.3587456673442816, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.3123, "step": 3160 }, { "epoch": 0.35885919445420067, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.3165, "step": 3161 }, { "epoch": 0.35897272156411975, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.3106, "step": 3162 }, { "epoch": 0.3590862486740388, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.3291, "step": 3163 }, { "epoch": 0.3591997757839579, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.329, "step": 3164 }, { "epoch": 0.359313302893877, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.3282, "step": 3165 }, { "epoch": 0.35942683000379605, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.303, "step": 3166 }, { "epoch": 0.3595403571137151, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.3195, "step": 3167 }, { "epoch": 0.3596538842236342, "grad_norm": 0.384765625, "learning_rate": 0.002, "loss": 5.3166, "step": 3168 }, { "epoch": 0.3597674113335533, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.3063, "step": 3169 }, { "epoch": 0.35988093844347235, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.3031, "step": 3170 }, { "epoch": 0.35999446555339143, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.3288, "step": 3171 }, { "epoch": 0.3601079926633105, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.3227, "step": 3172 }, { "epoch": 0.3602215197732296, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.3279, "step": 3173 }, { "epoch": 0.36033504688314866, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.326, "step": 3174 }, { "epoch": 0.36044857399306773, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.3191, "step": 3175 }, { "epoch": 0.3605621011029868, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.3155, "step": 3176 }, { "epoch": 0.3606756282129059, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.3063, "step": 3177 }, { "epoch": 0.36078915532282496, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.3223, "step": 3178 }, { "epoch": 0.36090268243274404, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.3163, "step": 3179 }, { "epoch": 0.3610162095426631, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.3038, "step": 3180 }, { "epoch": 0.3611297366525822, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.3241, "step": 3181 }, { "epoch": 0.36124326376250127, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.3181, "step": 3182 }, { "epoch": 0.36135679087242034, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.3181, "step": 3183 }, { "epoch": 0.3614703179823394, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.3372, "step": 3184 }, { "epoch": 0.3615838450922585, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.3051, "step": 3185 }, { "epoch": 0.36169737220217757, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.309, "step": 3186 }, { "epoch": 0.36181089931209665, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.3042, "step": 3187 }, { "epoch": 0.3619244264220157, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2981, "step": 3188 }, { "epoch": 0.3620379535319348, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.3048, "step": 3189 }, { "epoch": 0.3621514806418539, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.3262, "step": 3190 }, { "epoch": 0.36226500775177295, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.3297, "step": 3191 }, { "epoch": 0.362378534861692, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.3192, "step": 3192 }, { "epoch": 0.3624920619716111, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.3149, "step": 3193 }, { "epoch": 0.3626055890815302, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.3346, "step": 3194 }, { "epoch": 0.36271911619144925, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.3045, "step": 3195 }, { "epoch": 0.36283264330136833, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2903, "step": 3196 }, { "epoch": 0.3629461704112874, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.3244, "step": 3197 }, { "epoch": 0.36305969752120654, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.2937, "step": 3198 }, { "epoch": 0.3631732246311256, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.3175, "step": 3199 }, { "epoch": 0.3632867517410447, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.3192, "step": 3200 }, { "epoch": 0.36340027885096376, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.305, "step": 3201 }, { "epoch": 0.36351380596088284, "grad_norm": 0.2470703125, "learning_rate": 0.002, "loss": 5.3004, "step": 3202 }, { "epoch": 0.3636273330708019, "grad_norm": 0.2421875, "learning_rate": 0.002, "loss": 5.3369, "step": 3203 }, { "epoch": 0.363740860180721, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.3221, "step": 3204 }, { "epoch": 0.36385438729064007, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.3127, "step": 3205 }, { "epoch": 0.36396791440055914, "grad_norm": 0.408203125, "learning_rate": 0.002, "loss": 5.2887, "step": 3206 }, { "epoch": 0.3640814415104782, "grad_norm": 0.462890625, "learning_rate": 0.002, "loss": 5.3157, "step": 3207 }, { "epoch": 0.3641949686203973, "grad_norm": 0.4296875, "learning_rate": 0.002, "loss": 5.3085, "step": 3208 }, { "epoch": 0.3643084957303164, "grad_norm": 0.412109375, "learning_rate": 0.002, "loss": 5.3228, "step": 3209 }, { "epoch": 0.36442202284023545, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.3223, "step": 3210 }, { "epoch": 0.3645355499501545, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.3314, "step": 3211 }, { "epoch": 0.3646490770600736, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.3099, "step": 3212 }, { "epoch": 0.3647626041699927, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.3108, "step": 3213 }, { "epoch": 0.36487613127991175, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.3112, "step": 3214 }, { "epoch": 0.36498965838983083, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.3203, "step": 3215 }, { "epoch": 0.3651031854997499, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2962, "step": 3216 }, { "epoch": 0.365216712609669, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.3033, "step": 3217 }, { "epoch": 0.36533023971958806, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.323, "step": 3218 }, { "epoch": 0.36544376682950713, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.3085, "step": 3219 }, { "epoch": 0.3655572939394262, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.3226, "step": 3220 }, { "epoch": 0.3656708210493453, "grad_norm": 0.3828125, "learning_rate": 0.002, "loss": 5.3234, "step": 3221 }, { "epoch": 0.36578434815926436, "grad_norm": 0.380859375, "learning_rate": 0.002, "loss": 5.3133, "step": 3222 }, { "epoch": 0.36589787526918344, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.3283, "step": 3223 }, { "epoch": 0.3660114023791025, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.3181, "step": 3224 }, { "epoch": 0.3661249294890216, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2906, "step": 3225 }, { "epoch": 0.36623845659894066, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.311, "step": 3226 }, { "epoch": 0.36635198370885974, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.3213, "step": 3227 }, { "epoch": 0.3664655108187788, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.3002, "step": 3228 }, { "epoch": 0.3665790379286979, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.3069, "step": 3229 }, { "epoch": 0.36669256503861697, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2891, "step": 3230 }, { "epoch": 0.36680609214853604, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.3018, "step": 3231 }, { "epoch": 0.3669196192584551, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2983, "step": 3232 }, { "epoch": 0.3670331463683742, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.3005, "step": 3233 }, { "epoch": 0.3671466734782933, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.3322, "step": 3234 }, { "epoch": 0.36726020058821235, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.3175, "step": 3235 }, { "epoch": 0.3673737276981314, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.3278, "step": 3236 }, { "epoch": 0.3674872548080505, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.3255, "step": 3237 }, { "epoch": 0.3676007819179696, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.313, "step": 3238 }, { "epoch": 0.36771430902788865, "grad_norm": 0.2470703125, "learning_rate": 0.002, "loss": 5.3094, "step": 3239 }, { "epoch": 0.36782783613780773, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.3037, "step": 3240 }, { "epoch": 0.3679413632477268, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.3215, "step": 3241 }, { "epoch": 0.3680548903576459, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2966, "step": 3242 }, { "epoch": 0.36816841746756496, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2997, "step": 3243 }, { "epoch": 0.36828194457748403, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.3044, "step": 3244 }, { "epoch": 0.3683954716874031, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2939, "step": 3245 }, { "epoch": 0.3685089987973222, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.3082, "step": 3246 }, { "epoch": 0.36862252590724126, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.3151, "step": 3247 }, { "epoch": 0.36873605301716034, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.3188, "step": 3248 }, { "epoch": 0.3688495801270794, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.316, "step": 3249 }, { "epoch": 0.3689631072369985, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.2889, "step": 3250 }, { "epoch": 0.36907663434691756, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2942, "step": 3251 }, { "epoch": 0.36919016145683664, "grad_norm": 0.23828125, "learning_rate": 0.002, "loss": 5.3072, "step": 3252 }, { "epoch": 0.3693036885667557, "grad_norm": 0.2392578125, "learning_rate": 0.002, "loss": 5.32, "step": 3253 }, { "epoch": 0.3694172156766748, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.3133, "step": 3254 }, { "epoch": 0.36953074278659387, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.3143, "step": 3255 }, { "epoch": 0.36964426989651294, "grad_norm": 0.4296875, "learning_rate": 0.002, "loss": 5.3153, "step": 3256 }, { "epoch": 0.369757797006432, "grad_norm": 0.4609375, "learning_rate": 0.002, "loss": 5.3181, "step": 3257 }, { "epoch": 0.3698713241163511, "grad_norm": 0.46875, "learning_rate": 0.002, "loss": 5.3032, "step": 3258 }, { "epoch": 0.3699848512262702, "grad_norm": 0.42578125, "learning_rate": 0.002, "loss": 5.2921, "step": 3259 }, { "epoch": 0.37009837833618925, "grad_norm": 0.40625, "learning_rate": 0.002, "loss": 5.3105, "step": 3260 }, { "epoch": 0.3702119054461083, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.3234, "step": 3261 }, { "epoch": 0.3703254325560274, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.3226, "step": 3262 }, { "epoch": 0.3704389596659465, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.3033, "step": 3263 }, { "epoch": 0.37055248677586555, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.3234, "step": 3264 }, { "epoch": 0.37066601388578463, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.3089, "step": 3265 }, { "epoch": 0.3707795409957037, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.3144, "step": 3266 }, { "epoch": 0.3708930681056228, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.3109, "step": 3267 }, { "epoch": 0.37100659521554186, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.3064, "step": 3268 }, { "epoch": 0.37112012232546093, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.312, "step": 3269 }, { "epoch": 0.37123364943538, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.317, "step": 3270 }, { "epoch": 0.3713471765452991, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.3365, "step": 3271 }, { "epoch": 0.37146070365521816, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2891, "step": 3272 }, { "epoch": 0.37157423076513724, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.3191, "step": 3273 }, { "epoch": 0.3716877578750563, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.3033, "step": 3274 }, { "epoch": 0.3718012849849754, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.3263, "step": 3275 }, { "epoch": 0.37191481209489446, "grad_norm": 0.37109375, "learning_rate": 0.002, "loss": 5.3138, "step": 3276 }, { "epoch": 0.37202833920481354, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.3031, "step": 3277 }, { "epoch": 0.3721418663147326, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.3059, "step": 3278 }, { "epoch": 0.3722553934246517, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.2929, "step": 3279 }, { "epoch": 0.37236892053457077, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.317, "step": 3280 }, { "epoch": 0.37248244764448984, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.3127, "step": 3281 }, { "epoch": 0.3725959747544089, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.3216, "step": 3282 }, { "epoch": 0.372709501864328, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2894, "step": 3283 }, { "epoch": 0.3728230289742471, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2889, "step": 3284 }, { "epoch": 0.37293655608416615, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.3017, "step": 3285 }, { "epoch": 0.3730500831940852, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.3205, "step": 3286 }, { "epoch": 0.3731636103040043, "grad_norm": 0.25, "learning_rate": 0.002, "loss": 5.3003, "step": 3287 }, { "epoch": 0.3732771374139234, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.3238, "step": 3288 }, { "epoch": 0.37339066452384245, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2894, "step": 3289 }, { "epoch": 0.37350419163376153, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.3041, "step": 3290 }, { "epoch": 0.3736177187436806, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2978, "step": 3291 }, { "epoch": 0.3737312458535997, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.3051, "step": 3292 }, { "epoch": 0.37384477296351876, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.3247, "step": 3293 }, { "epoch": 0.37395830007343783, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.2791, "step": 3294 }, { "epoch": 0.3740718271833569, "grad_norm": 0.23046875, "learning_rate": 0.002, "loss": 5.3095, "step": 3295 }, { "epoch": 0.374185354293276, "grad_norm": 0.2236328125, "learning_rate": 0.002, "loss": 5.2932, "step": 3296 }, { "epoch": 0.37429888140319506, "grad_norm": 0.22265625, "learning_rate": 0.002, "loss": 5.3145, "step": 3297 }, { "epoch": 0.37441240851311414, "grad_norm": 0.2197265625, "learning_rate": 0.002, "loss": 5.3071, "step": 3298 }, { "epoch": 0.3745259356230332, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.2972, "step": 3299 }, { "epoch": 0.3746394627329523, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.3253, "step": 3300 }, { "epoch": 0.37475298984287136, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.3118, "step": 3301 }, { "epoch": 0.37486651695279044, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2949, "step": 3302 }, { "epoch": 0.3749800440627095, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.3095, "step": 3303 }, { "epoch": 0.3750935711726286, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.307, "step": 3304 }, { "epoch": 0.37520709828254767, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.3159, "step": 3305 }, { "epoch": 0.37532062539246674, "grad_norm": 0.384765625, "learning_rate": 0.002, "loss": 5.2928, "step": 3306 }, { "epoch": 0.3754341525023858, "grad_norm": 0.421875, "learning_rate": 0.002, "loss": 5.3079, "step": 3307 }, { "epoch": 0.3755476796123049, "grad_norm": 0.392578125, "learning_rate": 0.002, "loss": 5.3211, "step": 3308 }, { "epoch": 0.375661206722224, "grad_norm": 0.384765625, "learning_rate": 0.002, "loss": 5.3178, "step": 3309 }, { "epoch": 0.37577473383214305, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.2976, "step": 3310 }, { "epoch": 0.3758882609420621, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.3059, "step": 3311 }, { "epoch": 0.3760017880519812, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.3111, "step": 3312 }, { "epoch": 0.3761153151619003, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.3226, "step": 3313 }, { "epoch": 0.37622884227181935, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.3025, "step": 3314 }, { "epoch": 0.37634236938173843, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2729, "step": 3315 }, { "epoch": 0.3764558964916575, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.296, "step": 3316 }, { "epoch": 0.3765694236015766, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.306, "step": 3317 }, { "epoch": 0.37668295071149566, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.3082, "step": 3318 }, { "epoch": 0.3767964778214148, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.3173, "step": 3319 }, { "epoch": 0.37691000493133386, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.3247, "step": 3320 }, { "epoch": 0.37702353204125294, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.3139, "step": 3321 }, { "epoch": 0.377137059151172, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.3161, "step": 3322 }, { "epoch": 0.3772505862610911, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.3052, "step": 3323 }, { "epoch": 0.37736411337101017, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2994, "step": 3324 }, { "epoch": 0.37747764048092924, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.3116, "step": 3325 }, { "epoch": 0.3775911675908483, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.3029, "step": 3326 }, { "epoch": 0.3777046947007674, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.317, "step": 3327 }, { "epoch": 0.37781822181068647, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.3163, "step": 3328 }, { "epoch": 0.37793174892060555, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.3218, "step": 3329 }, { "epoch": 0.3780452760305246, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.3068, "step": 3330 }, { "epoch": 0.3781588031404437, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2996, "step": 3331 }, { "epoch": 0.3782723302503628, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.3009, "step": 3332 }, { "epoch": 0.37838585736028185, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.3158, "step": 3333 }, { "epoch": 0.37849938447020093, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.2897, "step": 3334 }, { "epoch": 0.37861291158012, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.2965, "step": 3335 }, { "epoch": 0.3787264386900391, "grad_norm": 0.240234375, "learning_rate": 0.002, "loss": 5.3016, "step": 3336 }, { "epoch": 0.37883996579995816, "grad_norm": 0.2294921875, "learning_rate": 0.002, "loss": 5.3235, "step": 3337 }, { "epoch": 0.37895349290987723, "grad_norm": 0.234375, "learning_rate": 0.002, "loss": 5.2885, "step": 3338 }, { "epoch": 0.3790670200197963, "grad_norm": 0.244140625, "learning_rate": 0.002, "loss": 5.3143, "step": 3339 }, { "epoch": 0.3791805471297154, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2938, "step": 3340 }, { "epoch": 0.37929407423963446, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.3177, "step": 3341 }, { "epoch": 0.37940760134955354, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.3067, "step": 3342 }, { "epoch": 0.3795211284594726, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.3083, "step": 3343 }, { "epoch": 0.3796346555693917, "grad_norm": 0.3984375, "learning_rate": 0.002, "loss": 5.3052, "step": 3344 }, { "epoch": 0.37974818267931076, "grad_norm": 0.38671875, "learning_rate": 0.002, "loss": 5.2903, "step": 3345 }, { "epoch": 0.37986170978922984, "grad_norm": 0.396484375, "learning_rate": 0.002, "loss": 5.3206, "step": 3346 }, { "epoch": 0.3799752368991489, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.3173, "step": 3347 }, { "epoch": 0.380088764009068, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.3131, "step": 3348 }, { "epoch": 0.38020229111898707, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.3136, "step": 3349 }, { "epoch": 0.38031581822890614, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.3236, "step": 3350 }, { "epoch": 0.3804293453388252, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.3102, "step": 3351 }, { "epoch": 0.3805428724487443, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.3174, "step": 3352 }, { "epoch": 0.38065639955866337, "grad_norm": 0.416015625, "learning_rate": 0.002, "loss": 5.3144, "step": 3353 }, { "epoch": 0.38076992666858245, "grad_norm": 0.3984375, "learning_rate": 0.002, "loss": 5.2903, "step": 3354 }, { "epoch": 0.3808834537785015, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.3128, "step": 3355 }, { "epoch": 0.3809969808884206, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2917, "step": 3356 }, { "epoch": 0.3811105079983397, "grad_norm": 0.2431640625, "learning_rate": 0.002, "loss": 5.3145, "step": 3357 }, { "epoch": 0.38122403510825875, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.2955, "step": 3358 }, { "epoch": 0.38133756221817783, "grad_norm": 0.2431640625, "learning_rate": 0.002, "loss": 5.2991, "step": 3359 }, { "epoch": 0.3814510893280969, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.3034, "step": 3360 }, { "epoch": 0.381564616438016, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.3114, "step": 3361 }, { "epoch": 0.38167814354793506, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.3143, "step": 3362 }, { "epoch": 0.38179167065785413, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2907, "step": 3363 }, { "epoch": 0.3819051977677732, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.3209, "step": 3364 }, { "epoch": 0.3820187248776923, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2979, "step": 3365 }, { "epoch": 0.38213225198761136, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2874, "step": 3366 }, { "epoch": 0.38224577909753044, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.3126, "step": 3367 }, { "epoch": 0.3823593062074495, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.2988, "step": 3368 }, { "epoch": 0.3824728333173686, "grad_norm": 0.2490234375, "learning_rate": 0.002, "loss": 5.3126, "step": 3369 }, { "epoch": 0.38258636042728766, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.3096, "step": 3370 }, { "epoch": 0.38269988753720674, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.3, "step": 3371 }, { "epoch": 0.3828134146471258, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.2919, "step": 3372 }, { "epoch": 0.3829269417570449, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.3093, "step": 3373 }, { "epoch": 0.38304046886696397, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.3092, "step": 3374 }, { "epoch": 0.38315399597688304, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.3262, "step": 3375 }, { "epoch": 0.3832675230868021, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.302, "step": 3376 }, { "epoch": 0.3833810501967212, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.3078, "step": 3377 }, { "epoch": 0.38349457730664027, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.2978, "step": 3378 }, { "epoch": 0.38360810441655935, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.3158, "step": 3379 }, { "epoch": 0.3837216315264784, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.3061, "step": 3380 }, { "epoch": 0.3838351586363975, "grad_norm": 0.390625, "learning_rate": 0.002, "loss": 5.2861, "step": 3381 }, { "epoch": 0.3839486857463166, "grad_norm": 0.408203125, "learning_rate": 0.002, "loss": 5.3241, "step": 3382 }, { "epoch": 0.38406221285623565, "grad_norm": 0.400390625, "learning_rate": 0.002, "loss": 5.2907, "step": 3383 }, { "epoch": 0.38417573996615473, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.309, "step": 3384 }, { "epoch": 0.3842892670760738, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.3131, "step": 3385 }, { "epoch": 0.3844027941859929, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.3057, "step": 3386 }, { "epoch": 0.38451632129591196, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.3159, "step": 3387 }, { "epoch": 0.38462984840583103, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.3073, "step": 3388 }, { "epoch": 0.3847433755157501, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.3024, "step": 3389 }, { "epoch": 0.3848569026256692, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.3007, "step": 3390 }, { "epoch": 0.38497042973558826, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.3183, "step": 3391 }, { "epoch": 0.38508395684550734, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.3184, "step": 3392 }, { "epoch": 0.3851974839554264, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.2942, "step": 3393 }, { "epoch": 0.3853110110653455, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.306, "step": 3394 }, { "epoch": 0.38542453817526456, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.306, "step": 3395 }, { "epoch": 0.38553806528518364, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.2898, "step": 3396 }, { "epoch": 0.3856515923951027, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.3073, "step": 3397 }, { "epoch": 0.3857651195050218, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2993, "step": 3398 }, { "epoch": 0.38587864661494087, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.316, "step": 3399 }, { "epoch": 0.38599217372485994, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.2966, "step": 3400 }, { "epoch": 0.386105700834779, "grad_norm": 0.2431640625, "learning_rate": 0.002, "loss": 5.2976, "step": 3401 }, { "epoch": 0.3862192279446981, "grad_norm": 0.244140625, "learning_rate": 0.002, "loss": 5.2977, "step": 3402 }, { "epoch": 0.38633275505461717, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.3054, "step": 3403 }, { "epoch": 0.38644628216453625, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.3014, "step": 3404 }, { "epoch": 0.3865598092744553, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.2918, "step": 3405 }, { "epoch": 0.3866733363843744, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.2981, "step": 3406 }, { "epoch": 0.3867868634942935, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.3097, "step": 3407 }, { "epoch": 0.38690039060421255, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.302, "step": 3408 }, { "epoch": 0.38701391771413163, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.3007, "step": 3409 }, { "epoch": 0.3871274448240507, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.3025, "step": 3410 }, { "epoch": 0.3872409719339698, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.3125, "step": 3411 }, { "epoch": 0.38735449904388886, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.3009, "step": 3412 }, { "epoch": 0.38746802615380793, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2904, "step": 3413 }, { "epoch": 0.387581553263727, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.3, "step": 3414 }, { "epoch": 0.3876950803736461, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.3193, "step": 3415 }, { "epoch": 0.38780860748356516, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.3055, "step": 3416 }, { "epoch": 0.38792213459348424, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.3106, "step": 3417 }, { "epoch": 0.3880356617034033, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2826, "step": 3418 }, { "epoch": 0.3881491888133224, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2808, "step": 3419 }, { "epoch": 0.38826271592324146, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.3091, "step": 3420 }, { "epoch": 0.38837624303316054, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.308, "step": 3421 }, { "epoch": 0.3884897701430796, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.3116, "step": 3422 }, { "epoch": 0.3886032972529987, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2926, "step": 3423 }, { "epoch": 0.38871682436291777, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.3214, "step": 3424 }, { "epoch": 0.38883035147283684, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.32, "step": 3425 }, { "epoch": 0.3889438785827559, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.3081, "step": 3426 }, { "epoch": 0.389057405692675, "grad_norm": 0.236328125, "learning_rate": 0.002, "loss": 5.3099, "step": 3427 }, { "epoch": 0.38917093280259407, "grad_norm": 0.2333984375, "learning_rate": 0.002, "loss": 5.3099, "step": 3428 }, { "epoch": 0.38928445991251315, "grad_norm": 0.25, "learning_rate": 0.002, "loss": 5.3123, "step": 3429 }, { "epoch": 0.3893979870224322, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.3189, "step": 3430 }, { "epoch": 0.3895115141323513, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2955, "step": 3431 }, { "epoch": 0.3896250412422704, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.2989, "step": 3432 }, { "epoch": 0.38973856835218945, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.3033, "step": 3433 }, { "epoch": 0.3898520954621085, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.3075, "step": 3434 }, { "epoch": 0.3899656225720276, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.3146, "step": 3435 }, { "epoch": 0.3900791496819467, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2914, "step": 3436 }, { "epoch": 0.39019267679186576, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.3067, "step": 3437 }, { "epoch": 0.39030620390178483, "grad_norm": 0.2412109375, "learning_rate": 0.002, "loss": 5.2906, "step": 3438 }, { "epoch": 0.39041973101170396, "grad_norm": 0.244140625, "learning_rate": 0.002, "loss": 5.296, "step": 3439 }, { "epoch": 0.39053325812162304, "grad_norm": 0.2275390625, "learning_rate": 0.002, "loss": 5.3257, "step": 3440 }, { "epoch": 0.3906467852315421, "grad_norm": 0.23046875, "learning_rate": 0.002, "loss": 5.3018, "step": 3441 }, { "epoch": 0.3907603123414612, "grad_norm": 0.21875, "learning_rate": 0.002, "loss": 5.2783, "step": 3442 }, { "epoch": 0.39087383945138027, "grad_norm": 0.2353515625, "learning_rate": 0.002, "loss": 5.3081, "step": 3443 }, { "epoch": 0.39098736656129934, "grad_norm": 0.23046875, "learning_rate": 0.002, "loss": 5.268, "step": 3444 }, { "epoch": 0.3911008936712184, "grad_norm": 0.244140625, "learning_rate": 0.002, "loss": 5.3024, "step": 3445 }, { "epoch": 0.3912144207811375, "grad_norm": 0.2470703125, "learning_rate": 0.002, "loss": 5.286, "step": 3446 }, { "epoch": 0.39132794789105657, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.3, "step": 3447 }, { "epoch": 0.39144147500097565, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.3078, "step": 3448 }, { "epoch": 0.3915550021108947, "grad_norm": 0.2392578125, "learning_rate": 0.002, "loss": 5.3012, "step": 3449 }, { "epoch": 0.3916685292208138, "grad_norm": 0.2451171875, "learning_rate": 0.002, "loss": 5.3216, "step": 3450 }, { "epoch": 0.3917820563307329, "grad_norm": 0.240234375, "learning_rate": 0.002, "loss": 5.2945, "step": 3451 }, { "epoch": 0.39189558344065195, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.299, "step": 3452 }, { "epoch": 0.392009110550571, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.2933, "step": 3453 }, { "epoch": 0.3921226376604901, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.3217, "step": 3454 }, { "epoch": 0.3922361647704092, "grad_norm": 0.50390625, "learning_rate": 0.002, "loss": 5.3, "step": 3455 }, { "epoch": 0.39234969188032826, "grad_norm": 0.470703125, "learning_rate": 0.002, "loss": 5.2957, "step": 3456 }, { "epoch": 0.39246321899024733, "grad_norm": 0.40625, "learning_rate": 0.002, "loss": 5.3054, "step": 3457 }, { "epoch": 0.3925767461001664, "grad_norm": 0.46484375, "learning_rate": 0.002, "loss": 5.2942, "step": 3458 }, { "epoch": 0.3926902732100855, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.2916, "step": 3459 }, { "epoch": 0.39280380032000456, "grad_norm": 0.40234375, "learning_rate": 0.002, "loss": 5.3069, "step": 3460 }, { "epoch": 0.39291732742992364, "grad_norm": 0.380859375, "learning_rate": 0.002, "loss": 5.3339, "step": 3461 }, { "epoch": 0.3930308545398427, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2988, "step": 3462 }, { "epoch": 0.3931443816497618, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2844, "step": 3463 }, { "epoch": 0.39325790875968086, "grad_norm": 0.2451171875, "learning_rate": 0.002, "loss": 5.293, "step": 3464 }, { "epoch": 0.39337143586959994, "grad_norm": 0.234375, "learning_rate": 0.002, "loss": 5.3167, "step": 3465 }, { "epoch": 0.393484962979519, "grad_norm": 0.248046875, "learning_rate": 0.002, "loss": 5.2966, "step": 3466 }, { "epoch": 0.3935984900894381, "grad_norm": 0.2490234375, "learning_rate": 0.002, "loss": 5.3009, "step": 3467 }, { "epoch": 0.39371201719935717, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.3213, "step": 3468 }, { "epoch": 0.39382554430927624, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.3021, "step": 3469 }, { "epoch": 0.3939390714191953, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.3151, "step": 3470 }, { "epoch": 0.3940525985291144, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.3003, "step": 3471 }, { "epoch": 0.39416612563903347, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.2806, "step": 3472 }, { "epoch": 0.39427965274895255, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.2983, "step": 3473 }, { "epoch": 0.3943931798588716, "grad_norm": 0.390625, "learning_rate": 0.002, "loss": 5.2978, "step": 3474 }, { "epoch": 0.3945067069687907, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.3034, "step": 3475 }, { "epoch": 0.3946202340787098, "grad_norm": 0.392578125, "learning_rate": 0.002, "loss": 5.3052, "step": 3476 }, { "epoch": 0.39473376118862885, "grad_norm": 0.3828125, "learning_rate": 0.002, "loss": 5.3185, "step": 3477 }, { "epoch": 0.3948472882985479, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.3108, "step": 3478 }, { "epoch": 0.394960815408467, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.3001, "step": 3479 }, { "epoch": 0.3950743425183861, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.3039, "step": 3480 }, { "epoch": 0.39518786962830516, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.3097, "step": 3481 }, { "epoch": 0.39530139673822423, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.305, "step": 3482 }, { "epoch": 0.3954149238481433, "grad_norm": 0.25, "learning_rate": 0.002, "loss": 5.2938, "step": 3483 }, { "epoch": 0.3955284509580624, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2985, "step": 3484 }, { "epoch": 0.39564197806798146, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.3001, "step": 3485 }, { "epoch": 0.39575550517790054, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2935, "step": 3486 }, { "epoch": 0.3958690322878196, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.2927, "step": 3487 }, { "epoch": 0.3959825593977387, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.3173, "step": 3488 }, { "epoch": 0.39609608650765776, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.298, "step": 3489 }, { "epoch": 0.39620961361757684, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.3123, "step": 3490 }, { "epoch": 0.3963231407274959, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.2835, "step": 3491 }, { "epoch": 0.396436667837415, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2936, "step": 3492 }, { "epoch": 0.39655019494733407, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2981, "step": 3493 }, { "epoch": 0.39666372205725314, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.3031, "step": 3494 }, { "epoch": 0.3967772491671722, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.2965, "step": 3495 }, { "epoch": 0.3968907762770913, "grad_norm": 0.2431640625, "learning_rate": 0.002, "loss": 5.3012, "step": 3496 }, { "epoch": 0.39700430338701037, "grad_norm": 0.2451171875, "learning_rate": 0.002, "loss": 5.29, "step": 3497 }, { "epoch": 0.39711783049692945, "grad_norm": 0.244140625, "learning_rate": 0.002, "loss": 5.3111, "step": 3498 }, { "epoch": 0.3972313576068485, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.3024, "step": 3499 }, { "epoch": 0.3973448847167676, "grad_norm": 0.2421875, "learning_rate": 0.002, "loss": 5.3032, "step": 3500 }, { "epoch": 0.3974584118266867, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.3017, "step": 3501 }, { "epoch": 0.39757193893660575, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.3034, "step": 3502 }, { "epoch": 0.3976854660465248, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.3329, "step": 3503 }, { "epoch": 0.3977989931564439, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.3098, "step": 3504 }, { "epoch": 0.397912520266363, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.295, "step": 3505 }, { "epoch": 0.39802604737628206, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.3028, "step": 3506 }, { "epoch": 0.39813957448620113, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2981, "step": 3507 }, { "epoch": 0.3982531015961202, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2944, "step": 3508 }, { "epoch": 0.3983666287060393, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.3039, "step": 3509 }, { "epoch": 0.39848015581595836, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.285, "step": 3510 }, { "epoch": 0.39859368292587743, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.317, "step": 3511 }, { "epoch": 0.3987072100357965, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.2916, "step": 3512 }, { "epoch": 0.3988207371457156, "grad_norm": 0.25, "learning_rate": 0.002, "loss": 5.3098, "step": 3513 }, { "epoch": 0.39893426425563466, "grad_norm": 0.23828125, "learning_rate": 0.002, "loss": 5.3116, "step": 3514 }, { "epoch": 0.39904779136555374, "grad_norm": 0.2353515625, "learning_rate": 0.002, "loss": 5.3072, "step": 3515 }, { "epoch": 0.3991613184754728, "grad_norm": 0.2275390625, "learning_rate": 0.002, "loss": 5.2904, "step": 3516 }, { "epoch": 0.3992748455853919, "grad_norm": 0.23828125, "learning_rate": 0.002, "loss": 5.2925, "step": 3517 }, { "epoch": 0.39938837269531097, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.2987, "step": 3518 }, { "epoch": 0.39950189980523004, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.301, "step": 3519 }, { "epoch": 0.3996154269151491, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.2903, "step": 3520 }, { "epoch": 0.3997289540250682, "grad_norm": 0.38671875, "learning_rate": 0.002, "loss": 5.3041, "step": 3521 }, { "epoch": 0.39984248113498727, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.3045, "step": 3522 }, { "epoch": 0.39995600824490635, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.2957, "step": 3523 }, { "epoch": 0.4000695353548254, "grad_norm": 0.384765625, "learning_rate": 0.002, "loss": 5.3071, "step": 3524 }, { "epoch": 0.4001830624647445, "grad_norm": 0.384765625, "learning_rate": 0.002, "loss": 5.2996, "step": 3525 }, { "epoch": 0.4002965895746636, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.3032, "step": 3526 }, { "epoch": 0.40041011668458265, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.2764, "step": 3527 }, { "epoch": 0.4005236437945017, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2986, "step": 3528 }, { "epoch": 0.4006371709044208, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.3104, "step": 3529 }, { "epoch": 0.4007506980143399, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.2727, "step": 3530 }, { "epoch": 0.40086422512425895, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.293, "step": 3531 }, { "epoch": 0.40097775223417803, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.3032, "step": 3532 }, { "epoch": 0.4010912793440971, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.2937, "step": 3533 }, { "epoch": 0.4012048064540162, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.3117, "step": 3534 }, { "epoch": 0.40131833356393526, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.3185, "step": 3535 }, { "epoch": 0.40143186067385433, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.2879, "step": 3536 }, { "epoch": 0.4015453877837734, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.2881, "step": 3537 }, { "epoch": 0.4016589148936925, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.3153, "step": 3538 }, { "epoch": 0.40177244200361156, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.3068, "step": 3539 }, { "epoch": 0.40188596911353064, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2959, "step": 3540 }, { "epoch": 0.4019994962234497, "grad_norm": 0.2431640625, "learning_rate": 0.002, "loss": 5.3149, "step": 3541 }, { "epoch": 0.4021130233333688, "grad_norm": 0.236328125, "learning_rate": 0.002, "loss": 5.3011, "step": 3542 }, { "epoch": 0.40222655044328787, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.307, "step": 3543 }, { "epoch": 0.40234007755320694, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.278, "step": 3544 }, { "epoch": 0.402453604663126, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2931, "step": 3545 }, { "epoch": 0.4025671317730451, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.2876, "step": 3546 }, { "epoch": 0.40268065888296417, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.3264, "step": 3547 }, { "epoch": 0.40279418599288325, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.3071, "step": 3548 }, { "epoch": 0.4029077131028023, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.3006, "step": 3549 }, { "epoch": 0.4030212402127214, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2991, "step": 3550 }, { "epoch": 0.4031347673226405, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.3044, "step": 3551 }, { "epoch": 0.40324829443255955, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.284, "step": 3552 }, { "epoch": 0.4033618215424786, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2986, "step": 3553 }, { "epoch": 0.4034753486523977, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.3029, "step": 3554 }, { "epoch": 0.4035888757623168, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.2868, "step": 3555 }, { "epoch": 0.40370240287223585, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.3079, "step": 3556 }, { "epoch": 0.40381592998215493, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.3003, "step": 3557 }, { "epoch": 0.403929457092074, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.3022, "step": 3558 }, { "epoch": 0.4040429842019931, "grad_norm": 0.2431640625, "learning_rate": 0.002, "loss": 5.2988, "step": 3559 }, { "epoch": 0.4041565113119122, "grad_norm": 0.232421875, "learning_rate": 0.002, "loss": 5.3062, "step": 3560 }, { "epoch": 0.4042700384218313, "grad_norm": 0.2275390625, "learning_rate": 0.002, "loss": 5.2976, "step": 3561 }, { "epoch": 0.40438356553175037, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.2917, "step": 3562 }, { "epoch": 0.40449709264166944, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2905, "step": 3563 }, { "epoch": 0.4046106197515885, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.297, "step": 3564 }, { "epoch": 0.4047241468615076, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.3048, "step": 3565 }, { "epoch": 0.40483767397142667, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.2934, "step": 3566 }, { "epoch": 0.40495120108134575, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.312, "step": 3567 }, { "epoch": 0.4050647281912648, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.3029, "step": 3568 }, { "epoch": 0.4051782553011839, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.2987, "step": 3569 }, { "epoch": 0.405291782411103, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.3009, "step": 3570 }, { "epoch": 0.40540530952102205, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2972, "step": 3571 }, { "epoch": 0.4055188366309411, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.294, "step": 3572 }, { "epoch": 0.4056323637408602, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.308, "step": 3573 }, { "epoch": 0.4057458908507793, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2801, "step": 3574 }, { "epoch": 0.40585941796069835, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.2828, "step": 3575 }, { "epoch": 0.40597294507061743, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2875, "step": 3576 }, { "epoch": 0.4060864721805365, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.3125, "step": 3577 }, { "epoch": 0.4061999992904556, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2972, "step": 3578 }, { "epoch": 0.40631352640037466, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.3157, "step": 3579 }, { "epoch": 0.40642705351029373, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.3299, "step": 3580 }, { "epoch": 0.4065405806202128, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2951, "step": 3581 }, { "epoch": 0.4066541077301319, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.2847, "step": 3582 }, { "epoch": 0.40676763484005096, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2937, "step": 3583 }, { "epoch": 0.40688116194997004, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2945, "step": 3584 }, { "epoch": 0.4069946890598891, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.3009, "step": 3585 }, { "epoch": 0.4071082161698082, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.2816, "step": 3586 }, { "epoch": 0.40722174327972727, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.286, "step": 3587 }, { "epoch": 0.40733527038964634, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.2799, "step": 3588 }, { "epoch": 0.4074487974995654, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.3011, "step": 3589 }, { "epoch": 0.4075623246094845, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.3049, "step": 3590 }, { "epoch": 0.40767585171940357, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2948, "step": 3591 }, { "epoch": 0.40778937882932265, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2978, "step": 3592 }, { "epoch": 0.4079029059392417, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.3154, "step": 3593 }, { "epoch": 0.4080164330491608, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.3182, "step": 3594 }, { "epoch": 0.4081299601590799, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.2984, "step": 3595 }, { "epoch": 0.40824348726899895, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.3003, "step": 3596 }, { "epoch": 0.408357014378918, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.2951, "step": 3597 }, { "epoch": 0.4084705414888371, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.3, "step": 3598 }, { "epoch": 0.4085840685987562, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.2876, "step": 3599 }, { "epoch": 0.40869759570867525, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.3136, "step": 3600 }, { "epoch": 0.40881112281859433, "grad_norm": 0.380859375, "learning_rate": 0.002, "loss": 5.2939, "step": 3601 }, { "epoch": 0.4089246499285134, "grad_norm": 0.412109375, "learning_rate": 0.002, "loss": 5.3082, "step": 3602 }, { "epoch": 0.4090381770384325, "grad_norm": 0.39453125, "learning_rate": 0.002, "loss": 5.3045, "step": 3603 }, { "epoch": 0.40915170414835156, "grad_norm": 0.40234375, "learning_rate": 0.002, "loss": 5.3018, "step": 3604 }, { "epoch": 0.40926523125827063, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.2978, "step": 3605 }, { "epoch": 0.4093787583681897, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.3034, "step": 3606 }, { "epoch": 0.4094922854781088, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.3036, "step": 3607 }, { "epoch": 0.40960581258802786, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.2857, "step": 3608 }, { "epoch": 0.40971933969794694, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2897, "step": 3609 }, { "epoch": 0.409832866807866, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.294, "step": 3610 }, { "epoch": 0.4099463939177851, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.3061, "step": 3611 }, { "epoch": 0.41005992102770417, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.2969, "step": 3612 }, { "epoch": 0.41017344813762324, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2939, "step": 3613 }, { "epoch": 0.4102869752475423, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2876, "step": 3614 }, { "epoch": 0.4104005023574614, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.3014, "step": 3615 }, { "epoch": 0.41051402946738047, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.3089, "step": 3616 }, { "epoch": 0.41062755657729955, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2978, "step": 3617 }, { "epoch": 0.4107410836872186, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.3136, "step": 3618 }, { "epoch": 0.4108546107971377, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2935, "step": 3619 }, { "epoch": 0.4109681379070568, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2758, "step": 3620 }, { "epoch": 0.41108166501697585, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.2908, "step": 3621 }, { "epoch": 0.4111951921268949, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.3007, "step": 3622 }, { "epoch": 0.411308719236814, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.2993, "step": 3623 }, { "epoch": 0.4114222463467331, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2886, "step": 3624 }, { "epoch": 0.41153577345665215, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2864, "step": 3625 }, { "epoch": 0.41164930056657123, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2879, "step": 3626 }, { "epoch": 0.4117628276764903, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2943, "step": 3627 }, { "epoch": 0.4118763547864094, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2781, "step": 3628 }, { "epoch": 0.41198988189632846, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.308, "step": 3629 }, { "epoch": 0.41210340900624753, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.3124, "step": 3630 }, { "epoch": 0.4122169361161666, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2937, "step": 3631 }, { "epoch": 0.4123304632260857, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2811, "step": 3632 }, { "epoch": 0.41244399033600476, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.3014, "step": 3633 }, { "epoch": 0.41255751744592384, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2935, "step": 3634 }, { "epoch": 0.4126710445558429, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.3082, "step": 3635 }, { "epoch": 0.412784571665762, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2898, "step": 3636 }, { "epoch": 0.41289809877568107, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.2917, "step": 3637 }, { "epoch": 0.41301162588560014, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.291, "step": 3638 }, { "epoch": 0.4131251529955192, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.2981, "step": 3639 }, { "epoch": 0.4132386801054383, "grad_norm": 0.4140625, "learning_rate": 0.002, "loss": 5.2854, "step": 3640 }, { "epoch": 0.41335220721535737, "grad_norm": 0.44140625, "learning_rate": 0.002, "loss": 5.3038, "step": 3641 }, { "epoch": 0.41346573432527645, "grad_norm": 0.44921875, "learning_rate": 0.002, "loss": 5.297, "step": 3642 }, { "epoch": 0.4135792614351955, "grad_norm": 0.39453125, "learning_rate": 0.002, "loss": 5.2583, "step": 3643 }, { "epoch": 0.4136927885451146, "grad_norm": 0.38671875, "learning_rate": 0.002, "loss": 5.2793, "step": 3644 }, { "epoch": 0.4138063156550337, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.2934, "step": 3645 }, { "epoch": 0.41391984276495275, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.2896, "step": 3646 }, { "epoch": 0.4140333698748718, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2604, "step": 3647 }, { "epoch": 0.4141468969847909, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.3003, "step": 3648 }, { "epoch": 0.41426042409471, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.3102, "step": 3649 }, { "epoch": 0.41437395120462905, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.2925, "step": 3650 }, { "epoch": 0.41448747831454813, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.3125, "step": 3651 }, { "epoch": 0.4146010054244672, "grad_norm": 0.2451171875, "learning_rate": 0.002, "loss": 5.2844, "step": 3652 }, { "epoch": 0.4147145325343863, "grad_norm": 0.236328125, "learning_rate": 0.002, "loss": 5.2941, "step": 3653 }, { "epoch": 0.41482805964430536, "grad_norm": 0.2412109375, "learning_rate": 0.002, "loss": 5.283, "step": 3654 }, { "epoch": 0.41494158675422443, "grad_norm": 0.232421875, "learning_rate": 0.002, "loss": 5.307, "step": 3655 }, { "epoch": 0.4150551138641435, "grad_norm": 0.232421875, "learning_rate": 0.002, "loss": 5.2789, "step": 3656 }, { "epoch": 0.4151686409740626, "grad_norm": 0.23046875, "learning_rate": 0.002, "loss": 5.3001, "step": 3657 }, { "epoch": 0.41528216808398166, "grad_norm": 0.240234375, "learning_rate": 0.002, "loss": 5.2774, "step": 3658 }, { "epoch": 0.41539569519390074, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.3043, "step": 3659 }, { "epoch": 0.4155092223038198, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.3003, "step": 3660 }, { "epoch": 0.4156227494137389, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2833, "step": 3661 }, { "epoch": 0.41573627652365797, "grad_norm": 0.2451171875, "learning_rate": 0.002, "loss": 5.2877, "step": 3662 }, { "epoch": 0.41584980363357704, "grad_norm": 0.23828125, "learning_rate": 0.002, "loss": 5.2747, "step": 3663 }, { "epoch": 0.4159633307434961, "grad_norm": 0.228515625, "learning_rate": 0.002, "loss": 5.2928, "step": 3664 }, { "epoch": 0.4160768578534152, "grad_norm": 0.2431640625, "learning_rate": 0.002, "loss": 5.2731, "step": 3665 }, { "epoch": 0.41619038496333427, "grad_norm": 0.232421875, "learning_rate": 0.002, "loss": 5.2717, "step": 3666 }, { "epoch": 0.41630391207325335, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.3072, "step": 3667 }, { "epoch": 0.4164174391831724, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.2901, "step": 3668 }, { "epoch": 0.4165309662930915, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.3037, "step": 3669 }, { "epoch": 0.4166444934030106, "grad_norm": 0.50390625, "learning_rate": 0.002, "loss": 5.2888, "step": 3670 }, { "epoch": 0.41675802051292965, "grad_norm": 0.48046875, "learning_rate": 0.002, "loss": 5.3021, "step": 3671 }, { "epoch": 0.4168715476228487, "grad_norm": 0.443359375, "learning_rate": 0.002, "loss": 5.3102, "step": 3672 }, { "epoch": 0.4169850747327678, "grad_norm": 0.4453125, "learning_rate": 0.002, "loss": 5.2874, "step": 3673 }, { "epoch": 0.4170986018426869, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.2871, "step": 3674 }, { "epoch": 0.41721212895260595, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.2872, "step": 3675 }, { "epoch": 0.41732565606252503, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.291, "step": 3676 }, { "epoch": 0.4174391831724441, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2627, "step": 3677 }, { "epoch": 0.4175527102823632, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2903, "step": 3678 }, { "epoch": 0.41766623739228226, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2882, "step": 3679 }, { "epoch": 0.41777976450220133, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.3105, "step": 3680 }, { "epoch": 0.41789329161212047, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.2805, "step": 3681 }, { "epoch": 0.41800681872203954, "grad_norm": 0.248046875, "learning_rate": 0.002, "loss": 5.2992, "step": 3682 }, { "epoch": 0.4181203458319586, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.3232, "step": 3683 }, { "epoch": 0.4182338729418777, "grad_norm": 0.2451171875, "learning_rate": 0.002, "loss": 5.301, "step": 3684 }, { "epoch": 0.41834740005179677, "grad_norm": 0.2490234375, "learning_rate": 0.002, "loss": 5.271, "step": 3685 }, { "epoch": 0.41846092716171585, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.3078, "step": 3686 }, { "epoch": 0.4185744542716349, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.3062, "step": 3687 }, { "epoch": 0.418687981381554, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2966, "step": 3688 }, { "epoch": 0.4188015084914731, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.302, "step": 3689 }, { "epoch": 0.41891503560139215, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.305, "step": 3690 }, { "epoch": 0.4190285627113112, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.296, "step": 3691 }, { "epoch": 0.4191420898212303, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.2692, "step": 3692 }, { "epoch": 0.4192556169311494, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.3002, "step": 3693 }, { "epoch": 0.41936914404106845, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.2997, "step": 3694 }, { "epoch": 0.41948267115098753, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.2662, "step": 3695 }, { "epoch": 0.4195961982609066, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.2947, "step": 3696 }, { "epoch": 0.4197097253708257, "grad_norm": 0.396484375, "learning_rate": 0.002, "loss": 5.3024, "step": 3697 }, { "epoch": 0.41982325248074476, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.2864, "step": 3698 }, { "epoch": 0.41993677959066383, "grad_norm": 0.380859375, "learning_rate": 0.002, "loss": 5.2949, "step": 3699 }, { "epoch": 0.4200503067005829, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.2846, "step": 3700 }, { "epoch": 0.420163833810502, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.3051, "step": 3701 }, { "epoch": 0.42027736092042106, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.3035, "step": 3702 }, { "epoch": 0.42039088803034014, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.299, "step": 3703 }, { "epoch": 0.4205044151402592, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.2945, "step": 3704 }, { "epoch": 0.4206179422501783, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2941, "step": 3705 }, { "epoch": 0.42073146936009737, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.3235, "step": 3706 }, { "epoch": 0.42084499647001644, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.2808, "step": 3707 }, { "epoch": 0.4209585235799355, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.3046, "step": 3708 }, { "epoch": 0.4210720506898546, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2938, "step": 3709 }, { "epoch": 0.42118557779977367, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2916, "step": 3710 }, { "epoch": 0.42129910490969275, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.3209, "step": 3711 }, { "epoch": 0.4214126320196118, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.292, "step": 3712 }, { "epoch": 0.4215261591295309, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2895, "step": 3713 }, { "epoch": 0.42163968623945, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2933, "step": 3714 }, { "epoch": 0.42175321334936905, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2959, "step": 3715 }, { "epoch": 0.4218667404592881, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2868, "step": 3716 }, { "epoch": 0.4219802675692072, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.2855, "step": 3717 }, { "epoch": 0.4220937946791263, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2936, "step": 3718 }, { "epoch": 0.42220732178904535, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.3081, "step": 3719 }, { "epoch": 0.42232084889896443, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.3117, "step": 3720 }, { "epoch": 0.4224343760088835, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.3087, "step": 3721 }, { "epoch": 0.4225479031188026, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.307, "step": 3722 }, { "epoch": 0.42266143022872166, "grad_norm": 0.40625, "learning_rate": 0.002, "loss": 5.2946, "step": 3723 }, { "epoch": 0.42277495733864073, "grad_norm": 0.408203125, "learning_rate": 0.002, "loss": 5.3042, "step": 3724 }, { "epoch": 0.4228884844485598, "grad_norm": 0.39453125, "learning_rate": 0.002, "loss": 5.2966, "step": 3725 }, { "epoch": 0.4230020115584789, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.282, "step": 3726 }, { "epoch": 0.42311553866839796, "grad_norm": 0.384765625, "learning_rate": 0.002, "loss": 5.2891, "step": 3727 }, { "epoch": 0.42322906577831704, "grad_norm": 0.37109375, "learning_rate": 0.002, "loss": 5.2962, "step": 3728 }, { "epoch": 0.4233425928882361, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.301, "step": 3729 }, { "epoch": 0.4234561199981552, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2796, "step": 3730 }, { "epoch": 0.42356964710807427, "grad_norm": 0.2490234375, "learning_rate": 0.002, "loss": 5.2876, "step": 3731 }, { "epoch": 0.42368317421799334, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.2825, "step": 3732 }, { "epoch": 0.4237967013279124, "grad_norm": 0.236328125, "learning_rate": 0.002, "loss": 5.2911, "step": 3733 }, { "epoch": 0.4239102284378315, "grad_norm": 0.232421875, "learning_rate": 0.002, "loss": 5.3028, "step": 3734 }, { "epoch": 0.42402375554775057, "grad_norm": 0.2392578125, "learning_rate": 0.002, "loss": 5.283, "step": 3735 }, { "epoch": 0.42413728265766965, "grad_norm": 0.248046875, "learning_rate": 0.002, "loss": 5.3015, "step": 3736 }, { "epoch": 0.4242508097675887, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.3062, "step": 3737 }, { "epoch": 0.4243643368775078, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.3115, "step": 3738 }, { "epoch": 0.4244778639874269, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.3011, "step": 3739 }, { "epoch": 0.42459139109734595, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.2951, "step": 3740 }, { "epoch": 0.424704918207265, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.3041, "step": 3741 }, { "epoch": 0.4248184453171841, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.2975, "step": 3742 }, { "epoch": 0.4249319724271032, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.2928, "step": 3743 }, { "epoch": 0.42504549953702225, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.2868, "step": 3744 }, { "epoch": 0.42515902664694133, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.3056, "step": 3745 }, { "epoch": 0.4252725537568604, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.3118, "step": 3746 }, { "epoch": 0.4253860808667795, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.2948, "step": 3747 }, { "epoch": 0.42549960797669856, "grad_norm": 0.23828125, "learning_rate": 0.002, "loss": 5.2825, "step": 3748 }, { "epoch": 0.42561313508661763, "grad_norm": 0.25, "learning_rate": 0.002, "loss": 5.2953, "step": 3749 }, { "epoch": 0.4257266621965367, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2879, "step": 3750 }, { "epoch": 0.4258401893064558, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.2991, "step": 3751 }, { "epoch": 0.42595371641637486, "grad_norm": 0.416015625, "learning_rate": 0.002, "loss": 5.306, "step": 3752 }, { "epoch": 0.42606724352629394, "grad_norm": 0.3984375, "learning_rate": 0.002, "loss": 5.2958, "step": 3753 }, { "epoch": 0.426180770636213, "grad_norm": 0.39453125, "learning_rate": 0.002, "loss": 5.291, "step": 3754 }, { "epoch": 0.4262942977461321, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.2881, "step": 3755 }, { "epoch": 0.42640782485605117, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2915, "step": 3756 }, { "epoch": 0.42652135196597024, "grad_norm": 0.2451171875, "learning_rate": 0.002, "loss": 5.2984, "step": 3757 }, { "epoch": 0.4266348790758893, "grad_norm": 0.2373046875, "learning_rate": 0.002, "loss": 5.2982, "step": 3758 }, { "epoch": 0.4267484061858084, "grad_norm": 0.2138671875, "learning_rate": 0.002, "loss": 5.3014, "step": 3759 }, { "epoch": 0.42686193329572747, "grad_norm": 0.2197265625, "learning_rate": 0.002, "loss": 5.2723, "step": 3760 }, { "epoch": 0.42697546040564655, "grad_norm": 0.236328125, "learning_rate": 0.002, "loss": 5.2868, "step": 3761 }, { "epoch": 0.4270889875155656, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.2959, "step": 3762 }, { "epoch": 0.4272025146254847, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2997, "step": 3763 }, { "epoch": 0.4273160417354038, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2964, "step": 3764 }, { "epoch": 0.42742956884532285, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2984, "step": 3765 }, { "epoch": 0.4275430959552419, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2935, "step": 3766 }, { "epoch": 0.427656623065161, "grad_norm": 0.2490234375, "learning_rate": 0.002, "loss": 5.2907, "step": 3767 }, { "epoch": 0.4277701501750801, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.2951, "step": 3768 }, { "epoch": 0.42788367728499915, "grad_norm": 0.2490234375, "learning_rate": 0.002, "loss": 5.2967, "step": 3769 }, { "epoch": 0.42799720439491823, "grad_norm": 0.2490234375, "learning_rate": 0.002, "loss": 5.2795, "step": 3770 }, { "epoch": 0.4281107315048373, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.287, "step": 3771 }, { "epoch": 0.4282242586147564, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2671, "step": 3772 }, { "epoch": 0.42833778572467546, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.3001, "step": 3773 }, { "epoch": 0.42845131283459453, "grad_norm": 0.404296875, "learning_rate": 0.002, "loss": 5.2821, "step": 3774 }, { "epoch": 0.4285648399445136, "grad_norm": 0.400390625, "learning_rate": 0.002, "loss": 5.3044, "step": 3775 }, { "epoch": 0.4286783670544327, "grad_norm": 0.37109375, "learning_rate": 0.002, "loss": 5.2747, "step": 3776 }, { "epoch": 0.42879189416435176, "grad_norm": 0.392578125, "learning_rate": 0.002, "loss": 5.2895, "step": 3777 }, { "epoch": 0.42890542127427084, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.294, "step": 3778 }, { "epoch": 0.4290189483841899, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.3025, "step": 3779 }, { "epoch": 0.429132475494109, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2991, "step": 3780 }, { "epoch": 0.42924600260402807, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2889, "step": 3781 }, { "epoch": 0.42935952971394714, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2656, "step": 3782 }, { "epoch": 0.4294730568238662, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.3098, "step": 3783 }, { "epoch": 0.4295865839337853, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.2785, "step": 3784 }, { "epoch": 0.42970011104370437, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2952, "step": 3785 }, { "epoch": 0.42981363815362345, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2848, "step": 3786 }, { "epoch": 0.4299271652635425, "grad_norm": 0.37109375, "learning_rate": 0.002, "loss": 5.2969, "step": 3787 }, { "epoch": 0.4300406923734616, "grad_norm": 0.408203125, "learning_rate": 0.002, "loss": 5.2926, "step": 3788 }, { "epoch": 0.4301542194833807, "grad_norm": 0.4140625, "learning_rate": 0.002, "loss": 5.2859, "step": 3789 }, { "epoch": 0.43026774659329975, "grad_norm": 0.3828125, "learning_rate": 0.002, "loss": 5.2677, "step": 3790 }, { "epoch": 0.4303812737032188, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.2999, "step": 3791 }, { "epoch": 0.4304948008131379, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.3005, "step": 3792 }, { "epoch": 0.430608327923057, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.2825, "step": 3793 }, { "epoch": 0.43072185503297605, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.299, "step": 3794 }, { "epoch": 0.43083538214289513, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.2773, "step": 3795 }, { "epoch": 0.4309489092528142, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2918, "step": 3796 }, { "epoch": 0.4310624363627333, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.3051, "step": 3797 }, { "epoch": 0.43117596347265236, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.2897, "step": 3798 }, { "epoch": 0.43128949058257143, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2962, "step": 3799 }, { "epoch": 0.4314030176924905, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2889, "step": 3800 }, { "epoch": 0.43151654480240964, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.2915, "step": 3801 }, { "epoch": 0.4316300719123287, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.284, "step": 3802 }, { "epoch": 0.4317435990222478, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.2944, "step": 3803 }, { "epoch": 0.43185712613216687, "grad_norm": 0.2392578125, "learning_rate": 0.002, "loss": 5.2712, "step": 3804 }, { "epoch": 0.43197065324208594, "grad_norm": 0.2275390625, "learning_rate": 0.002, "loss": 5.31, "step": 3805 }, { "epoch": 0.432084180352005, "grad_norm": 0.2353515625, "learning_rate": 0.002, "loss": 5.2562, "step": 3806 }, { "epoch": 0.4321977074619241, "grad_norm": 0.2431640625, "learning_rate": 0.002, "loss": 5.295, "step": 3807 }, { "epoch": 0.4323112345718432, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.2754, "step": 3808 }, { "epoch": 0.43242476168176225, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.297, "step": 3809 }, { "epoch": 0.4325382887916813, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.283, "step": 3810 }, { "epoch": 0.4326518159016004, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.3044, "step": 3811 }, { "epoch": 0.4327653430115195, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.3017, "step": 3812 }, { "epoch": 0.43287887012143855, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.2844, "step": 3813 }, { "epoch": 0.43299239723135763, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.281, "step": 3814 }, { "epoch": 0.4331059243412767, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.2785, "step": 3815 }, { "epoch": 0.4332194514511958, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2897, "step": 3816 }, { "epoch": 0.43333297856111486, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.2632, "step": 3817 }, { "epoch": 0.43344650567103393, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.2693, "step": 3818 }, { "epoch": 0.433560032780953, "grad_norm": 0.2451171875, "learning_rate": 0.002, "loss": 5.2938, "step": 3819 }, { "epoch": 0.4336735598908721, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.2851, "step": 3820 }, { "epoch": 0.43378708700079116, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.2821, "step": 3821 }, { "epoch": 0.43390061411071024, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2816, "step": 3822 }, { "epoch": 0.4340141412206293, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.2922, "step": 3823 }, { "epoch": 0.4341276683305484, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.2939, "step": 3824 }, { "epoch": 0.43424119544046746, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.274, "step": 3825 }, { "epoch": 0.43435472255038654, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.2963, "step": 3826 }, { "epoch": 0.4344682496603056, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.305, "step": 3827 }, { "epoch": 0.4345817767702247, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.3021, "step": 3828 }, { "epoch": 0.43469530388014377, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2887, "step": 3829 }, { "epoch": 0.43480883099006284, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2871, "step": 3830 }, { "epoch": 0.4349223580999819, "grad_norm": 0.2470703125, "learning_rate": 0.002, "loss": 5.2864, "step": 3831 }, { "epoch": 0.435035885209901, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.308, "step": 3832 }, { "epoch": 0.4351494123198201, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.2937, "step": 3833 }, { "epoch": 0.43526293942973915, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2912, "step": 3834 }, { "epoch": 0.4353764665396582, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.2915, "step": 3835 }, { "epoch": 0.4354899936495773, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.3117, "step": 3836 }, { "epoch": 0.4356035207594964, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.2952, "step": 3837 }, { "epoch": 0.43571704786941545, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.2897, "step": 3838 }, { "epoch": 0.43583057497933453, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.2984, "step": 3839 }, { "epoch": 0.4359441020892536, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.2844, "step": 3840 }, { "epoch": 0.4360576291991727, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.2814, "step": 3841 }, { "epoch": 0.43617115630909176, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.2826, "step": 3842 }, { "epoch": 0.43628468341901083, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.2849, "step": 3843 }, { "epoch": 0.4363982105289299, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.3129, "step": 3844 }, { "epoch": 0.436511737638849, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.2702, "step": 3845 }, { "epoch": 0.43662526474876806, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.307, "step": 3846 }, { "epoch": 0.43673879185868714, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.2912, "step": 3847 }, { "epoch": 0.4368523189686062, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2948, "step": 3848 }, { "epoch": 0.4369658460785253, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2832, "step": 3849 }, { "epoch": 0.43707937318844436, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.285, "step": 3850 }, { "epoch": 0.43719290029836344, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.2932, "step": 3851 }, { "epoch": 0.4373064274082825, "grad_norm": 0.2490234375, "learning_rate": 0.002, "loss": 5.3, "step": 3852 }, { "epoch": 0.4374199545182016, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.2956, "step": 3853 }, { "epoch": 0.43753348162812067, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.3149, "step": 3854 }, { "epoch": 0.43764700873803974, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.3006, "step": 3855 }, { "epoch": 0.4377605358479588, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.2646, "step": 3856 }, { "epoch": 0.4378740629578779, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.281, "step": 3857 }, { "epoch": 0.437987590067797, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.2853, "step": 3858 }, { "epoch": 0.43810111717771605, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.3017, "step": 3859 }, { "epoch": 0.4382146442876351, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2803, "step": 3860 }, { "epoch": 0.4383281713975542, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.2938, "step": 3861 }, { "epoch": 0.4384416985074733, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.3087, "step": 3862 }, { "epoch": 0.43855522561739235, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.2709, "step": 3863 }, { "epoch": 0.43866875272731143, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.279, "step": 3864 }, { "epoch": 0.4387822798372305, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.2843, "step": 3865 }, { "epoch": 0.4388958069471496, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2765, "step": 3866 }, { "epoch": 0.43900933405706866, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.2856, "step": 3867 }, { "epoch": 0.43912286116698773, "grad_norm": 0.3828125, "learning_rate": 0.002, "loss": 5.2983, "step": 3868 }, { "epoch": 0.4392363882769068, "grad_norm": 0.3984375, "learning_rate": 0.002, "loss": 5.2912, "step": 3869 }, { "epoch": 0.4393499153868259, "grad_norm": 0.388671875, "learning_rate": 0.002, "loss": 5.2999, "step": 3870 }, { "epoch": 0.43946344249674496, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.2997, "step": 3871 }, { "epoch": 0.43957696960666404, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.2761, "step": 3872 }, { "epoch": 0.4396904967165831, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2813, "step": 3873 }, { "epoch": 0.4398040238265022, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.2742, "step": 3874 }, { "epoch": 0.43991755093642126, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.2915, "step": 3875 }, { "epoch": 0.44003107804634034, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.2783, "step": 3876 }, { "epoch": 0.4401446051562594, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.2887, "step": 3877 }, { "epoch": 0.4402581322661785, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.2969, "step": 3878 }, { "epoch": 0.44037165937609757, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2546, "step": 3879 }, { "epoch": 0.44048518648601664, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.2898, "step": 3880 }, { "epoch": 0.4405987135959357, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.2849, "step": 3881 }, { "epoch": 0.4407122407058548, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2905, "step": 3882 }, { "epoch": 0.4408257678157739, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2861, "step": 3883 }, { "epoch": 0.44093929492569295, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2859, "step": 3884 }, { "epoch": 0.441052822035612, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2764, "step": 3885 }, { "epoch": 0.4411663491455311, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.2883, "step": 3886 }, { "epoch": 0.4412798762554502, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.2796, "step": 3887 }, { "epoch": 0.44139340336536925, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.2888, "step": 3888 }, { "epoch": 0.44150693047528833, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2913, "step": 3889 }, { "epoch": 0.4416204575852074, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2887, "step": 3890 }, { "epoch": 0.4417339846951265, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.297, "step": 3891 }, { "epoch": 0.44184751180504556, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2846, "step": 3892 }, { "epoch": 0.44196103891496463, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2845, "step": 3893 }, { "epoch": 0.4420745660248837, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.3043, "step": 3894 }, { "epoch": 0.4421880931348028, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.2902, "step": 3895 }, { "epoch": 0.44230162024472186, "grad_norm": 0.388671875, "learning_rate": 0.002, "loss": 5.2924, "step": 3896 }, { "epoch": 0.44241514735464094, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.3143, "step": 3897 }, { "epoch": 0.44252867446456, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.3083, "step": 3898 }, { "epoch": 0.4426422015744791, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2736, "step": 3899 }, { "epoch": 0.44275572868439816, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2645, "step": 3900 }, { "epoch": 0.44286925579431724, "grad_norm": 0.23828125, "learning_rate": 0.002, "loss": 5.2921, "step": 3901 }, { "epoch": 0.4429827829042363, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.2833, "step": 3902 }, { "epoch": 0.4430963100141554, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.3081, "step": 3903 }, { "epoch": 0.44320983712407447, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2944, "step": 3904 }, { "epoch": 0.44332336423399354, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2932, "step": 3905 }, { "epoch": 0.4434368913439126, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2769, "step": 3906 }, { "epoch": 0.4435504184538317, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.2969, "step": 3907 }, { "epoch": 0.4436639455637508, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.29, "step": 3908 }, { "epoch": 0.44377747267366985, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2713, "step": 3909 }, { "epoch": 0.4438909997835889, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.2919, "step": 3910 }, { "epoch": 0.444004526893508, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.296, "step": 3911 }, { "epoch": 0.4441180540034271, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2732, "step": 3912 }, { "epoch": 0.44423158111334615, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2838, "step": 3913 }, { "epoch": 0.44434510822326523, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.2893, "step": 3914 }, { "epoch": 0.4444586353331843, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.2903, "step": 3915 }, { "epoch": 0.4445721624431034, "grad_norm": 0.384765625, "learning_rate": 0.002, "loss": 5.2994, "step": 3916 }, { "epoch": 0.44468568955302246, "grad_norm": 0.39453125, "learning_rate": 0.002, "loss": 5.2762, "step": 3917 }, { "epoch": 0.44479921666294153, "grad_norm": 0.404296875, "learning_rate": 0.002, "loss": 5.3021, "step": 3918 }, { "epoch": 0.4449127437728606, "grad_norm": 0.388671875, "learning_rate": 0.002, "loss": 5.2769, "step": 3919 }, { "epoch": 0.4450262708827797, "grad_norm": 0.396484375, "learning_rate": 0.002, "loss": 5.2904, "step": 3920 }, { "epoch": 0.44513979799269876, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.2882, "step": 3921 }, { "epoch": 0.4452533251026179, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.2838, "step": 3922 }, { "epoch": 0.44536685221253697, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2931, "step": 3923 }, { "epoch": 0.44548037932245604, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.2976, "step": 3924 }, { "epoch": 0.4455939064323751, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2809, "step": 3925 }, { "epoch": 0.4457074335422942, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.2923, "step": 3926 }, { "epoch": 0.44582096065221327, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.3006, "step": 3927 }, { "epoch": 0.44593448776213235, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2877, "step": 3928 }, { "epoch": 0.4460480148720514, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2896, "step": 3929 }, { "epoch": 0.4461615419819705, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2913, "step": 3930 }, { "epoch": 0.4462750690918896, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.251, "step": 3931 }, { "epoch": 0.44638859620180865, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2739, "step": 3932 }, { "epoch": 0.4465021233117277, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2822, "step": 3933 }, { "epoch": 0.4466156504216468, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2813, "step": 3934 }, { "epoch": 0.4467291775315659, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2803, "step": 3935 }, { "epoch": 0.44684270464148496, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.2984, "step": 3936 }, { "epoch": 0.44695623175140403, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2748, "step": 3937 }, { "epoch": 0.4470697588613231, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.287, "step": 3938 }, { "epoch": 0.4471832859712422, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2815, "step": 3939 }, { "epoch": 0.44729681308116126, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.2993, "step": 3940 }, { "epoch": 0.44741034019108034, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.2724, "step": 3941 }, { "epoch": 0.4475238673009994, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.2768, "step": 3942 }, { "epoch": 0.4476373944109185, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2772, "step": 3943 }, { "epoch": 0.44775092152083756, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2781, "step": 3944 }, { "epoch": 0.44786444863075664, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.284, "step": 3945 }, { "epoch": 0.4479779757406757, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.2891, "step": 3946 }, { "epoch": 0.4480915028505948, "grad_norm": 0.2353515625, "learning_rate": 0.002, "loss": 5.3101, "step": 3947 }, { "epoch": 0.44820502996051387, "grad_norm": 0.2392578125, "learning_rate": 0.002, "loss": 5.266, "step": 3948 }, { "epoch": 0.44831855707043294, "grad_norm": 0.2314453125, "learning_rate": 0.002, "loss": 5.2694, "step": 3949 }, { "epoch": 0.448432084180352, "grad_norm": 0.244140625, "learning_rate": 0.002, "loss": 5.2755, "step": 3950 }, { "epoch": 0.4485456112902711, "grad_norm": 0.2412109375, "learning_rate": 0.002, "loss": 5.2878, "step": 3951 }, { "epoch": 0.44865913840019017, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.3013, "step": 3952 }, { "epoch": 0.44877266551010925, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.2806, "step": 3953 }, { "epoch": 0.4488861926200283, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.3035, "step": 3954 }, { "epoch": 0.4489997197299474, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2878, "step": 3955 }, { "epoch": 0.4491132468398665, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.2928, "step": 3956 }, { "epoch": 0.44922677394978555, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.2913, "step": 3957 }, { "epoch": 0.4493403010597046, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.2884, "step": 3958 }, { "epoch": 0.4494538281696237, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.284, "step": 3959 }, { "epoch": 0.4495673552795428, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2891, "step": 3960 }, { "epoch": 0.44968088238946186, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.278, "step": 3961 }, { "epoch": 0.44979440949938093, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2729, "step": 3962 }, { "epoch": 0.4499079366093, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.2766, "step": 3963 }, { "epoch": 0.4500214637192191, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.3046, "step": 3964 }, { "epoch": 0.45013499082913816, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.2786, "step": 3965 }, { "epoch": 0.45024851793905724, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2798, "step": 3966 }, { "epoch": 0.4503620450489763, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2921, "step": 3967 }, { "epoch": 0.4504755721588954, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.2837, "step": 3968 }, { "epoch": 0.45058909926881446, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.296, "step": 3969 }, { "epoch": 0.45070262637873354, "grad_norm": 0.421875, "learning_rate": 0.002, "loss": 5.3064, "step": 3970 }, { "epoch": 0.4508161534886526, "grad_norm": 0.4296875, "learning_rate": 0.002, "loss": 5.2776, "step": 3971 }, { "epoch": 0.4509296805985717, "grad_norm": 0.4140625, "learning_rate": 0.002, "loss": 5.2819, "step": 3972 }, { "epoch": 0.45104320770849077, "grad_norm": 0.404296875, "learning_rate": 0.002, "loss": 5.2912, "step": 3973 }, { "epoch": 0.45115673481840984, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2886, "step": 3974 }, { "epoch": 0.4512702619283289, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.3105, "step": 3975 }, { "epoch": 0.451383789038248, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.2944, "step": 3976 }, { "epoch": 0.45149731614816707, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.2867, "step": 3977 }, { "epoch": 0.45161084325808615, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.2758, "step": 3978 }, { "epoch": 0.4517243703680052, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2941, "step": 3979 }, { "epoch": 0.4518378974779243, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.2723, "step": 3980 }, { "epoch": 0.4519514245878434, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2902, "step": 3981 }, { "epoch": 0.45206495169776245, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.28, "step": 3982 }, { "epoch": 0.4521784788076815, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2714, "step": 3983 }, { "epoch": 0.4522920059176006, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.3035, "step": 3984 }, { "epoch": 0.4524055330275197, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.2948, "step": 3985 }, { "epoch": 0.45251906013743876, "grad_norm": 0.236328125, "learning_rate": 0.002, "loss": 5.2786, "step": 3986 }, { "epoch": 0.45263258724735783, "grad_norm": 0.224609375, "learning_rate": 0.002, "loss": 5.2893, "step": 3987 }, { "epoch": 0.4527461143572769, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.2721, "step": 3988 }, { "epoch": 0.452859641467196, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2958, "step": 3989 }, { "epoch": 0.45297316857711506, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.2866, "step": 3990 }, { "epoch": 0.45308669568703414, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.3043, "step": 3991 }, { "epoch": 0.4532002227969532, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.2802, "step": 3992 }, { "epoch": 0.4533137499068723, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2818, "step": 3993 }, { "epoch": 0.45342727701679136, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.3004, "step": 3994 }, { "epoch": 0.45354080412671044, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2793, "step": 3995 }, { "epoch": 0.4536543312366295, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2803, "step": 3996 }, { "epoch": 0.4537678583465486, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.2796, "step": 3997 }, { "epoch": 0.45388138545646767, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.2712, "step": 3998 }, { "epoch": 0.45399491256638674, "grad_norm": 0.25, "learning_rate": 0.002, "loss": 5.2955, "step": 3999 }, { "epoch": 0.4541084396763058, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.2819, "step": 4000 }, { "epoch": 0.4542219667862249, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2817, "step": 4001 }, { "epoch": 0.45433549389614397, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2827, "step": 4002 }, { "epoch": 0.45444902100606305, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2932, "step": 4003 }, { "epoch": 0.4545625481159821, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.2668, "step": 4004 }, { "epoch": 0.4546760752259012, "grad_norm": 0.390625, "learning_rate": 0.002, "loss": 5.2457, "step": 4005 }, { "epoch": 0.4547896023358203, "grad_norm": 0.384765625, "learning_rate": 0.002, "loss": 5.3042, "step": 4006 }, { "epoch": 0.45490312944573935, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.284, "step": 4007 }, { "epoch": 0.4550166565556584, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.2686, "step": 4008 }, { "epoch": 0.4551301836655775, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2903, "step": 4009 }, { "epoch": 0.4552437107754966, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2873, "step": 4010 }, { "epoch": 0.45535723788541566, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.2706, "step": 4011 }, { "epoch": 0.45547076499533473, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.301, "step": 4012 }, { "epoch": 0.4555842921052538, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.2909, "step": 4013 }, { "epoch": 0.4556978192151729, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.2811, "step": 4014 }, { "epoch": 0.45581134632509196, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.289, "step": 4015 }, { "epoch": 0.45592487343501104, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.2723, "step": 4016 }, { "epoch": 0.4560384005449301, "grad_norm": 0.24609375, "learning_rate": 0.002, "loss": 5.2955, "step": 4017 }, { "epoch": 0.4561519276548492, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.292, "step": 4018 }, { "epoch": 0.45626545476476826, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.2962, "step": 4019 }, { "epoch": 0.45637898187468734, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.2682, "step": 4020 }, { "epoch": 0.4564925089846064, "grad_norm": 0.423828125, "learning_rate": 0.002, "loss": 5.2834, "step": 4021 }, { "epoch": 0.4566060360945255, "grad_norm": 0.4453125, "learning_rate": 0.002, "loss": 5.2766, "step": 4022 }, { "epoch": 0.45671956320444457, "grad_norm": 0.4140625, "learning_rate": 0.002, "loss": 5.2896, "step": 4023 }, { "epoch": 0.45683309031436364, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.2806, "step": 4024 }, { "epoch": 0.4569466174242827, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.2889, "step": 4025 }, { "epoch": 0.4570601445342018, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.2772, "step": 4026 }, { "epoch": 0.45717367164412087, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2828, "step": 4027 }, { "epoch": 0.45728719875403995, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.2946, "step": 4028 }, { "epoch": 0.457400725863959, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2919, "step": 4029 }, { "epoch": 0.4575142529738781, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.2979, "step": 4030 }, { "epoch": 0.4576277800837972, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2864, "step": 4031 }, { "epoch": 0.45774130719371625, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2572, "step": 4032 }, { "epoch": 0.4578548343036353, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2861, "step": 4033 }, { "epoch": 0.4579683614135544, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.2833, "step": 4034 }, { "epoch": 0.4580818885234735, "grad_norm": 0.421875, "learning_rate": 0.002, "loss": 5.2831, "step": 4035 }, { "epoch": 0.45819541563339256, "grad_norm": 0.408203125, "learning_rate": 0.002, "loss": 5.2842, "step": 4036 }, { "epoch": 0.45830894274331163, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.2789, "step": 4037 }, { "epoch": 0.4584224698532307, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.3044, "step": 4038 }, { "epoch": 0.4585359969631498, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2897, "step": 4039 }, { "epoch": 0.45864952407306886, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2436, "step": 4040 }, { "epoch": 0.45876305118298794, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.2704, "step": 4041 }, { "epoch": 0.458876578292907, "grad_norm": 0.2490234375, "learning_rate": 0.002, "loss": 5.26, "step": 4042 }, { "epoch": 0.45899010540282614, "grad_norm": 0.2353515625, "learning_rate": 0.002, "loss": 5.2919, "step": 4043 }, { "epoch": 0.4591036325127452, "grad_norm": 0.232421875, "learning_rate": 0.002, "loss": 5.2863, "step": 4044 }, { "epoch": 0.4592171596226643, "grad_norm": 0.2265625, "learning_rate": 0.002, "loss": 5.3056, "step": 4045 }, { "epoch": 0.45933068673258337, "grad_norm": 0.2109375, "learning_rate": 0.002, "loss": 5.2907, "step": 4046 }, { "epoch": 0.45944421384250245, "grad_norm": 0.2197265625, "learning_rate": 0.002, "loss": 5.2774, "step": 4047 }, { "epoch": 0.4595577409524215, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.2928, "step": 4048 }, { "epoch": 0.4596712680623406, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.3004, "step": 4049 }, { "epoch": 0.4597847951722597, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.2973, "step": 4050 }, { "epoch": 0.45989832228217875, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.2725, "step": 4051 }, { "epoch": 0.4600118493920978, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.2891, "step": 4052 }, { "epoch": 0.4601253765020169, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2991, "step": 4053 }, { "epoch": 0.460238903611936, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.2829, "step": 4054 }, { "epoch": 0.46035243072185505, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.2852, "step": 4055 }, { "epoch": 0.46046595783177413, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.2875, "step": 4056 }, { "epoch": 0.4605794849416932, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.3042, "step": 4057 }, { "epoch": 0.4606930120516123, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.2936, "step": 4058 }, { "epoch": 0.46080653916153136, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.2652, "step": 4059 }, { "epoch": 0.46092006627145043, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2743, "step": 4060 }, { "epoch": 0.4610335933813695, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2879, "step": 4061 }, { "epoch": 0.4611471204912886, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.2528, "step": 4062 }, { "epoch": 0.46126064760120766, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.3061, "step": 4063 }, { "epoch": 0.46137417471112674, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.2697, "step": 4064 }, { "epoch": 0.4614877018210458, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.266, "step": 4065 }, { "epoch": 0.4616012289309649, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2803, "step": 4066 }, { "epoch": 0.46171475604088397, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.2868, "step": 4067 }, { "epoch": 0.46182828315080304, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.2661, "step": 4068 }, { "epoch": 0.4619418102607221, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.2778, "step": 4069 }, { "epoch": 0.4620553373706412, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.3047, "step": 4070 }, { "epoch": 0.46216886448056027, "grad_norm": 0.248046875, "learning_rate": 0.002, "loss": 5.257, "step": 4071 }, { "epoch": 0.46228239159047935, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.274, "step": 4072 }, { "epoch": 0.4623959187003984, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2878, "step": 4073 }, { "epoch": 0.4625094458103175, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2696, "step": 4074 }, { "epoch": 0.4626229729202366, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.2632, "step": 4075 }, { "epoch": 0.46273650003015565, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.3005, "step": 4076 }, { "epoch": 0.4628500271400747, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2812, "step": 4077 }, { "epoch": 0.4629635542499938, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2902, "step": 4078 }, { "epoch": 0.4630770813599129, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2795, "step": 4079 }, { "epoch": 0.46319060846983195, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2712, "step": 4080 }, { "epoch": 0.46330413557975103, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2934, "step": 4081 }, { "epoch": 0.4634176626896701, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.2806, "step": 4082 }, { "epoch": 0.4635311897995892, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.2759, "step": 4083 }, { "epoch": 0.46364471690950826, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.2846, "step": 4084 }, { "epoch": 0.46375824401942733, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.2951, "step": 4085 }, { "epoch": 0.4638717711293464, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.2864, "step": 4086 }, { "epoch": 0.4639852982392655, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.2733, "step": 4087 }, { "epoch": 0.46409882534918456, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.3129, "step": 4088 }, { "epoch": 0.46421235245910364, "grad_norm": 0.3828125, "learning_rate": 0.002, "loss": 5.2757, "step": 4089 }, { "epoch": 0.4643258795690227, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.2806, "step": 4090 }, { "epoch": 0.4644394066789418, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.2868, "step": 4091 }, { "epoch": 0.46455293378886087, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.2857, "step": 4092 }, { "epoch": 0.46466646089877994, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2733, "step": 4093 }, { "epoch": 0.464779988008699, "grad_norm": 0.2392578125, "learning_rate": 0.002, "loss": 5.2803, "step": 4094 }, { "epoch": 0.4648935151186181, "grad_norm": 0.2353515625, "learning_rate": 0.002, "loss": 5.2663, "step": 4095 }, { "epoch": 0.46500704222853717, "grad_norm": 0.2138671875, "learning_rate": 0.002, "loss": 5.2682, "step": 4096 }, { "epoch": 0.46512056933845625, "grad_norm": 0.228515625, "learning_rate": 0.002, "loss": 5.2762, "step": 4097 }, { "epoch": 0.4652340964483753, "grad_norm": 0.2392578125, "learning_rate": 0.002, "loss": 5.2764, "step": 4098 }, { "epoch": 0.4653476235582944, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.258, "step": 4099 }, { "epoch": 0.4654611506682135, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2764, "step": 4100 }, { "epoch": 0.46557467777813255, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2613, "step": 4101 }, { "epoch": 0.4656882048880516, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.2942, "step": 4102 }, { "epoch": 0.4658017319979707, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.2912, "step": 4103 }, { "epoch": 0.4659152591078898, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2793, "step": 4104 }, { "epoch": 0.46602878621780885, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2731, "step": 4105 }, { "epoch": 0.46614231332772793, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2958, "step": 4106 }, { "epoch": 0.466255840437647, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.249, "step": 4107 }, { "epoch": 0.4663693675475661, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.261, "step": 4108 }, { "epoch": 0.46648289465748516, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2911, "step": 4109 }, { "epoch": 0.46659642176740423, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.2865, "step": 4110 }, { "epoch": 0.4667099488773233, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2802, "step": 4111 }, { "epoch": 0.4668234759872424, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.2738, "step": 4112 }, { "epoch": 0.46693700309716146, "grad_norm": 0.24609375, "learning_rate": 0.002, "loss": 5.2809, "step": 4113 }, { "epoch": 0.46705053020708054, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.2591, "step": 4114 }, { "epoch": 0.4671640573169996, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.2916, "step": 4115 }, { "epoch": 0.4672775844269187, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2841, "step": 4116 }, { "epoch": 0.46739111153683777, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.3, "step": 4117 }, { "epoch": 0.46750463864675684, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2533, "step": 4118 }, { "epoch": 0.4676181657566759, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2761, "step": 4119 }, { "epoch": 0.467731692866595, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2845, "step": 4120 }, { "epoch": 0.46784521997651407, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2878, "step": 4121 }, { "epoch": 0.46795874708643315, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.2692, "step": 4122 }, { "epoch": 0.4680722741963522, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.2755, "step": 4123 }, { "epoch": 0.4681858013062713, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.2861, "step": 4124 }, { "epoch": 0.4682993284161904, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.2918, "step": 4125 }, { "epoch": 0.46841285552610945, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2607, "step": 4126 }, { "epoch": 0.4685263826360285, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2812, "step": 4127 }, { "epoch": 0.4686399097459476, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2799, "step": 4128 }, { "epoch": 0.4687534368558667, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2948, "step": 4129 }, { "epoch": 0.46886696396578575, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2733, "step": 4130 }, { "epoch": 0.46898049107570483, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2945, "step": 4131 }, { "epoch": 0.4690940181856239, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.2952, "step": 4132 }, { "epoch": 0.469207545295543, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.3019, "step": 4133 }, { "epoch": 0.46932107240546206, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2648, "step": 4134 }, { "epoch": 0.46943459951538113, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2838, "step": 4135 }, { "epoch": 0.4695481266253002, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2817, "step": 4136 }, { "epoch": 0.4696616537352193, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2793, "step": 4137 }, { "epoch": 0.46977518084513836, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.2858, "step": 4138 }, { "epoch": 0.46988870795505744, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.2728, "step": 4139 }, { "epoch": 0.4700022350649765, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2713, "step": 4140 }, { "epoch": 0.4701157621748956, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.2692, "step": 4141 }, { "epoch": 0.47022928928481467, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.276, "step": 4142 }, { "epoch": 0.47034281639473374, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.2783, "step": 4143 }, { "epoch": 0.4704563435046528, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2683, "step": 4144 }, { "epoch": 0.4705698706145719, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.2619, "step": 4145 }, { "epoch": 0.47068339772449097, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.271, "step": 4146 }, { "epoch": 0.47079692483441005, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2878, "step": 4147 }, { "epoch": 0.4709104519443291, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2918, "step": 4148 }, { "epoch": 0.4710239790542482, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2767, "step": 4149 }, { "epoch": 0.4711375061641673, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2808, "step": 4150 }, { "epoch": 0.47125103327408635, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.273, "step": 4151 }, { "epoch": 0.4713645603840054, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2629, "step": 4152 }, { "epoch": 0.4714780874939245, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2921, "step": 4153 }, { "epoch": 0.4715916146038436, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.2972, "step": 4154 }, { "epoch": 0.47170514171376265, "grad_norm": 0.2451171875, "learning_rate": 0.002, "loss": 5.3023, "step": 4155 }, { "epoch": 0.47181866882368173, "grad_norm": 0.23828125, "learning_rate": 0.002, "loss": 5.2756, "step": 4156 }, { "epoch": 0.4719321959336008, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.2768, "step": 4157 }, { "epoch": 0.4720457230435199, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2821, "step": 4158 }, { "epoch": 0.47215925015343896, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.2838, "step": 4159 }, { "epoch": 0.47227277726335803, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.2825, "step": 4160 }, { "epoch": 0.4723863043732771, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.278, "step": 4161 }, { "epoch": 0.4724998314831962, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.2833, "step": 4162 }, { "epoch": 0.4726133585931153, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2699, "step": 4163 }, { "epoch": 0.4727268857030344, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.3013, "step": 4164 }, { "epoch": 0.47284041281295347, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2818, "step": 4165 }, { "epoch": 0.47295393992287255, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.2717, "step": 4166 }, { "epoch": 0.4730674670327916, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.2927, "step": 4167 }, { "epoch": 0.4731809941427107, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2719, "step": 4168 }, { "epoch": 0.4732945212526298, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.2836, "step": 4169 }, { "epoch": 0.47340804836254885, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2851, "step": 4170 }, { "epoch": 0.4735215754724679, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.29, "step": 4171 }, { "epoch": 0.473635102582387, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2873, "step": 4172 }, { "epoch": 0.4737486296923061, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.3, "step": 4173 }, { "epoch": 0.47386215680222515, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.2803, "step": 4174 }, { "epoch": 0.47397568391214423, "grad_norm": 0.390625, "learning_rate": 0.002, "loss": 5.2982, "step": 4175 }, { "epoch": 0.4740892110220633, "grad_norm": 0.3828125, "learning_rate": 0.002, "loss": 5.2887, "step": 4176 }, { "epoch": 0.4742027381319824, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.2971, "step": 4177 }, { "epoch": 0.47431626524190146, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2501, "step": 4178 }, { "epoch": 0.47442979235182053, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.2807, "step": 4179 }, { "epoch": 0.4745433194617396, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.2766, "step": 4180 }, { "epoch": 0.4746568465716587, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2766, "step": 4181 }, { "epoch": 0.47477037368157776, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2923, "step": 4182 }, { "epoch": 0.47488390079149684, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2686, "step": 4183 }, { "epoch": 0.4749974279014159, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.2881, "step": 4184 }, { "epoch": 0.475110955011335, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2908, "step": 4185 }, { "epoch": 0.47522448212125407, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2915, "step": 4186 }, { "epoch": 0.47533800923117314, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2809, "step": 4187 }, { "epoch": 0.4754515363410922, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.277, "step": 4188 }, { "epoch": 0.4755650634510113, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2705, "step": 4189 }, { "epoch": 0.47567859056093037, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.2763, "step": 4190 }, { "epoch": 0.47579211767084945, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.2898, "step": 4191 }, { "epoch": 0.4759056447807685, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2893, "step": 4192 }, { "epoch": 0.4760191718906876, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.2652, "step": 4193 }, { "epoch": 0.4761326990006067, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2725, "step": 4194 }, { "epoch": 0.47624622611052575, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2617, "step": 4195 }, { "epoch": 0.4763597532204448, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.2768, "step": 4196 }, { "epoch": 0.4764732803303639, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2846, "step": 4197 }, { "epoch": 0.476586807440283, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.2661, "step": 4198 }, { "epoch": 0.47670033455020205, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2783, "step": 4199 }, { "epoch": 0.47681386166012113, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.278, "step": 4200 }, { "epoch": 0.4769273887700402, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.2849, "step": 4201 }, { "epoch": 0.4770409158799593, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.2701, "step": 4202 }, { "epoch": 0.47715444298987836, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2718, "step": 4203 }, { "epoch": 0.47726797009979743, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2737, "step": 4204 }, { "epoch": 0.4773814972097165, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2646, "step": 4205 }, { "epoch": 0.4774950243196356, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2951, "step": 4206 }, { "epoch": 0.47760855142955466, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2739, "step": 4207 }, { "epoch": 0.47772207853947374, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.271, "step": 4208 }, { "epoch": 0.4778356056493928, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.2796, "step": 4209 }, { "epoch": 0.4779491327593119, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.2817, "step": 4210 }, { "epoch": 0.47806265986923097, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.2706, "step": 4211 }, { "epoch": 0.47817618697915004, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.3022, "step": 4212 }, { "epoch": 0.4782897140890691, "grad_norm": 0.3828125, "learning_rate": 0.002, "loss": 5.276, "step": 4213 }, { "epoch": 0.4784032411989882, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.2765, "step": 4214 }, { "epoch": 0.47851676830890727, "grad_norm": 0.37109375, "learning_rate": 0.002, "loss": 5.272, "step": 4215 }, { "epoch": 0.47863029541882635, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.2928, "step": 4216 }, { "epoch": 0.4787438225287454, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.2649, "step": 4217 }, { "epoch": 0.4788573496386645, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2961, "step": 4218 }, { "epoch": 0.4789708767485836, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2805, "step": 4219 }, { "epoch": 0.47908440385850265, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2756, "step": 4220 }, { "epoch": 0.4791979309684217, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2657, "step": 4221 }, { "epoch": 0.4793114580783408, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.3049, "step": 4222 }, { "epoch": 0.4794249851882599, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.2901, "step": 4223 }, { "epoch": 0.47953851229817895, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.2723, "step": 4224 }, { "epoch": 0.47965203940809803, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.2568, "step": 4225 }, { "epoch": 0.4797655665180171, "grad_norm": 0.380859375, "learning_rate": 0.002, "loss": 5.2746, "step": 4226 }, { "epoch": 0.4798790936279362, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.2832, "step": 4227 }, { "epoch": 0.47999262073785526, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.2877, "step": 4228 }, { "epoch": 0.48010614784777433, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.3013, "step": 4229 }, { "epoch": 0.4802196749576934, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2827, "step": 4230 }, { "epoch": 0.4803332020676125, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2846, "step": 4231 }, { "epoch": 0.48044672917753156, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2667, "step": 4232 }, { "epoch": 0.48056025628745064, "grad_norm": 0.25, "learning_rate": 0.002, "loss": 5.275, "step": 4233 }, { "epoch": 0.4806737833973697, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.2753, "step": 4234 }, { "epoch": 0.4807873105072888, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2781, "step": 4235 }, { "epoch": 0.48090083761720787, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2808, "step": 4236 }, { "epoch": 0.48101436472712694, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.2913, "step": 4237 }, { "epoch": 0.481127891837046, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.2696, "step": 4238 }, { "epoch": 0.4812414189469651, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.2676, "step": 4239 }, { "epoch": 0.48135494605688417, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.285, "step": 4240 }, { "epoch": 0.48146847316680325, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.3024, "step": 4241 }, { "epoch": 0.4815820002767223, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.3009, "step": 4242 }, { "epoch": 0.4816955273866414, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.2913, "step": 4243 }, { "epoch": 0.4818090544965605, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.2837, "step": 4244 }, { "epoch": 0.48192258160647955, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.2719, "step": 4245 }, { "epoch": 0.4820361087163986, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.2745, "step": 4246 }, { "epoch": 0.4821496358263177, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.2769, "step": 4247 }, { "epoch": 0.4822631629362368, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2741, "step": 4248 }, { "epoch": 0.48237669004615585, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2689, "step": 4249 }, { "epoch": 0.48249021715607493, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2889, "step": 4250 }, { "epoch": 0.482603744265994, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.2725, "step": 4251 }, { "epoch": 0.4827172713759131, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.2645, "step": 4252 }, { "epoch": 0.48283079848583216, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.2783, "step": 4253 }, { "epoch": 0.48294432559575123, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.2719, "step": 4254 }, { "epoch": 0.4830578527056703, "grad_norm": 0.3828125, "learning_rate": 0.002, "loss": 5.2734, "step": 4255 }, { "epoch": 0.4831713798155894, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.264, "step": 4256 }, { "epoch": 0.48328490692550846, "grad_norm": 0.396484375, "learning_rate": 0.002, "loss": 5.2741, "step": 4257 }, { "epoch": 0.48339843403542754, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.271, "step": 4258 }, { "epoch": 0.4835119611453466, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.278, "step": 4259 }, { "epoch": 0.4836254882552657, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2848, "step": 4260 }, { "epoch": 0.48373901536518477, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.2806, "step": 4261 }, { "epoch": 0.48385254247510384, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2913, "step": 4262 }, { "epoch": 0.4839660695850229, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.2763, "step": 4263 }, { "epoch": 0.484079596694942, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.2919, "step": 4264 }, { "epoch": 0.48419312380486107, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2589, "step": 4265 }, { "epoch": 0.48430665091478015, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.2554, "step": 4266 }, { "epoch": 0.4844201780246992, "grad_norm": 0.3828125, "learning_rate": 0.002, "loss": 5.281, "step": 4267 }, { "epoch": 0.4845337051346183, "grad_norm": 0.380859375, "learning_rate": 0.002, "loss": 5.2589, "step": 4268 }, { "epoch": 0.4846472322445374, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.2809, "step": 4269 }, { "epoch": 0.48476075935445645, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.2884, "step": 4270 }, { "epoch": 0.4848742864643755, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.3009, "step": 4271 }, { "epoch": 0.4849878135742946, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.2942, "step": 4272 }, { "epoch": 0.4851013406842137, "grad_norm": 0.2265625, "learning_rate": 0.002, "loss": 5.2842, "step": 4273 }, { "epoch": 0.48521486779413275, "grad_norm": 0.220703125, "learning_rate": 0.002, "loss": 5.2685, "step": 4274 }, { "epoch": 0.48532839490405183, "grad_norm": 0.21484375, "learning_rate": 0.002, "loss": 5.2721, "step": 4275 }, { "epoch": 0.4854419220139709, "grad_norm": 0.2255859375, "learning_rate": 0.002, "loss": 5.2798, "step": 4276 }, { "epoch": 0.48555544912389, "grad_norm": 0.25, "learning_rate": 0.002, "loss": 5.2674, "step": 4277 }, { "epoch": 0.48566897623380906, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2881, "step": 4278 }, { "epoch": 0.48578250334372813, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.289, "step": 4279 }, { "epoch": 0.4858960304536472, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2972, "step": 4280 }, { "epoch": 0.4860095575635663, "grad_norm": 0.384765625, "learning_rate": 0.002, "loss": 5.2825, "step": 4281 }, { "epoch": 0.48612308467348536, "grad_norm": 0.41015625, "learning_rate": 0.002, "loss": 5.2715, "step": 4282 }, { "epoch": 0.48623661178340444, "grad_norm": 0.453125, "learning_rate": 0.002, "loss": 5.2685, "step": 4283 }, { "epoch": 0.48635013889332357, "grad_norm": 0.4296875, "learning_rate": 0.002, "loss": 5.275, "step": 4284 }, { "epoch": 0.48646366600324265, "grad_norm": 0.44140625, "learning_rate": 0.002, "loss": 5.2644, "step": 4285 }, { "epoch": 0.4865771931131617, "grad_norm": 0.3984375, "learning_rate": 0.002, "loss": 5.2604, "step": 4286 }, { "epoch": 0.4866907202230808, "grad_norm": 0.4140625, "learning_rate": 0.002, "loss": 5.2898, "step": 4287 }, { "epoch": 0.4868042473329999, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.2765, "step": 4288 }, { "epoch": 0.48691777444291895, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.2545, "step": 4289 }, { "epoch": 0.487031301552838, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.274, "step": 4290 }, { "epoch": 0.4871448286627571, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.272, "step": 4291 }, { "epoch": 0.4872583557726762, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2914, "step": 4292 }, { "epoch": 0.48737188288259525, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.2612, "step": 4293 }, { "epoch": 0.48748540999251433, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.2782, "step": 4294 }, { "epoch": 0.4875989371024334, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.2715, "step": 4295 }, { "epoch": 0.4877124642123525, "grad_norm": 0.244140625, "learning_rate": 0.002, "loss": 5.2713, "step": 4296 }, { "epoch": 0.48782599132227156, "grad_norm": 0.240234375, "learning_rate": 0.002, "loss": 5.2512, "step": 4297 }, { "epoch": 0.48793951843219063, "grad_norm": 0.236328125, "learning_rate": 0.002, "loss": 5.2924, "step": 4298 }, { "epoch": 0.4880530455421097, "grad_norm": 0.2353515625, "learning_rate": 0.002, "loss": 5.2789, "step": 4299 }, { "epoch": 0.4881665726520288, "grad_norm": 0.234375, "learning_rate": 0.002, "loss": 5.2764, "step": 4300 }, { "epoch": 0.48828009976194786, "grad_norm": 0.2470703125, "learning_rate": 0.002, "loss": 5.2655, "step": 4301 }, { "epoch": 0.48839362687186694, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.2864, "step": 4302 }, { "epoch": 0.488507153981786, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2633, "step": 4303 }, { "epoch": 0.4886206810917051, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2908, "step": 4304 }, { "epoch": 0.48873420820162417, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.3012, "step": 4305 }, { "epoch": 0.48884773531154324, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.2654, "step": 4306 }, { "epoch": 0.4889612624214623, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2833, "step": 4307 }, { "epoch": 0.4890747895313814, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.239, "step": 4308 }, { "epoch": 0.48918831664130047, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2725, "step": 4309 }, { "epoch": 0.48930184375121955, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2812, "step": 4310 }, { "epoch": 0.4894153708611386, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.2751, "step": 4311 }, { "epoch": 0.4895288979710577, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.2899, "step": 4312 }, { "epoch": 0.4896424250809768, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.27, "step": 4313 }, { "epoch": 0.48975595219089585, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.2771, "step": 4314 }, { "epoch": 0.4898694793008149, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2809, "step": 4315 }, { "epoch": 0.489983006410734, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.2721, "step": 4316 }, { "epoch": 0.4900965335206531, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.2596, "step": 4317 }, { "epoch": 0.49021006063057215, "grad_norm": 0.4140625, "learning_rate": 0.002, "loss": 5.2667, "step": 4318 }, { "epoch": 0.49032358774049123, "grad_norm": 0.412109375, "learning_rate": 0.002, "loss": 5.2727, "step": 4319 }, { "epoch": 0.4904371148504103, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.2581, "step": 4320 }, { "epoch": 0.4905506419603294, "grad_norm": 0.40625, "learning_rate": 0.002, "loss": 5.2885, "step": 4321 }, { "epoch": 0.49066416907024846, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.2668, "step": 4322 }, { "epoch": 0.49077769618016753, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.2866, "step": 4323 }, { "epoch": 0.4908912232900866, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.285, "step": 4324 }, { "epoch": 0.4910047504000057, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.3013, "step": 4325 }, { "epoch": 0.49111827750992476, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.2688, "step": 4326 }, { "epoch": 0.49123180461984384, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2867, "step": 4327 }, { "epoch": 0.4913453317297629, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2655, "step": 4328 }, { "epoch": 0.491458858839682, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2834, "step": 4329 }, { "epoch": 0.49157238594960107, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2866, "step": 4330 }, { "epoch": 0.49168591305952014, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.293, "step": 4331 }, { "epoch": 0.4917994401694392, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2685, "step": 4332 }, { "epoch": 0.4919129672793583, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2922, "step": 4333 }, { "epoch": 0.49202649438927737, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2881, "step": 4334 }, { "epoch": 0.49214002149919645, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.2684, "step": 4335 }, { "epoch": 0.4922535486091155, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2828, "step": 4336 }, { "epoch": 0.4923670757190346, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.265, "step": 4337 }, { "epoch": 0.4924806028289537, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.2705, "step": 4338 }, { "epoch": 0.49259412993887275, "grad_norm": 0.380859375, "learning_rate": 0.002, "loss": 5.2789, "step": 4339 }, { "epoch": 0.4927076570487918, "grad_norm": 0.384765625, "learning_rate": 0.002, "loss": 5.2735, "step": 4340 }, { "epoch": 0.4928211841587109, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.2954, "step": 4341 }, { "epoch": 0.49293471126863, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.2829, "step": 4342 }, { "epoch": 0.49304823837854905, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2729, "step": 4343 }, { "epoch": 0.49316176548846813, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2861, "step": 4344 }, { "epoch": 0.4932752925983872, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2911, "step": 4345 }, { "epoch": 0.4933888197083063, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.2675, "step": 4346 }, { "epoch": 0.49350234681822536, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.2774, "step": 4347 }, { "epoch": 0.49361587392814443, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2792, "step": 4348 }, { "epoch": 0.4937294010380635, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2845, "step": 4349 }, { "epoch": 0.4938429281479826, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.259, "step": 4350 }, { "epoch": 0.49395645525790166, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2573, "step": 4351 }, { "epoch": 0.49406998236782074, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.259, "step": 4352 }, { "epoch": 0.4941835094777398, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2878, "step": 4353 }, { "epoch": 0.4942970365876589, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.2687, "step": 4354 }, { "epoch": 0.49441056369757796, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.2737, "step": 4355 }, { "epoch": 0.49452409080749704, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2714, "step": 4356 }, { "epoch": 0.4946376179174161, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.2749, "step": 4357 }, { "epoch": 0.4947511450273352, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.276, "step": 4358 }, { "epoch": 0.49486467213725427, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.2784, "step": 4359 }, { "epoch": 0.49497819924717334, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.2792, "step": 4360 }, { "epoch": 0.4950917263570924, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.2767, "step": 4361 }, { "epoch": 0.4952052534670115, "grad_norm": 0.236328125, "learning_rate": 0.002, "loss": 5.2824, "step": 4362 }, { "epoch": 0.4953187805769306, "grad_norm": 0.2314453125, "learning_rate": 0.002, "loss": 5.2581, "step": 4363 }, { "epoch": 0.49543230768684965, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2796, "step": 4364 }, { "epoch": 0.4955458347967687, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2757, "step": 4365 }, { "epoch": 0.4956593619066878, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.2657, "step": 4366 }, { "epoch": 0.4957728890166069, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.2829, "step": 4367 }, { "epoch": 0.49588641612652595, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.2794, "step": 4368 }, { "epoch": 0.49599994323644503, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.2744, "step": 4369 }, { "epoch": 0.4961134703463641, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.2651, "step": 4370 }, { "epoch": 0.4962269974562832, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.2864, "step": 4371 }, { "epoch": 0.49634052456620226, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.2515, "step": 4372 }, { "epoch": 0.49645405167612133, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.2747, "step": 4373 }, { "epoch": 0.4965675787860404, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.2938, "step": 4374 }, { "epoch": 0.4966811058959595, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2761, "step": 4375 }, { "epoch": 0.49679463300587856, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2859, "step": 4376 }, { "epoch": 0.49690816011579764, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2499, "step": 4377 }, { "epoch": 0.4970216872257167, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.2793, "step": 4378 }, { "epoch": 0.4971352143356358, "grad_norm": 0.37109375, "learning_rate": 0.002, "loss": 5.2832, "step": 4379 }, { "epoch": 0.49724874144555486, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.2891, "step": 4380 }, { "epoch": 0.49736226855547394, "grad_norm": 0.392578125, "learning_rate": 0.002, "loss": 5.2661, "step": 4381 }, { "epoch": 0.497475795665393, "grad_norm": 0.392578125, "learning_rate": 0.002, "loss": 5.2564, "step": 4382 }, { "epoch": 0.4975893227753121, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.2822, "step": 4383 }, { "epoch": 0.49770284988523117, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.2446, "step": 4384 }, { "epoch": 0.49781637699515024, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2813, "step": 4385 }, { "epoch": 0.4979299041050693, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.277, "step": 4386 }, { "epoch": 0.4980434312149884, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2752, "step": 4387 }, { "epoch": 0.4981569583249075, "grad_norm": 0.2490234375, "learning_rate": 0.002, "loss": 5.2691, "step": 4388 }, { "epoch": 0.49827048543482655, "grad_norm": 0.244140625, "learning_rate": 0.002, "loss": 5.2692, "step": 4389 }, { "epoch": 0.4983840125447456, "grad_norm": 0.2421875, "learning_rate": 0.002, "loss": 5.2716, "step": 4390 }, { "epoch": 0.4984975396546647, "grad_norm": 0.236328125, "learning_rate": 0.002, "loss": 5.2966, "step": 4391 }, { "epoch": 0.4986110667645838, "grad_norm": 0.2373046875, "learning_rate": 0.002, "loss": 5.2567, "step": 4392 }, { "epoch": 0.49872459387450285, "grad_norm": 0.240234375, "learning_rate": 0.002, "loss": 5.2874, "step": 4393 }, { "epoch": 0.49883812098442193, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.297, "step": 4394 }, { "epoch": 0.498951648094341, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2886, "step": 4395 }, { "epoch": 0.4990651752042601, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.2704, "step": 4396 }, { "epoch": 0.49917870231417916, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.2579, "step": 4397 }, { "epoch": 0.49929222942409823, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.2935, "step": 4398 }, { "epoch": 0.4994057565340173, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2758, "step": 4399 }, { "epoch": 0.4995192836439364, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2413, "step": 4400 }, { "epoch": 0.49963281075385546, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.2778, "step": 4401 }, { "epoch": 0.49974633786377454, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.2682, "step": 4402 }, { "epoch": 0.4998598649736936, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2754, "step": 4403 }, { "epoch": 0.4999733920836127, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2544, "step": 4404 }, { "epoch": 0.5000869191935318, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.2859, "step": 4405 }, { "epoch": 0.5002004463034508, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.2678, "step": 4406 }, { "epoch": 0.5003139734133699, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.2839, "step": 4407 }, { "epoch": 0.500427500523289, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2951, "step": 4408 }, { "epoch": 0.5005410276332081, "grad_norm": 0.21875, "learning_rate": 0.002, "loss": 5.262, "step": 4409 }, { "epoch": 0.5006545547431271, "grad_norm": 0.2177734375, "learning_rate": 0.002, "loss": 5.2515, "step": 4410 }, { "epoch": 0.5007680818530462, "grad_norm": 0.2314453125, "learning_rate": 0.002, "loss": 5.2542, "step": 4411 }, { "epoch": 0.5008816089629653, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2926, "step": 4412 }, { "epoch": 0.5009951360728844, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2731, "step": 4413 }, { "epoch": 0.5011086631828034, "grad_norm": 0.392578125, "learning_rate": 0.002, "loss": 5.2616, "step": 4414 }, { "epoch": 0.5012221902927225, "grad_norm": 0.384765625, "learning_rate": 0.002, "loss": 5.2803, "step": 4415 }, { "epoch": 0.5013357174026416, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.2849, "step": 4416 }, { "epoch": 0.5014492445125607, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.2535, "step": 4417 }, { "epoch": 0.5015627716224798, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.2637, "step": 4418 }, { "epoch": 0.5016762987323988, "grad_norm": 0.388671875, "learning_rate": 0.002, "loss": 5.2848, "step": 4419 }, { "epoch": 0.5017898258423179, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.2842, "step": 4420 }, { "epoch": 0.501903352952237, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.2852, "step": 4421 }, { "epoch": 0.5020168800621561, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2745, "step": 4422 }, { "epoch": 0.5021304071720751, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2847, "step": 4423 }, { "epoch": 0.5022439342819942, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.2735, "step": 4424 }, { "epoch": 0.5023574613919133, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2885, "step": 4425 }, { "epoch": 0.5024709885018324, "grad_norm": 0.23828125, "learning_rate": 0.002, "loss": 5.2935, "step": 4426 }, { "epoch": 0.5025845156117514, "grad_norm": 0.2421875, "learning_rate": 0.002, "loss": 5.2628, "step": 4427 }, { "epoch": 0.5026980427216705, "grad_norm": 0.2197265625, "learning_rate": 0.002, "loss": 5.2868, "step": 4428 }, { "epoch": 0.5028115698315896, "grad_norm": 0.2373046875, "learning_rate": 0.002, "loss": 5.2769, "step": 4429 }, { "epoch": 0.5029250969415087, "grad_norm": 0.23828125, "learning_rate": 0.002, "loss": 5.2724, "step": 4430 }, { "epoch": 0.5030386240514277, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.2838, "step": 4431 }, { "epoch": 0.5031521511613468, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2899, "step": 4432 }, { "epoch": 0.5032656782712659, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.2833, "step": 4433 }, { "epoch": 0.503379205381185, "grad_norm": 0.3828125, "learning_rate": 0.002, "loss": 5.2755, "step": 4434 }, { "epoch": 0.503492732491104, "grad_norm": 0.49609375, "learning_rate": 0.002, "loss": 5.2613, "step": 4435 }, { "epoch": 0.5036062596010231, "grad_norm": 0.447265625, "learning_rate": 0.002, "loss": 5.2894, "step": 4436 }, { "epoch": 0.5037197867109422, "grad_norm": 0.4453125, "learning_rate": 0.002, "loss": 5.2776, "step": 4437 }, { "epoch": 0.5038333138208613, "grad_norm": 0.41796875, "learning_rate": 0.002, "loss": 5.2769, "step": 4438 }, { "epoch": 0.5039468409307803, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.257, "step": 4439 }, { "epoch": 0.5040603680406994, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.2543, "step": 4440 }, { "epoch": 0.5041738951506185, "grad_norm": 0.24609375, "learning_rate": 0.002, "loss": 5.2533, "step": 4441 }, { "epoch": 0.5042874222605376, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.2752, "step": 4442 }, { "epoch": 0.5044009493704567, "grad_norm": 0.23828125, "learning_rate": 0.002, "loss": 5.2787, "step": 4443 }, { "epoch": 0.5045144764803757, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.2588, "step": 4444 }, { "epoch": 0.5046280035902948, "grad_norm": 0.2490234375, "learning_rate": 0.002, "loss": 5.2626, "step": 4445 }, { "epoch": 0.5047415307002139, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.2998, "step": 4446 }, { "epoch": 0.504855057810133, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.2842, "step": 4447 }, { "epoch": 0.504968584920052, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.269, "step": 4448 }, { "epoch": 0.5050821120299711, "grad_norm": 0.23828125, "learning_rate": 0.002, "loss": 5.2676, "step": 4449 }, { "epoch": 0.5051956391398902, "grad_norm": 0.2412109375, "learning_rate": 0.002, "loss": 5.2915, "step": 4450 }, { "epoch": 0.5053091662498093, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.2783, "step": 4451 }, { "epoch": 0.5054226933597283, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.251, "step": 4452 }, { "epoch": 0.5055362204696474, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.2532, "step": 4453 }, { "epoch": 0.5056497475795665, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.2629, "step": 4454 }, { "epoch": 0.5057632746894856, "grad_norm": 0.380859375, "learning_rate": 0.002, "loss": 5.2785, "step": 4455 }, { "epoch": 0.5058768017994046, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.2722, "step": 4456 }, { "epoch": 0.5059903289093237, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.2931, "step": 4457 }, { "epoch": 0.5061038560192428, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2488, "step": 4458 }, { "epoch": 0.5062173831291619, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2554, "step": 4459 }, { "epoch": 0.506330910239081, "grad_norm": 0.2412109375, "learning_rate": 0.002, "loss": 5.2653, "step": 4460 }, { "epoch": 0.506444437349, "grad_norm": 0.23828125, "learning_rate": 0.002, "loss": 5.2808, "step": 4461 }, { "epoch": 0.5065579644589191, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2679, "step": 4462 }, { "epoch": 0.5066714915688382, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.2781, "step": 4463 }, { "epoch": 0.5067850186787572, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.2764, "step": 4464 }, { "epoch": 0.5068985457886764, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.2822, "step": 4465 }, { "epoch": 0.5070120728985955, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.2914, "step": 4466 }, { "epoch": 0.5071256000085146, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.2817, "step": 4467 }, { "epoch": 0.5072391271184337, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2562, "step": 4468 }, { "epoch": 0.5073526542283527, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.2588, "step": 4469 }, { "epoch": 0.5074661813382718, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.2821, "step": 4470 }, { "epoch": 0.5075797084481909, "grad_norm": 0.23828125, "learning_rate": 0.002, "loss": 5.2679, "step": 4471 }, { "epoch": 0.50769323555811, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.2778, "step": 4472 }, { "epoch": 0.507806762668029, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.2561, "step": 4473 }, { "epoch": 0.5079202897779481, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2599, "step": 4474 }, { "epoch": 0.5080338168878672, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.264, "step": 4475 }, { "epoch": 0.5081473439977863, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.2594, "step": 4476 }, { "epoch": 0.5082608711077053, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.257, "step": 4477 }, { "epoch": 0.5083743982176244, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.2827, "step": 4478 }, { "epoch": 0.5084879253275435, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2675, "step": 4479 }, { "epoch": 0.5086014524374626, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.275, "step": 4480 }, { "epoch": 0.5087149795473817, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2754, "step": 4481 }, { "epoch": 0.5088285066573007, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.285, "step": 4482 }, { "epoch": 0.5089420337672198, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.263, "step": 4483 }, { "epoch": 0.5090555608771389, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.292, "step": 4484 }, { "epoch": 0.509169087987058, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.2537, "step": 4485 }, { "epoch": 0.509282615096977, "grad_norm": 0.37109375, "learning_rate": 0.002, "loss": 5.2826, "step": 4486 }, { "epoch": 0.5093961422068961, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.2706, "step": 4487 }, { "epoch": 0.5095096693168152, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.2787, "step": 4488 }, { "epoch": 0.5096231964267343, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.2816, "step": 4489 }, { "epoch": 0.5097367235366533, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2761, "step": 4490 }, { "epoch": 0.5098502506465724, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2401, "step": 4491 }, { "epoch": 0.5099637777564915, "grad_norm": 0.2490234375, "learning_rate": 0.002, "loss": 5.2777, "step": 4492 }, { "epoch": 0.5100773048664106, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.2774, "step": 4493 }, { "epoch": 0.5101908319763296, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.2584, "step": 4494 }, { "epoch": 0.5103043590862487, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.2725, "step": 4495 }, { "epoch": 0.5104178861961678, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.2768, "step": 4496 }, { "epoch": 0.5105314133060869, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.2623, "step": 4497 }, { "epoch": 0.5106449404160059, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.249, "step": 4498 }, { "epoch": 0.510758467525925, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.2789, "step": 4499 }, { "epoch": 0.5108719946358441, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2577, "step": 4500 }, { "epoch": 0.5109855217457632, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.24, "step": 4501 }, { "epoch": 0.5110990488556822, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2747, "step": 4502 }, { "epoch": 0.5112125759656013, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2761, "step": 4503 }, { "epoch": 0.5113261030755204, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.2614, "step": 4504 }, { "epoch": 0.5114396301854395, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2729, "step": 4505 }, { "epoch": 0.5115531572953586, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.276, "step": 4506 }, { "epoch": 0.5116666844052776, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.2727, "step": 4507 }, { "epoch": 0.5117802115151967, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.277, "step": 4508 }, { "epoch": 0.5118937386251158, "grad_norm": 0.515625, "learning_rate": 0.002, "loss": 5.2764, "step": 4509 }, { "epoch": 0.5120072657350349, "grad_norm": 0.400390625, "learning_rate": 0.002, "loss": 5.2687, "step": 4510 }, { "epoch": 0.5121207928449539, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2862, "step": 4511 }, { "epoch": 0.512234319954873, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2527, "step": 4512 }, { "epoch": 0.5123478470647921, "grad_norm": 0.2294921875, "learning_rate": 0.002, "loss": 5.2478, "step": 4513 }, { "epoch": 0.5124613741747112, "grad_norm": 0.2373046875, "learning_rate": 0.002, "loss": 5.272, "step": 4514 }, { "epoch": 0.5125749012846302, "grad_norm": 0.2470703125, "learning_rate": 0.002, "loss": 5.2608, "step": 4515 }, { "epoch": 0.5126884283945493, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2636, "step": 4516 }, { "epoch": 0.5128019555044684, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.266, "step": 4517 }, { "epoch": 0.5129154826143875, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2682, "step": 4518 }, { "epoch": 0.5130290097243065, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.2658, "step": 4519 }, { "epoch": 0.5131425368342256, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.274, "step": 4520 }, { "epoch": 0.5132560639441447, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2478, "step": 4521 }, { "epoch": 0.5133695910540638, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2583, "step": 4522 }, { "epoch": 0.5134831181639828, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.267, "step": 4523 }, { "epoch": 0.5135966452739019, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.2777, "step": 4524 }, { "epoch": 0.513710172383821, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2638, "step": 4525 }, { "epoch": 0.5138236994937401, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.2852, "step": 4526 }, { "epoch": 0.5139372266036591, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.2706, "step": 4527 }, { "epoch": 0.5140507537135782, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.2595, "step": 4528 }, { "epoch": 0.5141642808234973, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2785, "step": 4529 }, { "epoch": 0.5142778079334164, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.268, "step": 4530 }, { "epoch": 0.5143913350433355, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2768, "step": 4531 }, { "epoch": 0.5145048621532545, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2795, "step": 4532 }, { "epoch": 0.5146183892631736, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2858, "step": 4533 }, { "epoch": 0.5147319163730927, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2685, "step": 4534 }, { "epoch": 0.5148454434830118, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.257, "step": 4535 }, { "epoch": 0.5149589705929308, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2677, "step": 4536 }, { "epoch": 0.5150724977028499, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.2891, "step": 4537 }, { "epoch": 0.515186024812769, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.2767, "step": 4538 }, { "epoch": 0.5152995519226881, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.2675, "step": 4539 }, { "epoch": 0.5154130790326071, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.2758, "step": 4540 }, { "epoch": 0.5155266061425262, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.2728, "step": 4541 }, { "epoch": 0.5156401332524453, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2665, "step": 4542 }, { "epoch": 0.5157536603623644, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.2827, "step": 4543 }, { "epoch": 0.5158671874722834, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.2654, "step": 4544 }, { "epoch": 0.5159807145822025, "grad_norm": 0.2470703125, "learning_rate": 0.002, "loss": 5.2621, "step": 4545 }, { "epoch": 0.5160942416921216, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.2575, "step": 4546 }, { "epoch": 0.5162077688020407, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.2613, "step": 4547 }, { "epoch": 0.5163212959119597, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.2578, "step": 4548 }, { "epoch": 0.5164348230218788, "grad_norm": 0.244140625, "learning_rate": 0.002, "loss": 5.2759, "step": 4549 }, { "epoch": 0.5165483501317979, "grad_norm": 0.240234375, "learning_rate": 0.002, "loss": 5.2896, "step": 4550 }, { "epoch": 0.516661877241717, "grad_norm": 0.248046875, "learning_rate": 0.002, "loss": 5.2496, "step": 4551 }, { "epoch": 0.516775404351636, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.2649, "step": 4552 }, { "epoch": 0.5168889314615551, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.2708, "step": 4553 }, { "epoch": 0.5170024585714742, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.2592, "step": 4554 }, { "epoch": 0.5171159856813933, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.252, "step": 4555 }, { "epoch": 0.5172295127913124, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.2828, "step": 4556 }, { "epoch": 0.5173430399012314, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2921, "step": 4557 }, { "epoch": 0.5174565670111505, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.2711, "step": 4558 }, { "epoch": 0.5175700941210696, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.2884, "step": 4559 }, { "epoch": 0.5176836212309887, "grad_norm": 0.2412109375, "learning_rate": 0.002, "loss": 5.2598, "step": 4560 }, { "epoch": 0.5177971483409077, "grad_norm": 0.2490234375, "learning_rate": 0.002, "loss": 5.2724, "step": 4561 }, { "epoch": 0.5179106754508268, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.2781, "step": 4562 }, { "epoch": 0.5180242025607459, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2622, "step": 4563 }, { "epoch": 0.518137729670665, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2789, "step": 4564 }, { "epoch": 0.518251256780584, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.2813, "step": 4565 }, { "epoch": 0.5183647838905031, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2625, "step": 4566 }, { "epoch": 0.5184783110004222, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2652, "step": 4567 }, { "epoch": 0.5185918381103413, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2826, "step": 4568 }, { "epoch": 0.5187053652202603, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.2973, "step": 4569 }, { "epoch": 0.5188188923301794, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.2557, "step": 4570 }, { "epoch": 0.5189324194400985, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2761, "step": 4571 }, { "epoch": 0.5190459465500176, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2556, "step": 4572 }, { "epoch": 0.5191594736599366, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.262, "step": 4573 }, { "epoch": 0.5192730007698557, "grad_norm": 0.25, "learning_rate": 0.002, "loss": 5.2994, "step": 4574 }, { "epoch": 0.5193865278797748, "grad_norm": 0.2265625, "learning_rate": 0.002, "loss": 5.2627, "step": 4575 }, { "epoch": 0.5195000549896939, "grad_norm": 0.232421875, "learning_rate": 0.002, "loss": 5.2736, "step": 4576 }, { "epoch": 0.519613582099613, "grad_norm": 0.244140625, "learning_rate": 0.002, "loss": 5.2822, "step": 4577 }, { "epoch": 0.519727109209532, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.2554, "step": 4578 }, { "epoch": 0.5198406363194511, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2642, "step": 4579 }, { "epoch": 0.5199541634293702, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2858, "step": 4580 }, { "epoch": 0.5200676905392893, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.2613, "step": 4581 }, { "epoch": 0.5201812176492083, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.2585, "step": 4582 }, { "epoch": 0.5202947447591274, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.2865, "step": 4583 }, { "epoch": 0.5204082718690465, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2731, "step": 4584 }, { "epoch": 0.5205217989789656, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.2687, "step": 4585 }, { "epoch": 0.5206353260888846, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2729, "step": 4586 }, { "epoch": 0.5207488531988037, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2746, "step": 4587 }, { "epoch": 0.5208623803087228, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2524, "step": 4588 }, { "epoch": 0.5209759074186419, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2844, "step": 4589 }, { "epoch": 0.5210894345285609, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2802, "step": 4590 }, { "epoch": 0.52120296163848, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2804, "step": 4591 }, { "epoch": 0.5213164887483991, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2526, "step": 4592 }, { "epoch": 0.5214300158583182, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2675, "step": 4593 }, { "epoch": 0.5215435429682372, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.267, "step": 4594 }, { "epoch": 0.5216570700781563, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2815, "step": 4595 }, { "epoch": 0.5217705971880754, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.267, "step": 4596 }, { "epoch": 0.5218841242979945, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.2847, "step": 4597 }, { "epoch": 0.5219976514079135, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2788, "step": 4598 }, { "epoch": 0.5221111785178326, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.2759, "step": 4599 }, { "epoch": 0.5222247056277517, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2738, "step": 4600 }, { "epoch": 0.5223382327376708, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2744, "step": 4601 }, { "epoch": 0.5224517598475898, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.287, "step": 4602 }, { "epoch": 0.5225652869575089, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2836, "step": 4603 }, { "epoch": 0.522678814067428, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2817, "step": 4604 }, { "epoch": 0.5227923411773471, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2503, "step": 4605 }, { "epoch": 0.5229058682872662, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.2808, "step": 4606 }, { "epoch": 0.5230193953971852, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.2864, "step": 4607 }, { "epoch": 0.5231329225071043, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2666, "step": 4608 }, { "epoch": 0.5232464496170234, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2798, "step": 4609 }, { "epoch": 0.5233599767269425, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.2871, "step": 4610 }, { "epoch": 0.5234735038368615, "grad_norm": 0.232421875, "learning_rate": 0.002, "loss": 5.2519, "step": 4611 }, { "epoch": 0.5235870309467806, "grad_norm": 0.2333984375, "learning_rate": 0.002, "loss": 5.2784, "step": 4612 }, { "epoch": 0.5237005580566997, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2724, "step": 4613 }, { "epoch": 0.5238140851666188, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2588, "step": 4614 }, { "epoch": 0.5239276122765378, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.2797, "step": 4615 }, { "epoch": 0.5240411393864569, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2612, "step": 4616 }, { "epoch": 0.524154666496376, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.2873, "step": 4617 }, { "epoch": 0.5242681936062951, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2602, "step": 4618 }, { "epoch": 0.5243817207162141, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.2693, "step": 4619 }, { "epoch": 0.5244952478261332, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.2778, "step": 4620 }, { "epoch": 0.5246087749360523, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2516, "step": 4621 }, { "epoch": 0.5247223020459714, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.287, "step": 4622 }, { "epoch": 0.5248358291558904, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2522, "step": 4623 }, { "epoch": 0.5249493562658095, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.2626, "step": 4624 }, { "epoch": 0.5250628833757286, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.2707, "step": 4625 }, { "epoch": 0.5251764104856477, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.269, "step": 4626 }, { "epoch": 0.5252899375955667, "grad_norm": 0.388671875, "learning_rate": 0.002, "loss": 5.2843, "step": 4627 }, { "epoch": 0.5254034647054858, "grad_norm": 0.380859375, "learning_rate": 0.002, "loss": 5.2642, "step": 4628 }, { "epoch": 0.5255169918154049, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.2685, "step": 4629 }, { "epoch": 0.525630518925324, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.2693, "step": 4630 }, { "epoch": 0.525744046035243, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2869, "step": 4631 }, { "epoch": 0.5258575731451621, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.2516, "step": 4632 }, { "epoch": 0.5259711002550812, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.2452, "step": 4633 }, { "epoch": 0.5260846273650003, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2657, "step": 4634 }, { "epoch": 0.5261981544749194, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2556, "step": 4635 }, { "epoch": 0.5263116815848384, "grad_norm": 0.37109375, "learning_rate": 0.002, "loss": 5.2759, "step": 4636 }, { "epoch": 0.5264252086947575, "grad_norm": 0.431640625, "learning_rate": 0.002, "loss": 5.295, "step": 4637 }, { "epoch": 0.5265387358046766, "grad_norm": 0.478515625, "learning_rate": 0.002, "loss": 5.2628, "step": 4638 }, { "epoch": 0.5266522629145957, "grad_norm": 0.466796875, "learning_rate": 0.002, "loss": 5.2665, "step": 4639 }, { "epoch": 0.5267657900245147, "grad_norm": 0.46484375, "learning_rate": 0.002, "loss": 5.2609, "step": 4640 }, { "epoch": 0.5268793171344338, "grad_norm": 0.40234375, "learning_rate": 0.002, "loss": 5.2637, "step": 4641 }, { "epoch": 0.5269928442443529, "grad_norm": 0.404296875, "learning_rate": 0.002, "loss": 5.2693, "step": 4642 }, { "epoch": 0.527106371354272, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.2838, "step": 4643 }, { "epoch": 0.527219898464191, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2675, "step": 4644 }, { "epoch": 0.5273334255741101, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2669, "step": 4645 }, { "epoch": 0.5274469526840292, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2685, "step": 4646 }, { "epoch": 0.5275604797939483, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.2591, "step": 4647 }, { "epoch": 0.5276740069038673, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.2725, "step": 4648 }, { "epoch": 0.5277875340137864, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.2576, "step": 4649 }, { "epoch": 0.5279010611237055, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.2674, "step": 4650 }, { "epoch": 0.5280145882336246, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.2735, "step": 4651 }, { "epoch": 0.5281281153435436, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.2575, "step": 4652 }, { "epoch": 0.5282416424534627, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2633, "step": 4653 }, { "epoch": 0.5283551695633818, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.2644, "step": 4654 }, { "epoch": 0.5284686966733009, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.275, "step": 4655 }, { "epoch": 0.52858222378322, "grad_norm": 0.2470703125, "learning_rate": 0.002, "loss": 5.2638, "step": 4656 }, { "epoch": 0.528695750893139, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.2458, "step": 4657 }, { "epoch": 0.5288092780030581, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.2556, "step": 4658 }, { "epoch": 0.5289228051129772, "grad_norm": 0.2470703125, "learning_rate": 0.002, "loss": 5.2712, "step": 4659 }, { "epoch": 0.5290363322228963, "grad_norm": 0.236328125, "learning_rate": 0.002, "loss": 5.2643, "step": 4660 }, { "epoch": 0.5291498593328153, "grad_norm": 0.240234375, "learning_rate": 0.002, "loss": 5.291, "step": 4661 }, { "epoch": 0.5292633864427344, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.2924, "step": 4662 }, { "epoch": 0.5293769135526535, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2956, "step": 4663 }, { "epoch": 0.5294904406625726, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.2764, "step": 4664 }, { "epoch": 0.5296039677724916, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.2938, "step": 4665 }, { "epoch": 0.5297174948824107, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.252, "step": 4666 }, { "epoch": 0.5298310219923298, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2632, "step": 4667 }, { "epoch": 0.5299445491022489, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2767, "step": 4668 }, { "epoch": 0.5300580762121679, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2681, "step": 4669 }, { "epoch": 0.530171603322087, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2644, "step": 4670 }, { "epoch": 0.5302851304320061, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.2742, "step": 4671 }, { "epoch": 0.5303986575419252, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2872, "step": 4672 }, { "epoch": 0.5305121846518442, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2853, "step": 4673 }, { "epoch": 0.5306257117617633, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.2595, "step": 4674 }, { "epoch": 0.5307392388716824, "grad_norm": 0.3828125, "learning_rate": 0.002, "loss": 5.2788, "step": 4675 }, { "epoch": 0.5308527659816015, "grad_norm": 0.40234375, "learning_rate": 0.002, "loss": 5.2727, "step": 4676 }, { "epoch": 0.5309662930915205, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.2789, "step": 4677 }, { "epoch": 0.5310798202014396, "grad_norm": 0.39453125, "learning_rate": 0.002, "loss": 5.2691, "step": 4678 }, { "epoch": 0.5311933473113587, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.2821, "step": 4679 }, { "epoch": 0.5313068744212778, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.2646, "step": 4680 }, { "epoch": 0.5314204015311969, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.2663, "step": 4681 }, { "epoch": 0.5315339286411159, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.2468, "step": 4682 }, { "epoch": 0.531647455751035, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.2525, "step": 4683 }, { "epoch": 0.5317609828609541, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.2644, "step": 4684 }, { "epoch": 0.5318745099708732, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.2676, "step": 4685 }, { "epoch": 0.5319880370807922, "grad_norm": 0.390625, "learning_rate": 0.002, "loss": 5.2533, "step": 4686 }, { "epoch": 0.5321015641907113, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.2618, "step": 4687 }, { "epoch": 0.5322150913006304, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.2868, "step": 4688 }, { "epoch": 0.5323286184105495, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.286, "step": 4689 }, { "epoch": 0.5324421455204685, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2853, "step": 4690 }, { "epoch": 0.5325556726303876, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.2608, "step": 4691 }, { "epoch": 0.5326691997403067, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.2534, "step": 4692 }, { "epoch": 0.5327827268502258, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2533, "step": 4693 }, { "epoch": 0.5328962539601448, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.2674, "step": 4694 }, { "epoch": 0.5330097810700639, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2653, "step": 4695 }, { "epoch": 0.533123308179983, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.2691, "step": 4696 }, { "epoch": 0.5332368352899021, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.2582, "step": 4697 }, { "epoch": 0.5333503623998211, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.2716, "step": 4698 }, { "epoch": 0.5334638895097402, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2584, "step": 4699 }, { "epoch": 0.5335774166196593, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2741, "step": 4700 }, { "epoch": 0.5336909437295784, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2861, "step": 4701 }, { "epoch": 0.5338044708394974, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2831, "step": 4702 }, { "epoch": 0.5339179979494165, "grad_norm": 0.24609375, "learning_rate": 0.002, "loss": 5.268, "step": 4703 }, { "epoch": 0.5340315250593356, "grad_norm": 0.2275390625, "learning_rate": 0.002, "loss": 5.2463, "step": 4704 }, { "epoch": 0.5341450521692547, "grad_norm": 0.216796875, "learning_rate": 0.002, "loss": 5.2673, "step": 4705 }, { "epoch": 0.5342585792791739, "grad_norm": 0.2373046875, "learning_rate": 0.002, "loss": 5.289, "step": 4706 }, { "epoch": 0.5343721063890929, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2744, "step": 4707 }, { "epoch": 0.534485633499012, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2681, "step": 4708 }, { "epoch": 0.5345991606089311, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.2951, "step": 4709 }, { "epoch": 0.5347126877188502, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2497, "step": 4710 }, { "epoch": 0.5348262148287692, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.2657, "step": 4711 }, { "epoch": 0.5349397419386883, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.2862, "step": 4712 }, { "epoch": 0.5350532690486074, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2674, "step": 4713 }, { "epoch": 0.5351667961585265, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.264, "step": 4714 }, { "epoch": 0.5352803232684455, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2677, "step": 4715 }, { "epoch": 0.5353938503783646, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.2734, "step": 4716 }, { "epoch": 0.5355073774882837, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2763, "step": 4717 }, { "epoch": 0.5356209045982028, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2434, "step": 4718 }, { "epoch": 0.5357344317081218, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.266, "step": 4719 }, { "epoch": 0.5358479588180409, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.2668, "step": 4720 }, { "epoch": 0.53596148592796, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2604, "step": 4721 }, { "epoch": 0.5360750130378791, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.2683, "step": 4722 }, { "epoch": 0.5361885401477982, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.2601, "step": 4723 }, { "epoch": 0.5363020672577172, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.2473, "step": 4724 }, { "epoch": 0.5364155943676363, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.2721, "step": 4725 }, { "epoch": 0.5365291214775554, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.291, "step": 4726 }, { "epoch": 0.5366426485874745, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.2722, "step": 4727 }, { "epoch": 0.5367561756973935, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.2896, "step": 4728 }, { "epoch": 0.5368697028073126, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.2558, "step": 4729 }, { "epoch": 0.5369832299172317, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.2672, "step": 4730 }, { "epoch": 0.5370967570271508, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.279, "step": 4731 }, { "epoch": 0.5372102841370698, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.2847, "step": 4732 }, { "epoch": 0.5373238112469889, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2575, "step": 4733 }, { "epoch": 0.537437338356908, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.2789, "step": 4734 }, { "epoch": 0.5375508654668271, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2528, "step": 4735 }, { "epoch": 0.5376643925767461, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.2638, "step": 4736 }, { "epoch": 0.5377779196866652, "grad_norm": 0.478515625, "learning_rate": 0.002, "loss": 5.2796, "step": 4737 }, { "epoch": 0.5378914467965843, "grad_norm": 0.421875, "learning_rate": 0.002, "loss": 5.2742, "step": 4738 }, { "epoch": 0.5380049739065034, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.3038, "step": 4739 }, { "epoch": 0.5381185010164224, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.2565, "step": 4740 }, { "epoch": 0.5382320281263415, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2664, "step": 4741 }, { "epoch": 0.5383455552362606, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2826, "step": 4742 }, { "epoch": 0.5384590823461797, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2574, "step": 4743 }, { "epoch": 0.5385726094560987, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2592, "step": 4744 }, { "epoch": 0.5386861365660178, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.2493, "step": 4745 }, { "epoch": 0.5387996636759369, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2667, "step": 4746 }, { "epoch": 0.538913190785856, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.2665, "step": 4747 }, { "epoch": 0.539026717895775, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.2505, "step": 4748 }, { "epoch": 0.5391402450056941, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.2598, "step": 4749 }, { "epoch": 0.5392537721156132, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2859, "step": 4750 }, { "epoch": 0.5393672992255323, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.2975, "step": 4751 }, { "epoch": 0.5394808263354514, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2652, "step": 4752 }, { "epoch": 0.5395943534453704, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2702, "step": 4753 }, { "epoch": 0.5397078805552895, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.2767, "step": 4754 }, { "epoch": 0.5398214076652086, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.2694, "step": 4755 }, { "epoch": 0.5399349347751277, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.261, "step": 4756 }, { "epoch": 0.5400484618850467, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2656, "step": 4757 }, { "epoch": 0.5401619889949658, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2525, "step": 4758 }, { "epoch": 0.5402755161048849, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2514, "step": 4759 }, { "epoch": 0.540389043214804, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.27, "step": 4760 }, { "epoch": 0.540502570324723, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.2491, "step": 4761 }, { "epoch": 0.5406160974346421, "grad_norm": 0.4140625, "learning_rate": 0.002, "loss": 5.2621, "step": 4762 }, { "epoch": 0.5407296245445612, "grad_norm": 0.41015625, "learning_rate": 0.002, "loss": 5.2698, "step": 4763 }, { "epoch": 0.5408431516544803, "grad_norm": 0.412109375, "learning_rate": 0.002, "loss": 5.2601, "step": 4764 }, { "epoch": 0.5409566787643993, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.2681, "step": 4765 }, { "epoch": 0.5410702058743184, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.2899, "step": 4766 }, { "epoch": 0.5411837329842375, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2805, "step": 4767 }, { "epoch": 0.5412972600941566, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.2656, "step": 4768 }, { "epoch": 0.5414107872040756, "grad_norm": 0.25, "learning_rate": 0.002, "loss": 5.2657, "step": 4769 }, { "epoch": 0.5415243143139947, "grad_norm": 0.25, "learning_rate": 0.002, "loss": 5.2539, "step": 4770 }, { "epoch": 0.5416378414239138, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.262, "step": 4771 }, { "epoch": 0.5417513685338329, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.2779, "step": 4772 }, { "epoch": 0.541864895643752, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.2721, "step": 4773 }, { "epoch": 0.541978422753671, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.2482, "step": 4774 }, { "epoch": 0.5420919498635901, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.2841, "step": 4775 }, { "epoch": 0.5422054769735092, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.2692, "step": 4776 }, { "epoch": 0.5423190040834283, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.288, "step": 4777 }, { "epoch": 0.5424325311933473, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2471, "step": 4778 }, { "epoch": 0.5425460583032664, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2591, "step": 4779 }, { "epoch": 0.5426595854131855, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.2679, "step": 4780 }, { "epoch": 0.5427731125231046, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.2839, "step": 4781 }, { "epoch": 0.5428866396330236, "grad_norm": 0.2373046875, "learning_rate": 0.002, "loss": 5.2534, "step": 4782 }, { "epoch": 0.5430001667429427, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.2566, "step": 4783 }, { "epoch": 0.5431136938528618, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2677, "step": 4784 }, { "epoch": 0.5432272209627809, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.2848, "step": 4785 }, { "epoch": 0.5433407480726999, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.2614, "step": 4786 }, { "epoch": 0.543454275182619, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.2702, "step": 4787 }, { "epoch": 0.5435678022925381, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2501, "step": 4788 }, { "epoch": 0.5436813294024572, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.2469, "step": 4789 }, { "epoch": 0.5437948565123762, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.2621, "step": 4790 }, { "epoch": 0.5439083836222953, "grad_norm": 0.41015625, "learning_rate": 0.002, "loss": 5.2656, "step": 4791 }, { "epoch": 0.5440219107322144, "grad_norm": 0.419921875, "learning_rate": 0.002, "loss": 5.274, "step": 4792 }, { "epoch": 0.5441354378421335, "grad_norm": 0.41015625, "learning_rate": 0.002, "loss": 5.2694, "step": 4793 }, { "epoch": 0.5442489649520525, "grad_norm": 0.380859375, "learning_rate": 0.002, "loss": 5.2536, "step": 4794 }, { "epoch": 0.5443624920619716, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.2815, "step": 4795 }, { "epoch": 0.5444760191718907, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2686, "step": 4796 }, { "epoch": 0.5445895462818098, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2558, "step": 4797 }, { "epoch": 0.5447030733917289, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2573, "step": 4798 }, { "epoch": 0.5448166005016479, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2705, "step": 4799 }, { "epoch": 0.544930127611567, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2681, "step": 4800 }, { "epoch": 0.5450436547214861, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.2624, "step": 4801 }, { "epoch": 0.5451571818314052, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.2727, "step": 4802 }, { "epoch": 0.5452707089413242, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2668, "step": 4803 }, { "epoch": 0.5453842360512433, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.2578, "step": 4804 }, { "epoch": 0.5454977631611624, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2728, "step": 4805 }, { "epoch": 0.5456112902710815, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2691, "step": 4806 }, { "epoch": 0.5457248173810005, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.2667, "step": 4807 }, { "epoch": 0.5458383444909196, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.2356, "step": 4808 }, { "epoch": 0.5459518716008387, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.2789, "step": 4809 }, { "epoch": 0.5460653987107578, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2598, "step": 4810 }, { "epoch": 0.5461789258206768, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2812, "step": 4811 }, { "epoch": 0.5462924529305959, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.2471, "step": 4812 }, { "epoch": 0.546405980040515, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.2602, "step": 4813 }, { "epoch": 0.5465195071504341, "grad_norm": 0.388671875, "learning_rate": 0.002, "loss": 5.259, "step": 4814 }, { "epoch": 0.5466330342603531, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.2572, "step": 4815 }, { "epoch": 0.5467465613702722, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.2786, "step": 4816 }, { "epoch": 0.5468600884801913, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.2753, "step": 4817 }, { "epoch": 0.5469736155901104, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.2718, "step": 4818 }, { "epoch": 0.5470871427000294, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.2534, "step": 4819 }, { "epoch": 0.5472006698099485, "grad_norm": 0.39453125, "learning_rate": 0.002, "loss": 5.2783, "step": 4820 }, { "epoch": 0.5473141969198676, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.2803, "step": 4821 }, { "epoch": 0.5474277240297867, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.2757, "step": 4822 }, { "epoch": 0.5475412511397058, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.27, "step": 4823 }, { "epoch": 0.5476547782496248, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2766, "step": 4824 }, { "epoch": 0.5477683053595439, "grad_norm": 0.2431640625, "learning_rate": 0.002, "loss": 5.2794, "step": 4825 }, { "epoch": 0.547881832469463, "grad_norm": 0.2333984375, "learning_rate": 0.002, "loss": 5.2864, "step": 4826 }, { "epoch": 0.5479953595793821, "grad_norm": 0.228515625, "learning_rate": 0.002, "loss": 5.2738, "step": 4827 }, { "epoch": 0.5481088866893011, "grad_norm": 0.234375, "learning_rate": 0.002, "loss": 5.265, "step": 4828 }, { "epoch": 0.5482224137992202, "grad_norm": 0.25, "learning_rate": 0.002, "loss": 5.2708, "step": 4829 }, { "epoch": 0.5483359409091393, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2446, "step": 4830 }, { "epoch": 0.5484494680190584, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.2725, "step": 4831 }, { "epoch": 0.5485629951289774, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.2428, "step": 4832 }, { "epoch": 0.5486765222388965, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2532, "step": 4833 }, { "epoch": 0.5487900493488156, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.2713, "step": 4834 }, { "epoch": 0.5489035764587347, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.2614, "step": 4835 }, { "epoch": 0.5490171035686537, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.2593, "step": 4836 }, { "epoch": 0.5491306306785728, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.2472, "step": 4837 }, { "epoch": 0.5492441577884919, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2648, "step": 4838 }, { "epoch": 0.549357684898411, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2384, "step": 4839 }, { "epoch": 0.54947121200833, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.2734, "step": 4840 }, { "epoch": 0.5495847391182491, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.2606, "step": 4841 }, { "epoch": 0.5496982662281682, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.2821, "step": 4842 }, { "epoch": 0.5498117933380873, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.2549, "step": 4843 }, { "epoch": 0.5499253204480063, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.2668, "step": 4844 }, { "epoch": 0.5500388475579254, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2737, "step": 4845 }, { "epoch": 0.5501523746678445, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2557, "step": 4846 }, { "epoch": 0.5502659017777636, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2451, "step": 4847 }, { "epoch": 0.5503794288876827, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2861, "step": 4848 }, { "epoch": 0.5504929559976017, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.2678, "step": 4849 }, { "epoch": 0.5506064831075208, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.2563, "step": 4850 }, { "epoch": 0.5507200102174399, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.2791, "step": 4851 }, { "epoch": 0.550833537327359, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2773, "step": 4852 }, { "epoch": 0.550947064437278, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2514, "step": 4853 }, { "epoch": 0.5510605915471971, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2465, "step": 4854 }, { "epoch": 0.5511741186571162, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2707, "step": 4855 }, { "epoch": 0.5512876457670353, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.288, "step": 4856 }, { "epoch": 0.5514011728769543, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2801, "step": 4857 }, { "epoch": 0.5515146999868734, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2546, "step": 4858 }, { "epoch": 0.5516282270967925, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2608, "step": 4859 }, { "epoch": 0.5517417542067116, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.2584, "step": 4860 }, { "epoch": 0.5518552813166306, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2602, "step": 4861 }, { "epoch": 0.5519688084265497, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2732, "step": 4862 }, { "epoch": 0.5520823355364688, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.289, "step": 4863 }, { "epoch": 0.5521958626463879, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.2686, "step": 4864 }, { "epoch": 0.552309389756307, "grad_norm": 0.234375, "learning_rate": 0.002, "loss": 5.2674, "step": 4865 }, { "epoch": 0.552422916866226, "grad_norm": 0.232421875, "learning_rate": 0.002, "loss": 5.2552, "step": 4866 }, { "epoch": 0.5525364439761451, "grad_norm": 0.240234375, "learning_rate": 0.002, "loss": 5.2819, "step": 4867 }, { "epoch": 0.5526499710860642, "grad_norm": 0.2490234375, "learning_rate": 0.002, "loss": 5.2762, "step": 4868 }, { "epoch": 0.5527634981959832, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2731, "step": 4869 }, { "epoch": 0.5528770253059023, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.268, "step": 4870 }, { "epoch": 0.5529905524158214, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2658, "step": 4871 }, { "epoch": 0.5531040795257405, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2427, "step": 4872 }, { "epoch": 0.5532176066356596, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2461, "step": 4873 }, { "epoch": 0.5533311337455786, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.2468, "step": 4874 }, { "epoch": 0.5534446608554977, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.2644, "step": 4875 }, { "epoch": 0.5535581879654168, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.2604, "step": 4876 }, { "epoch": 0.5536717150753359, "grad_norm": 0.37109375, "learning_rate": 0.002, "loss": 5.2765, "step": 4877 }, { "epoch": 0.5537852421852549, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.2489, "step": 4878 }, { "epoch": 0.553898769295174, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.249, "step": 4879 }, { "epoch": 0.5540122964050931, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.2556, "step": 4880 }, { "epoch": 0.5541258235150122, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.252, "step": 4881 }, { "epoch": 0.5542393506249312, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.2476, "step": 4882 }, { "epoch": 0.5543528777348503, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.2588, "step": 4883 }, { "epoch": 0.5544664048447694, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2708, "step": 4884 }, { "epoch": 0.5545799319546885, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.2757, "step": 4885 }, { "epoch": 0.5546934590646075, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.2743, "step": 4886 }, { "epoch": 0.5548069861745266, "grad_norm": 0.396484375, "learning_rate": 0.002, "loss": 5.2978, "step": 4887 }, { "epoch": 0.5549205132844457, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.28, "step": 4888 }, { "epoch": 0.5550340403943648, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.2753, "step": 4889 }, { "epoch": 0.5551475675042838, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.2616, "step": 4890 }, { "epoch": 0.5552610946142029, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2672, "step": 4891 }, { "epoch": 0.555374621724122, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2489, "step": 4892 }, { "epoch": 0.5554881488340411, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2742, "step": 4893 }, { "epoch": 0.5556016759439601, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.259, "step": 4894 }, { "epoch": 0.5557152030538792, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2753, "step": 4895 }, { "epoch": 0.5558287301637983, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2791, "step": 4896 }, { "epoch": 0.5559422572737174, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2427, "step": 4897 }, { "epoch": 0.5560557843836365, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2631, "step": 4898 }, { "epoch": 0.5561693114935555, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2643, "step": 4899 }, { "epoch": 0.5562828386034746, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2618, "step": 4900 }, { "epoch": 0.5563963657133937, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.2575, "step": 4901 }, { "epoch": 0.5565098928233128, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.2599, "step": 4902 }, { "epoch": 0.5566234199332318, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.2515, "step": 4903 }, { "epoch": 0.5567369470431509, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2714, "step": 4904 }, { "epoch": 0.55685047415307, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.284, "step": 4905 }, { "epoch": 0.5569640012629891, "grad_norm": 0.380859375, "learning_rate": 0.002, "loss": 5.2701, "step": 4906 }, { "epoch": 0.5570775283729081, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.2642, "step": 4907 }, { "epoch": 0.5571910554828272, "grad_norm": 0.41796875, "learning_rate": 0.002, "loss": 5.266, "step": 4908 }, { "epoch": 0.5573045825927463, "grad_norm": 0.396484375, "learning_rate": 0.002, "loss": 5.2678, "step": 4909 }, { "epoch": 0.5574181097026654, "grad_norm": 0.41015625, "learning_rate": 0.002, "loss": 5.2475, "step": 4910 }, { "epoch": 0.5575316368125844, "grad_norm": 0.408203125, "learning_rate": 0.002, "loss": 5.2458, "step": 4911 }, { "epoch": 0.5576451639225035, "grad_norm": 0.388671875, "learning_rate": 0.002, "loss": 5.2837, "step": 4912 }, { "epoch": 0.5577586910324226, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.2611, "step": 4913 }, { "epoch": 0.5578722181423417, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.2806, "step": 4914 }, { "epoch": 0.5579857452522607, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.274, "step": 4915 }, { "epoch": 0.5580992723621798, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2491, "step": 4916 }, { "epoch": 0.5582127994720989, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2842, "step": 4917 }, { "epoch": 0.558326326582018, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2646, "step": 4918 }, { "epoch": 0.558439853691937, "grad_norm": 0.2490234375, "learning_rate": 0.002, "loss": 5.254, "step": 4919 }, { "epoch": 0.5585533808018561, "grad_norm": 0.2333984375, "learning_rate": 0.002, "loss": 5.2335, "step": 4920 }, { "epoch": 0.5586669079117752, "grad_norm": 0.2275390625, "learning_rate": 0.002, "loss": 5.2735, "step": 4921 }, { "epoch": 0.5587804350216943, "grad_norm": 0.2197265625, "learning_rate": 0.002, "loss": 5.2751, "step": 4922 }, { "epoch": 0.5588939621316134, "grad_norm": 0.224609375, "learning_rate": 0.002, "loss": 5.2644, "step": 4923 }, { "epoch": 0.5590074892415324, "grad_norm": 0.23828125, "learning_rate": 0.002, "loss": 5.2791, "step": 4924 }, { "epoch": 0.5591210163514515, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.2762, "step": 4925 }, { "epoch": 0.5592345434613706, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2835, "step": 4926 }, { "epoch": 0.5593480705712897, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2573, "step": 4927 }, { "epoch": 0.5594615976812087, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.2572, "step": 4928 }, { "epoch": 0.5595751247911278, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.2578, "step": 4929 }, { "epoch": 0.5596886519010469, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2523, "step": 4930 }, { "epoch": 0.559802179010966, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2722, "step": 4931 }, { "epoch": 0.559915706120885, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2724, "step": 4932 }, { "epoch": 0.5600292332308041, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2436, "step": 4933 }, { "epoch": 0.5601427603407232, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2627, "step": 4934 }, { "epoch": 0.5602562874506423, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2679, "step": 4935 }, { "epoch": 0.5603698145605613, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.2673, "step": 4936 }, { "epoch": 0.5604833416704804, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.2613, "step": 4937 }, { "epoch": 0.5605968687803995, "grad_norm": 0.388671875, "learning_rate": 0.002, "loss": 5.2709, "step": 4938 }, { "epoch": 0.5607103958903186, "grad_norm": 0.390625, "learning_rate": 0.002, "loss": 5.2534, "step": 4939 }, { "epoch": 0.5608239230002376, "grad_norm": 0.4140625, "learning_rate": 0.002, "loss": 5.2902, "step": 4940 }, { "epoch": 0.5609374501101567, "grad_norm": 0.38671875, "learning_rate": 0.002, "loss": 5.2548, "step": 4941 }, { "epoch": 0.5610509772200758, "grad_norm": 0.396484375, "learning_rate": 0.002, "loss": 5.2546, "step": 4942 }, { "epoch": 0.5611645043299949, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.279, "step": 4943 }, { "epoch": 0.561278031439914, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.2489, "step": 4944 }, { "epoch": 0.561391558549833, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.2567, "step": 4945 }, { "epoch": 0.5615050856597521, "grad_norm": 0.37109375, "learning_rate": 0.002, "loss": 5.2749, "step": 4946 }, { "epoch": 0.5616186127696712, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.2626, "step": 4947 }, { "epoch": 0.5617321398795904, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.272, "step": 4948 }, { "epoch": 0.5618456669895094, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2681, "step": 4949 }, { "epoch": 0.5619591940994285, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2593, "step": 4950 }, { "epoch": 0.5620727212093476, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.2599, "step": 4951 }, { "epoch": 0.5621862483192667, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.2606, "step": 4952 }, { "epoch": 0.5622997754291857, "grad_norm": 0.2314453125, "learning_rate": 0.002, "loss": 5.2742, "step": 4953 }, { "epoch": 0.5624133025391048, "grad_norm": 0.2421875, "learning_rate": 0.002, "loss": 5.2869, "step": 4954 }, { "epoch": 0.5625268296490239, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.2519, "step": 4955 }, { "epoch": 0.562640356758943, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2702, "step": 4956 }, { "epoch": 0.562753883868862, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2523, "step": 4957 }, { "epoch": 0.5628674109787811, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2656, "step": 4958 }, { "epoch": 0.5629809380887002, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2715, "step": 4959 }, { "epoch": 0.5630944651986193, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.2514, "step": 4960 }, { "epoch": 0.5632079923085384, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.2739, "step": 4961 }, { "epoch": 0.5633215194184574, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2516, "step": 4962 }, { "epoch": 0.5634350465283765, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2959, "step": 4963 }, { "epoch": 0.5635485736382956, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2617, "step": 4964 }, { "epoch": 0.5636621007482147, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2576, "step": 4965 }, { "epoch": 0.5637756278581337, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2649, "step": 4966 }, { "epoch": 0.5638891549680528, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2706, "step": 4967 }, { "epoch": 0.5640026820779719, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.2523, "step": 4968 }, { "epoch": 0.564116209187891, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.2626, "step": 4969 }, { "epoch": 0.56422973629781, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.2693, "step": 4970 }, { "epoch": 0.5643432634077291, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.2718, "step": 4971 }, { "epoch": 0.5644567905176482, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.2836, "step": 4972 }, { "epoch": 0.5645703176275673, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.2616, "step": 4973 }, { "epoch": 0.5646838447374863, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.27, "step": 4974 }, { "epoch": 0.5647973718474054, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.2288, "step": 4975 }, { "epoch": 0.5649108989573245, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.272, "step": 4976 }, { "epoch": 0.5650244260672436, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.2348, "step": 4977 }, { "epoch": 0.5651379531771626, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2764, "step": 4978 }, { "epoch": 0.5652514802870817, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.2546, "step": 4979 }, { "epoch": 0.5653650073970008, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.249, "step": 4980 }, { "epoch": 0.5654785345069199, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.2379, "step": 4981 }, { "epoch": 0.565592061616839, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.2597, "step": 4982 }, { "epoch": 0.565705588726758, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.2864, "step": 4983 }, { "epoch": 0.5658191158366771, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2508, "step": 4984 }, { "epoch": 0.5659326429465962, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2698, "step": 4985 }, { "epoch": 0.5660461700565153, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2628, "step": 4986 }, { "epoch": 0.5661596971664343, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2731, "step": 4987 }, { "epoch": 0.5662732242763534, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.276, "step": 4988 }, { "epoch": 0.5663867513862725, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.287, "step": 4989 }, { "epoch": 0.5665002784961916, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2646, "step": 4990 }, { "epoch": 0.5666138056061106, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.2592, "step": 4991 }, { "epoch": 0.5667273327160297, "grad_norm": 0.392578125, "learning_rate": 0.002, "loss": 5.2439, "step": 4992 }, { "epoch": 0.5668408598259488, "grad_norm": 0.3828125, "learning_rate": 0.002, "loss": 5.2753, "step": 4993 }, { "epoch": 0.5669543869358679, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.265, "step": 4994 }, { "epoch": 0.5670679140457869, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2626, "step": 4995 }, { "epoch": 0.567181441155706, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2856, "step": 4996 }, { "epoch": 0.5672949682656251, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2589, "step": 4997 }, { "epoch": 0.5674084953755442, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2614, "step": 4998 }, { "epoch": 0.5675220224854632, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2521, "step": 4999 }, { "epoch": 0.5676355495953823, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2686, "step": 5000 }, { "epoch": 0.5677490767053014, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.2578, "step": 5001 }, { "epoch": 0.5678626038152205, "grad_norm": 0.380859375, "learning_rate": 0.002, "loss": 5.2686, "step": 5002 }, { "epoch": 0.5679761309251395, "grad_norm": 0.37109375, "learning_rate": 0.002, "loss": 5.2643, "step": 5003 }, { "epoch": 0.5680896580350586, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.2545, "step": 5004 }, { "epoch": 0.5682031851449777, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2767, "step": 5005 }, { "epoch": 0.5683167122548968, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2592, "step": 5006 }, { "epoch": 0.5684302393648158, "grad_norm": 0.244140625, "learning_rate": 0.002, "loss": 5.2521, "step": 5007 }, { "epoch": 0.5685437664747349, "grad_norm": 0.2470703125, "learning_rate": 0.002, "loss": 5.2628, "step": 5008 }, { "epoch": 0.568657293584654, "grad_norm": 0.234375, "learning_rate": 0.002, "loss": 5.2698, "step": 5009 }, { "epoch": 0.5687708206945731, "grad_norm": 0.23828125, "learning_rate": 0.002, "loss": 5.2457, "step": 5010 }, { "epoch": 0.5688843478044922, "grad_norm": 0.2392578125, "learning_rate": 0.002, "loss": 5.2523, "step": 5011 }, { "epoch": 0.5689978749144112, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2672, "step": 5012 }, { "epoch": 0.5691114020243303, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2653, "step": 5013 }, { "epoch": 0.5692249291342494, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2666, "step": 5014 }, { "epoch": 0.5693384562441685, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.2546, "step": 5015 }, { "epoch": 0.5694519833540875, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2734, "step": 5016 }, { "epoch": 0.5695655104640066, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2583, "step": 5017 }, { "epoch": 0.5696790375739257, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2635, "step": 5018 }, { "epoch": 0.5697925646838448, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.2713, "step": 5019 }, { "epoch": 0.5699060917937638, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.261, "step": 5020 }, { "epoch": 0.5700196189036829, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.252, "step": 5021 }, { "epoch": 0.570133146013602, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.2588, "step": 5022 }, { "epoch": 0.5702466731235211, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2708, "step": 5023 }, { "epoch": 0.5703602002334401, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2664, "step": 5024 }, { "epoch": 0.5704737273433592, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2676, "step": 5025 }, { "epoch": 0.5705872544532783, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2706, "step": 5026 }, { "epoch": 0.5707007815631974, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.2566, "step": 5027 }, { "epoch": 0.5708143086731164, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2639, "step": 5028 }, { "epoch": 0.5709278357830355, "grad_norm": 0.25, "learning_rate": 0.002, "loss": 5.2503, "step": 5029 }, { "epoch": 0.5710413628929546, "grad_norm": 0.24609375, "learning_rate": 0.002, "loss": 5.2927, "step": 5030 }, { "epoch": 0.5711548900028737, "grad_norm": 0.2314453125, "learning_rate": 0.002, "loss": 5.2496, "step": 5031 }, { "epoch": 0.5712684171127927, "grad_norm": 0.224609375, "learning_rate": 0.002, "loss": 5.2634, "step": 5032 }, { "epoch": 0.5713819442227118, "grad_norm": 0.240234375, "learning_rate": 0.002, "loss": 5.267, "step": 5033 }, { "epoch": 0.5714954713326309, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2813, "step": 5034 }, { "epoch": 0.57160899844255, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.2546, "step": 5035 }, { "epoch": 0.571722525552469, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.2491, "step": 5036 }, { "epoch": 0.5718360526623881, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.2434, "step": 5037 }, { "epoch": 0.5719495797723072, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.2554, "step": 5038 }, { "epoch": 0.5720631068822263, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2426, "step": 5039 }, { "epoch": 0.5721766339921454, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2456, "step": 5040 }, { "epoch": 0.5722901611020644, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2911, "step": 5041 }, { "epoch": 0.5724036882119835, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2565, "step": 5042 }, { "epoch": 0.5725172153219026, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.2585, "step": 5043 }, { "epoch": 0.5726307424318217, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2553, "step": 5044 }, { "epoch": 0.5727442695417407, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.2581, "step": 5045 }, { "epoch": 0.5728577966516598, "grad_norm": 0.388671875, "learning_rate": 0.002, "loss": 5.2628, "step": 5046 }, { "epoch": 0.5729713237615789, "grad_norm": 0.423828125, "learning_rate": 0.002, "loss": 5.2466, "step": 5047 }, { "epoch": 0.573084850871498, "grad_norm": 0.390625, "learning_rate": 0.002, "loss": 5.2531, "step": 5048 }, { "epoch": 0.573198377981417, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.2689, "step": 5049 }, { "epoch": 0.5733119050913361, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.2583, "step": 5050 }, { "epoch": 0.5734254322012552, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.254, "step": 5051 }, { "epoch": 0.5735389593111743, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.2539, "step": 5052 }, { "epoch": 0.5736524864210933, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.2861, "step": 5053 }, { "epoch": 0.5737660135310124, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.2428, "step": 5054 }, { "epoch": 0.5738795406409315, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.2471, "step": 5055 }, { "epoch": 0.5739930677508506, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.2761, "step": 5056 }, { "epoch": 0.5741065948607696, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.2671, "step": 5057 }, { "epoch": 0.5742201219706887, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.2595, "step": 5058 }, { "epoch": 0.5743336490806078, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.2505, "step": 5059 }, { "epoch": 0.5744471761905269, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2721, "step": 5060 }, { "epoch": 0.574560703300446, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2642, "step": 5061 }, { "epoch": 0.574674230410365, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2767, "step": 5062 }, { "epoch": 0.5747877575202841, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2783, "step": 5063 }, { "epoch": 0.5749012846302032, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2391, "step": 5064 }, { "epoch": 0.5750148117401223, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2576, "step": 5065 }, { "epoch": 0.5751283388500413, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.2672, "step": 5066 }, { "epoch": 0.5752418659599604, "grad_norm": 0.9296875, "learning_rate": 0.002, "loss": 5.277, "step": 5067 }, { "epoch": 0.5753553930698795, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.2663, "step": 5068 }, { "epoch": 0.5754689201797986, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2694, "step": 5069 }, { "epoch": 0.5755824472897176, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2438, "step": 5070 }, { "epoch": 0.5756959743996367, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2638, "step": 5071 }, { "epoch": 0.5758095015095558, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2409, "step": 5072 }, { "epoch": 0.5759230286194749, "grad_norm": 0.248046875, "learning_rate": 0.002, "loss": 5.245, "step": 5073 }, { "epoch": 0.5760365557293939, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.272, "step": 5074 }, { "epoch": 0.576150082839313, "grad_norm": 0.2294921875, "learning_rate": 0.002, "loss": 5.2674, "step": 5075 }, { "epoch": 0.5762636099492321, "grad_norm": 0.244140625, "learning_rate": 0.002, "loss": 5.261, "step": 5076 }, { "epoch": 0.5763771370591512, "grad_norm": 0.240234375, "learning_rate": 0.002, "loss": 5.258, "step": 5077 }, { "epoch": 0.5764906641690702, "grad_norm": 0.24609375, "learning_rate": 0.002, "loss": 5.2687, "step": 5078 }, { "epoch": 0.5766041912789893, "grad_norm": 0.25, "learning_rate": 0.002, "loss": 5.2592, "step": 5079 }, { "epoch": 0.5767177183889084, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.2566, "step": 5080 }, { "epoch": 0.5768312454988275, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.2544, "step": 5081 }, { "epoch": 0.5769447726087465, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2699, "step": 5082 }, { "epoch": 0.5770582997186656, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2321, "step": 5083 }, { "epoch": 0.5771718268285847, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2409, "step": 5084 }, { "epoch": 0.5772853539385038, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2617, "step": 5085 }, { "epoch": 0.5773988810484229, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.2462, "step": 5086 }, { "epoch": 0.5775124081583419, "grad_norm": 0.3828125, "learning_rate": 0.002, "loss": 5.2618, "step": 5087 }, { "epoch": 0.577625935268261, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.2622, "step": 5088 }, { "epoch": 0.5777394623781801, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.2508, "step": 5089 }, { "epoch": 0.5778529894880992, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.2617, "step": 5090 }, { "epoch": 0.5779665165980182, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.2326, "step": 5091 }, { "epoch": 0.5780800437079373, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2541, "step": 5092 }, { "epoch": 0.5781935708178564, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.2562, "step": 5093 }, { "epoch": 0.5783070979277755, "grad_norm": 0.2490234375, "learning_rate": 0.002, "loss": 5.2614, "step": 5094 }, { "epoch": 0.5784206250376945, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2566, "step": 5095 }, { "epoch": 0.5785341521476136, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2463, "step": 5096 }, { "epoch": 0.5786476792575327, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.2718, "step": 5097 }, { "epoch": 0.5787612063674518, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.2538, "step": 5098 }, { "epoch": 0.5788747334773708, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.2363, "step": 5099 }, { "epoch": 0.5789882605872899, "grad_norm": 0.392578125, "learning_rate": 0.002, "loss": 5.2567, "step": 5100 }, { "epoch": 0.579101787697209, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.2499, "step": 5101 }, { "epoch": 0.5792153148071281, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.2622, "step": 5102 }, { "epoch": 0.5793288419170471, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.2555, "step": 5103 }, { "epoch": 0.5794423690269662, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.2627, "step": 5104 }, { "epoch": 0.5795558961368853, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.2719, "step": 5105 }, { "epoch": 0.5796694232468044, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.2705, "step": 5106 }, { "epoch": 0.5797829503567234, "grad_norm": 0.380859375, "learning_rate": 0.002, "loss": 5.2734, "step": 5107 }, { "epoch": 0.5798964774666425, "grad_norm": 0.404296875, "learning_rate": 0.002, "loss": 5.2648, "step": 5108 }, { "epoch": 0.5800100045765616, "grad_norm": 0.40625, "learning_rate": 0.002, "loss": 5.2785, "step": 5109 }, { "epoch": 0.5801235316864807, "grad_norm": 0.396484375, "learning_rate": 0.002, "loss": 5.2624, "step": 5110 }, { "epoch": 0.5802370587963998, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.2666, "step": 5111 }, { "epoch": 0.5803505859063188, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.2467, "step": 5112 }, { "epoch": 0.5804641130162379, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2524, "step": 5113 }, { "epoch": 0.580577640126157, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2508, "step": 5114 }, { "epoch": 0.5806911672360761, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2689, "step": 5115 }, { "epoch": 0.5808046943459951, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2446, "step": 5116 }, { "epoch": 0.5809182214559142, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2664, "step": 5117 }, { "epoch": 0.5810317485658333, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.261, "step": 5118 }, { "epoch": 0.5811452756757524, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.2615, "step": 5119 }, { "epoch": 0.5812588027856714, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.2589, "step": 5120 }, { "epoch": 0.5813723298955905, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.2528, "step": 5121 }, { "epoch": 0.5814858570055096, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2696, "step": 5122 }, { "epoch": 0.5815993841154287, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2502, "step": 5123 }, { "epoch": 0.5817129112253477, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2365, "step": 5124 }, { "epoch": 0.5818264383352668, "grad_norm": 0.2470703125, "learning_rate": 0.002, "loss": 5.2585, "step": 5125 }, { "epoch": 0.5819399654451859, "grad_norm": 0.25, "learning_rate": 0.002, "loss": 5.2575, "step": 5126 }, { "epoch": 0.582053492555105, "grad_norm": 0.2314453125, "learning_rate": 0.002, "loss": 5.2662, "step": 5127 }, { "epoch": 0.582167019665024, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.2464, "step": 5128 }, { "epoch": 0.5822805467749431, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.2513, "step": 5129 }, { "epoch": 0.5823940738848622, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2852, "step": 5130 }, { "epoch": 0.5825076009947813, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2704, "step": 5131 }, { "epoch": 0.5826211281047003, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2662, "step": 5132 }, { "epoch": 0.5827346552146194, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.2529, "step": 5133 }, { "epoch": 0.5828481823245385, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.2883, "step": 5134 }, { "epoch": 0.5829617094344576, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.2539, "step": 5135 }, { "epoch": 0.5830752365443767, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.2347, "step": 5136 }, { "epoch": 0.5831887636542957, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2561, "step": 5137 }, { "epoch": 0.5833022907642148, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2385, "step": 5138 }, { "epoch": 0.5834158178741339, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.225, "step": 5139 }, { "epoch": 0.583529344984053, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2732, "step": 5140 }, { "epoch": 0.583642872093972, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.257, "step": 5141 }, { "epoch": 0.5837563992038911, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.2609, "step": 5142 }, { "epoch": 0.5838699263138102, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2737, "step": 5143 }, { "epoch": 0.5839834534237293, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2533, "step": 5144 }, { "epoch": 0.5840969805336483, "grad_norm": 0.408203125, "learning_rate": 0.002, "loss": 5.2559, "step": 5145 }, { "epoch": 0.5842105076435674, "grad_norm": 0.46875, "learning_rate": 0.002, "loss": 5.2473, "step": 5146 }, { "epoch": 0.5843240347534865, "grad_norm": 0.49609375, "learning_rate": 0.002, "loss": 5.2597, "step": 5147 }, { "epoch": 0.5844375618634056, "grad_norm": 0.50390625, "learning_rate": 0.002, "loss": 5.2586, "step": 5148 }, { "epoch": 0.5845510889733246, "grad_norm": 0.490234375, "learning_rate": 0.002, "loss": 5.2581, "step": 5149 }, { "epoch": 0.5846646160832437, "grad_norm": 0.408203125, "learning_rate": 0.002, "loss": 5.245, "step": 5150 }, { "epoch": 0.5847781431931628, "grad_norm": 0.431640625, "learning_rate": 0.002, "loss": 5.2721, "step": 5151 }, { "epoch": 0.5848916703030819, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.2573, "step": 5152 }, { "epoch": 0.5850051974130009, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.2539, "step": 5153 }, { "epoch": 0.58511872452292, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2722, "step": 5154 }, { "epoch": 0.5852322516328391, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.2545, "step": 5155 }, { "epoch": 0.5853457787427582, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.2622, "step": 5156 }, { "epoch": 0.5854593058526772, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.2466, "step": 5157 }, { "epoch": 0.5855728329625963, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.257, "step": 5158 }, { "epoch": 0.5856863600725154, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2528, "step": 5159 }, { "epoch": 0.5857998871824345, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2651, "step": 5160 }, { "epoch": 0.5859134142923536, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.2415, "step": 5161 }, { "epoch": 0.5860269414022726, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2645, "step": 5162 }, { "epoch": 0.5861404685121917, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.2317, "step": 5163 }, { "epoch": 0.5862539956221108, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.2435, "step": 5164 }, { "epoch": 0.5863675227320299, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.2519, "step": 5165 }, { "epoch": 0.5864810498419489, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.2518, "step": 5166 }, { "epoch": 0.586594576951868, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.2673, "step": 5167 }, { "epoch": 0.5867081040617871, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.2631, "step": 5168 }, { "epoch": 0.5868216311717062, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.271, "step": 5169 }, { "epoch": 0.5869351582816252, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2642, "step": 5170 }, { "epoch": 0.5870486853915443, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.2781, "step": 5171 }, { "epoch": 0.5871622125014634, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2708, "step": 5172 }, { "epoch": 0.5872757396113825, "grad_norm": 0.2490234375, "learning_rate": 0.002, "loss": 5.2594, "step": 5173 }, { "epoch": 0.5873892667213015, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.2595, "step": 5174 }, { "epoch": 0.5875027938312206, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.2582, "step": 5175 }, { "epoch": 0.5876163209411397, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.275, "step": 5176 }, { "epoch": 0.5877298480510588, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.2654, "step": 5177 }, { "epoch": 0.5878433751609778, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.234, "step": 5178 }, { "epoch": 0.5879569022708969, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2704, "step": 5179 }, { "epoch": 0.588070429380816, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.2387, "step": 5180 }, { "epoch": 0.5881839564907351, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2716, "step": 5181 }, { "epoch": 0.5882974836006541, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.2482, "step": 5182 }, { "epoch": 0.5884110107105732, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.2657, "step": 5183 }, { "epoch": 0.5885245378204923, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2374, "step": 5184 }, { "epoch": 0.5886380649304114, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.2697, "step": 5185 }, { "epoch": 0.5887515920403305, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.2309, "step": 5186 }, { "epoch": 0.5888651191502495, "grad_norm": 0.2490234375, "learning_rate": 0.002, "loss": 5.2389, "step": 5187 }, { "epoch": 0.5889786462601686, "grad_norm": 0.2490234375, "learning_rate": 0.002, "loss": 5.2659, "step": 5188 }, { "epoch": 0.5890921733700878, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2663, "step": 5189 }, { "epoch": 0.5892057004800069, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2498, "step": 5190 }, { "epoch": 0.5893192275899259, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2371, "step": 5191 }, { "epoch": 0.589432754699845, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.252, "step": 5192 }, { "epoch": 0.5895462818097641, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.2727, "step": 5193 }, { "epoch": 0.5896598089196832, "grad_norm": 0.380859375, "learning_rate": 0.002, "loss": 5.2689, "step": 5194 }, { "epoch": 0.5897733360296022, "grad_norm": 0.3828125, "learning_rate": 0.002, "loss": 5.26, "step": 5195 }, { "epoch": 0.5898868631395213, "grad_norm": 0.37109375, "learning_rate": 0.002, "loss": 5.2429, "step": 5196 }, { "epoch": 0.5900003902494404, "grad_norm": 0.392578125, "learning_rate": 0.002, "loss": 5.2411, "step": 5197 }, { "epoch": 0.5901139173593595, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.2626, "step": 5198 }, { "epoch": 0.5902274444692786, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.2747, "step": 5199 }, { "epoch": 0.5903409715791976, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2296, "step": 5200 }, { "epoch": 0.5904544986891167, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.2764, "step": 5201 }, { "epoch": 0.5905680257990358, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2299, "step": 5202 }, { "epoch": 0.5906815529089549, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2282, "step": 5203 }, { "epoch": 0.5907950800188739, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2665, "step": 5204 }, { "epoch": 0.590908607128793, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2421, "step": 5205 }, { "epoch": 0.5910221342387121, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.254, "step": 5206 }, { "epoch": 0.5911356613486312, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.262, "step": 5207 }, { "epoch": 0.5912491884585502, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2487, "step": 5208 }, { "epoch": 0.5913627155684693, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2488, "step": 5209 }, { "epoch": 0.5914762426783884, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2583, "step": 5210 }, { "epoch": 0.5915897697883075, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2607, "step": 5211 }, { "epoch": 0.5917032968982265, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.2359, "step": 5212 }, { "epoch": 0.5918168240081456, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2655, "step": 5213 }, { "epoch": 0.5919303511180647, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2623, "step": 5214 }, { "epoch": 0.5920438782279838, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.2612, "step": 5215 }, { "epoch": 0.5921574053379028, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.242, "step": 5216 }, { "epoch": 0.5922709324478219, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2404, "step": 5217 }, { "epoch": 0.592384459557741, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.2644, "step": 5218 }, { "epoch": 0.5924979866676601, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2483, "step": 5219 }, { "epoch": 0.5926115137775791, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2536, "step": 5220 }, { "epoch": 0.5927250408874982, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2664, "step": 5221 }, { "epoch": 0.5928385679974173, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2746, "step": 5222 }, { "epoch": 0.5929520951073364, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.2493, "step": 5223 }, { "epoch": 0.5930656222172555, "grad_norm": 0.25, "learning_rate": 0.002, "loss": 5.258, "step": 5224 }, { "epoch": 0.5931791493271745, "grad_norm": 0.244140625, "learning_rate": 0.002, "loss": 5.2746, "step": 5225 }, { "epoch": 0.5932926764370936, "grad_norm": 0.228515625, "learning_rate": 0.002, "loss": 5.2684, "step": 5226 }, { "epoch": 0.5934062035470127, "grad_norm": 0.240234375, "learning_rate": 0.002, "loss": 5.2521, "step": 5227 }, { "epoch": 0.5935197306569318, "grad_norm": 0.2392578125, "learning_rate": 0.002, "loss": 5.2577, "step": 5228 }, { "epoch": 0.5936332577668508, "grad_norm": 0.2470703125, "learning_rate": 0.002, "loss": 5.272, "step": 5229 }, { "epoch": 0.5937467848767699, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.2604, "step": 5230 }, { "epoch": 0.593860311986689, "grad_norm": 0.2451171875, "learning_rate": 0.002, "loss": 5.2616, "step": 5231 }, { "epoch": 0.5939738390966081, "grad_norm": 0.2470703125, "learning_rate": 0.002, "loss": 5.2644, "step": 5232 }, { "epoch": 0.5940873662065271, "grad_norm": 0.23046875, "learning_rate": 0.002, "loss": 5.2245, "step": 5233 }, { "epoch": 0.5942008933164462, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.2512, "step": 5234 }, { "epoch": 0.5943144204263653, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2504, "step": 5235 }, { "epoch": 0.5944279475362844, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2473, "step": 5236 }, { "epoch": 0.5945414746462034, "grad_norm": 0.3984375, "learning_rate": 0.002, "loss": 5.2347, "step": 5237 }, { "epoch": 0.5946550017561225, "grad_norm": 0.421875, "learning_rate": 0.002, "loss": 5.2628, "step": 5238 }, { "epoch": 0.5947685288660416, "grad_norm": 0.42578125, "learning_rate": 0.002, "loss": 5.2607, "step": 5239 }, { "epoch": 0.5948820559759607, "grad_norm": 0.41015625, "learning_rate": 0.002, "loss": 5.2365, "step": 5240 }, { "epoch": 0.5949955830858797, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.2566, "step": 5241 }, { "epoch": 0.5951091101957988, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.2744, "step": 5242 }, { "epoch": 0.5952226373057179, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2475, "step": 5243 }, { "epoch": 0.595336164415637, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2586, "step": 5244 }, { "epoch": 0.595449691525556, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.2646, "step": 5245 }, { "epoch": 0.5955632186354751, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.2392, "step": 5246 }, { "epoch": 0.5956767457453942, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.2544, "step": 5247 }, { "epoch": 0.5957902728553133, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.259, "step": 5248 }, { "epoch": 0.5959037999652324, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.2751, "step": 5249 }, { "epoch": 0.5960173270751514, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.2451, "step": 5250 }, { "epoch": 0.5961308541850705, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2434, "step": 5251 }, { "epoch": 0.5962443812949896, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2707, "step": 5252 }, { "epoch": 0.5963579084049087, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2563, "step": 5253 }, { "epoch": 0.5964714355148277, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.2741, "step": 5254 }, { "epoch": 0.5965849626247468, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2614, "step": 5255 }, { "epoch": 0.5966984897346659, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.2671, "step": 5256 }, { "epoch": 0.596812016844585, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2739, "step": 5257 }, { "epoch": 0.596925543954504, "grad_norm": 0.244140625, "learning_rate": 0.002, "loss": 5.235, "step": 5258 }, { "epoch": 0.5970390710644231, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2428, "step": 5259 }, { "epoch": 0.5971525981743422, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2545, "step": 5260 }, { "epoch": 0.5972661252842613, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2599, "step": 5261 }, { "epoch": 0.5973796523941803, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.2645, "step": 5262 }, { "epoch": 0.5974931795040994, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.2762, "step": 5263 }, { "epoch": 0.5976067066140185, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.2699, "step": 5264 }, { "epoch": 0.5977202337239376, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.2585, "step": 5265 }, { "epoch": 0.5978337608338566, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.2605, "step": 5266 }, { "epoch": 0.5979472879437757, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.2698, "step": 5267 }, { "epoch": 0.5980608150536948, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.247, "step": 5268 }, { "epoch": 0.5981743421636139, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.2558, "step": 5269 }, { "epoch": 0.598287869273533, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2721, "step": 5270 }, { "epoch": 0.598401396383452, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.2664, "step": 5271 }, { "epoch": 0.5985149234933711, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2497, "step": 5272 }, { "epoch": 0.5986284506032902, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.2392, "step": 5273 }, { "epoch": 0.5987419777132093, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.2861, "step": 5274 }, { "epoch": 0.5988555048231283, "grad_norm": 0.236328125, "learning_rate": 0.002, "loss": 5.2516, "step": 5275 }, { "epoch": 0.5989690319330474, "grad_norm": 0.23828125, "learning_rate": 0.002, "loss": 5.2648, "step": 5276 }, { "epoch": 0.5990825590429665, "grad_norm": 0.2333984375, "learning_rate": 0.002, "loss": 5.2501, "step": 5277 }, { "epoch": 0.5991960861528856, "grad_norm": 0.2314453125, "learning_rate": 0.002, "loss": 5.2558, "step": 5278 }, { "epoch": 0.5993096132628046, "grad_norm": 0.2392578125, "learning_rate": 0.002, "loss": 5.2599, "step": 5279 }, { "epoch": 0.5994231403727237, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.2443, "step": 5280 }, { "epoch": 0.5995366674826428, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.2477, "step": 5281 }, { "epoch": 0.5996501945925619, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2688, "step": 5282 }, { "epoch": 0.5997637217024809, "grad_norm": 0.458984375, "learning_rate": 0.002, "loss": 5.2672, "step": 5283 }, { "epoch": 0.5998772488124, "grad_norm": 0.455078125, "learning_rate": 0.002, "loss": 5.2474, "step": 5284 }, { "epoch": 0.5999907759223191, "grad_norm": 0.43359375, "learning_rate": 0.002, "loss": 5.2648, "step": 5285 }, { "epoch": 0.6001043030322382, "grad_norm": 0.39453125, "learning_rate": 0.002, "loss": 5.2432, "step": 5286 }, { "epoch": 0.6002178301421572, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.267, "step": 5287 }, { "epoch": 0.6003313572520763, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2419, "step": 5288 }, { "epoch": 0.6004448843619954, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.2454, "step": 5289 }, { "epoch": 0.6005584114719145, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.2602, "step": 5290 }, { "epoch": 0.6006719385818335, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2598, "step": 5291 }, { "epoch": 0.6007854656917526, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2727, "step": 5292 }, { "epoch": 0.6008989928016717, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.2549, "step": 5293 }, { "epoch": 0.6010125199115908, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.2785, "step": 5294 }, { "epoch": 0.6011260470215098, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.2637, "step": 5295 }, { "epoch": 0.6012395741314289, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.2356, "step": 5296 }, { "epoch": 0.601353101241348, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.2638, "step": 5297 }, { "epoch": 0.6014666283512671, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.2754, "step": 5298 }, { "epoch": 0.6015801554611862, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2486, "step": 5299 }, { "epoch": 0.6016936825711052, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2396, "step": 5300 }, { "epoch": 0.6018072096810243, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.2538, "step": 5301 }, { "epoch": 0.6019207367909434, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.271, "step": 5302 }, { "epoch": 0.6020342639008625, "grad_norm": 0.25, "learning_rate": 0.002, "loss": 5.2687, "step": 5303 }, { "epoch": 0.6021477910107815, "grad_norm": 0.2490234375, "learning_rate": 0.002, "loss": 5.2515, "step": 5304 }, { "epoch": 0.6022613181207006, "grad_norm": 0.2412109375, "learning_rate": 0.002, "loss": 5.2502, "step": 5305 }, { "epoch": 0.6023748452306197, "grad_norm": 0.232421875, "learning_rate": 0.002, "loss": 5.2708, "step": 5306 }, { "epoch": 0.6024883723405388, "grad_norm": 0.2314453125, "learning_rate": 0.002, "loss": 5.2513, "step": 5307 }, { "epoch": 0.6026018994504578, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.2462, "step": 5308 }, { "epoch": 0.6027154265603769, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.26, "step": 5309 }, { "epoch": 0.602828953670296, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2584, "step": 5310 }, { "epoch": 0.6029424807802151, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2566, "step": 5311 }, { "epoch": 0.6030560078901341, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.273, "step": 5312 }, { "epoch": 0.6031695350000532, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.2666, "step": 5313 }, { "epoch": 0.6032830621099723, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.2355, "step": 5314 }, { "epoch": 0.6033965892198914, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.2795, "step": 5315 }, { "epoch": 0.6035101163298104, "grad_norm": 0.384765625, "learning_rate": 0.002, "loss": 5.2415, "step": 5316 }, { "epoch": 0.6036236434397295, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.2623, "step": 5317 }, { "epoch": 0.6037371705496486, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.244, "step": 5318 }, { "epoch": 0.6038506976595677, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.2521, "step": 5319 }, { "epoch": 0.6039642247694867, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2543, "step": 5320 }, { "epoch": 0.6040777518794058, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.264, "step": 5321 }, { "epoch": 0.6041912789893249, "grad_norm": 0.244140625, "learning_rate": 0.002, "loss": 5.2584, "step": 5322 }, { "epoch": 0.604304806099244, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.2482, "step": 5323 }, { "epoch": 0.604418333209163, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.2522, "step": 5324 }, { "epoch": 0.6045318603190821, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.242, "step": 5325 }, { "epoch": 0.6046453874290012, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.2637, "step": 5326 }, { "epoch": 0.6047589145389203, "grad_norm": 0.388671875, "learning_rate": 0.002, "loss": 5.2611, "step": 5327 }, { "epoch": 0.6048724416488394, "grad_norm": 0.380859375, "learning_rate": 0.002, "loss": 5.2394, "step": 5328 }, { "epoch": 0.6049859687587584, "grad_norm": 0.3984375, "learning_rate": 0.002, "loss": 5.2666, "step": 5329 }, { "epoch": 0.6050994958686775, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.2464, "step": 5330 }, { "epoch": 0.6052130229785966, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.2435, "step": 5331 }, { "epoch": 0.6053265500885157, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.2676, "step": 5332 }, { "epoch": 0.6054400771984347, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.2704, "step": 5333 }, { "epoch": 0.6055536043083538, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2548, "step": 5334 }, { "epoch": 0.6056671314182729, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2484, "step": 5335 }, { "epoch": 0.605780658528192, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2498, "step": 5336 }, { "epoch": 0.605894185638111, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2606, "step": 5337 }, { "epoch": 0.6060077127480301, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.242, "step": 5338 }, { "epoch": 0.6061212398579492, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.2569, "step": 5339 }, { "epoch": 0.6062347669678683, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.2472, "step": 5340 }, { "epoch": 0.6063482940777873, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2459, "step": 5341 }, { "epoch": 0.6064618211877064, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.251, "step": 5342 }, { "epoch": 0.6065753482976255, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.245, "step": 5343 }, { "epoch": 0.6066888754075446, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2367, "step": 5344 }, { "epoch": 0.6068024025174636, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.27, "step": 5345 }, { "epoch": 0.6069159296273827, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.2629, "step": 5346 }, { "epoch": 0.6070294567373018, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.2746, "step": 5347 }, { "epoch": 0.6071429838472209, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.233, "step": 5348 }, { "epoch": 0.60725651095714, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.2547, "step": 5349 }, { "epoch": 0.607370038067059, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2669, "step": 5350 }, { "epoch": 0.6074835651769781, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2702, "step": 5351 }, { "epoch": 0.6075970922868972, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2626, "step": 5352 }, { "epoch": 0.6077106193968163, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.2409, "step": 5353 }, { "epoch": 0.6078241465067353, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.2521, "step": 5354 }, { "epoch": 0.6079376736166544, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2363, "step": 5355 }, { "epoch": 0.6080512007265735, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2493, "step": 5356 }, { "epoch": 0.6081647278364926, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.2609, "step": 5357 }, { "epoch": 0.6082782549464116, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.2474, "step": 5358 }, { "epoch": 0.6083917820563307, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.232, "step": 5359 }, { "epoch": 0.6085053091662498, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.2526, "step": 5360 }, { "epoch": 0.6086188362761689, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2606, "step": 5361 }, { "epoch": 0.6087323633860879, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2512, "step": 5362 }, { "epoch": 0.608845890496007, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2506, "step": 5363 }, { "epoch": 0.6089594176059261, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2475, "step": 5364 }, { "epoch": 0.6090729447158452, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.2536, "step": 5365 }, { "epoch": 0.6091864718257642, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2398, "step": 5366 }, { "epoch": 0.6092999989356833, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2631, "step": 5367 }, { "epoch": 0.6094135260456024, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2641, "step": 5368 }, { "epoch": 0.6095270531555215, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2621, "step": 5369 }, { "epoch": 0.6096405802654405, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.2609, "step": 5370 }, { "epoch": 0.6097541073753596, "grad_norm": 0.2451171875, "learning_rate": 0.002, "loss": 5.2591, "step": 5371 }, { "epoch": 0.6098676344852787, "grad_norm": 0.23046875, "learning_rate": 0.002, "loss": 5.2626, "step": 5372 }, { "epoch": 0.6099811615951978, "grad_norm": 0.2314453125, "learning_rate": 0.002, "loss": 5.2579, "step": 5373 }, { "epoch": 0.6100946887051168, "grad_norm": 0.2421875, "learning_rate": 0.002, "loss": 5.2467, "step": 5374 }, { "epoch": 0.6102082158150359, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.2537, "step": 5375 }, { "epoch": 0.610321742924955, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2357, "step": 5376 }, { "epoch": 0.6104352700348741, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2715, "step": 5377 }, { "epoch": 0.6105487971447932, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.2627, "step": 5378 }, { "epoch": 0.6106623242547122, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.2396, "step": 5379 }, { "epoch": 0.6107758513646313, "grad_norm": 0.380859375, "learning_rate": 0.002, "loss": 5.2629, "step": 5380 }, { "epoch": 0.6108893784745504, "grad_norm": 0.3828125, "learning_rate": 0.002, "loss": 5.2644, "step": 5381 }, { "epoch": 0.6110029055844695, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.2613, "step": 5382 }, { "epoch": 0.6111164326943885, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.292, "step": 5383 }, { "epoch": 0.6112299598043076, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.2515, "step": 5384 }, { "epoch": 0.6113434869142267, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2357, "step": 5385 }, { "epoch": 0.6114570140241458, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2526, "step": 5386 }, { "epoch": 0.6115705411340648, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.2487, "step": 5387 }, { "epoch": 0.6116840682439839, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.2498, "step": 5388 }, { "epoch": 0.611797595353903, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2431, "step": 5389 }, { "epoch": 0.6119111224638221, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2743, "step": 5390 }, { "epoch": 0.6120246495737411, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2472, "step": 5391 }, { "epoch": 0.6121381766836602, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.2494, "step": 5392 }, { "epoch": 0.6122517037935793, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2495, "step": 5393 }, { "epoch": 0.6123652309034984, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2873, "step": 5394 }, { "epoch": 0.6124787580134174, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.246, "step": 5395 }, { "epoch": 0.6125922851233365, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.2416, "step": 5396 }, { "epoch": 0.6127058122332556, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.2757, "step": 5397 }, { "epoch": 0.6128193393431747, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2772, "step": 5398 }, { "epoch": 0.6129328664530937, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2584, "step": 5399 }, { "epoch": 0.6130463935630128, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.2593, "step": 5400 }, { "epoch": 0.6131599206729319, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2691, "step": 5401 }, { "epoch": 0.613273447782851, "grad_norm": 0.431640625, "learning_rate": 0.002, "loss": 5.2611, "step": 5402 }, { "epoch": 0.61338697489277, "grad_norm": 0.3984375, "learning_rate": 0.002, "loss": 5.2766, "step": 5403 }, { "epoch": 0.6135005020026891, "grad_norm": 0.388671875, "learning_rate": 0.002, "loss": 5.2555, "step": 5404 }, { "epoch": 0.6136140291126082, "grad_norm": 0.3984375, "learning_rate": 0.002, "loss": 5.2561, "step": 5405 }, { "epoch": 0.6137275562225273, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.2548, "step": 5406 }, { "epoch": 0.6138410833324464, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.2744, "step": 5407 }, { "epoch": 0.6139546104423654, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2686, "step": 5408 }, { "epoch": 0.6140681375522845, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.2413, "step": 5409 }, { "epoch": 0.6141816646622036, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.2812, "step": 5410 }, { "epoch": 0.6142951917721227, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2492, "step": 5411 }, { "epoch": 0.6144087188820417, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.2491, "step": 5412 }, { "epoch": 0.6145222459919608, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.2778, "step": 5413 }, { "epoch": 0.6146357731018799, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2514, "step": 5414 }, { "epoch": 0.614749300211799, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2692, "step": 5415 }, { "epoch": 0.614862827321718, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2439, "step": 5416 }, { "epoch": 0.6149763544316371, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.2646, "step": 5417 }, { "epoch": 0.6150898815415562, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.2623, "step": 5418 }, { "epoch": 0.6152034086514753, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2853, "step": 5419 }, { "epoch": 0.6153169357613943, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2627, "step": 5420 }, { "epoch": 0.6154304628713134, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2472, "step": 5421 }, { "epoch": 0.6155439899812325, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2636, "step": 5422 }, { "epoch": 0.6156575170911516, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2643, "step": 5423 }, { "epoch": 0.6157710442010706, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2554, "step": 5424 }, { "epoch": 0.6158845713109897, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.2655, "step": 5425 }, { "epoch": 0.6159980984209088, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.2571, "step": 5426 }, { "epoch": 0.6161116255308279, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.2751, "step": 5427 }, { "epoch": 0.616225152640747, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.2598, "step": 5428 }, { "epoch": 0.616338679750666, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.2603, "step": 5429 }, { "epoch": 0.6164522068605852, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2542, "step": 5430 }, { "epoch": 0.6165657339705043, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.2505, "step": 5431 }, { "epoch": 0.6166792610804234, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2404, "step": 5432 }, { "epoch": 0.6167927881903424, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2635, "step": 5433 }, { "epoch": 0.6169063153002615, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2748, "step": 5434 }, { "epoch": 0.6170198424101806, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2609, "step": 5435 }, { "epoch": 0.6171333695200997, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2477, "step": 5436 }, { "epoch": 0.6172468966300187, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2531, "step": 5437 }, { "epoch": 0.6173604237399378, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2567, "step": 5438 }, { "epoch": 0.6174739508498569, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2669, "step": 5439 }, { "epoch": 0.617587477959776, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.2463, "step": 5440 }, { "epoch": 0.617701005069695, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.2529, "step": 5441 }, { "epoch": 0.6178145321796141, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.2395, "step": 5442 }, { "epoch": 0.6179280592895332, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2444, "step": 5443 }, { "epoch": 0.6180415863994523, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2543, "step": 5444 }, { "epoch": 0.6181551135093714, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.2493, "step": 5445 }, { "epoch": 0.6182686406192904, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.25, "step": 5446 }, { "epoch": 0.6183821677292095, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.2624, "step": 5447 }, { "epoch": 0.6184956948391286, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.2647, "step": 5448 }, { "epoch": 0.6186092219490477, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.2596, "step": 5449 }, { "epoch": 0.6187227490589667, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.2409, "step": 5450 }, { "epoch": 0.6188362761688858, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.2474, "step": 5451 }, { "epoch": 0.6189498032788049, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2943, "step": 5452 }, { "epoch": 0.619063330388724, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.268, "step": 5453 }, { "epoch": 0.619176857498643, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2343, "step": 5454 }, { "epoch": 0.6192903846085621, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.244, "step": 5455 }, { "epoch": 0.6194039117184812, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.2566, "step": 5456 }, { "epoch": 0.6195174388284003, "grad_norm": 0.37109375, "learning_rate": 0.002, "loss": 5.2622, "step": 5457 }, { "epoch": 0.6196309659383193, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.2345, "step": 5458 }, { "epoch": 0.6197444930482384, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.225, "step": 5459 }, { "epoch": 0.6198580201581575, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2564, "step": 5460 }, { "epoch": 0.6199715472680766, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2531, "step": 5461 }, { "epoch": 0.6200850743779956, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.2493, "step": 5462 }, { "epoch": 0.6201986014879147, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2716, "step": 5463 }, { "epoch": 0.6203121285978338, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2576, "step": 5464 }, { "epoch": 0.6204256557077529, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2286, "step": 5465 }, { "epoch": 0.620539182817672, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2555, "step": 5466 }, { "epoch": 0.620652709927591, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2641, "step": 5467 }, { "epoch": 0.6207662370375101, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2656, "step": 5468 }, { "epoch": 0.6208797641474292, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.2712, "step": 5469 }, { "epoch": 0.6209932912573483, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.2571, "step": 5470 }, { "epoch": 0.6211068183672673, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.267, "step": 5471 }, { "epoch": 0.6212203454771864, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.2469, "step": 5472 }, { "epoch": 0.6213338725871055, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2388, "step": 5473 }, { "epoch": 0.6214473996970246, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.2681, "step": 5474 }, { "epoch": 0.6215609268069436, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2476, "step": 5475 }, { "epoch": 0.6216744539168627, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.2382, "step": 5476 }, { "epoch": 0.6217879810267818, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2475, "step": 5477 }, { "epoch": 0.6219015081367009, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.2408, "step": 5478 }, { "epoch": 0.6220150352466199, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2556, "step": 5479 }, { "epoch": 0.622128562356539, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2779, "step": 5480 }, { "epoch": 0.6222420894664581, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2565, "step": 5481 }, { "epoch": 0.6223556165763772, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2546, "step": 5482 }, { "epoch": 0.6224691436862962, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2408, "step": 5483 }, { "epoch": 0.6225826707962153, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2502, "step": 5484 }, { "epoch": 0.6226961979061344, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2628, "step": 5485 }, { "epoch": 0.6228097250160535, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2394, "step": 5486 }, { "epoch": 0.6229232521259725, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2688, "step": 5487 }, { "epoch": 0.6230367792358916, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2547, "step": 5488 }, { "epoch": 0.6231503063458107, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2511, "step": 5489 }, { "epoch": 0.6232638334557298, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2576, "step": 5490 }, { "epoch": 0.6233773605656489, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2314, "step": 5491 }, { "epoch": 0.6234908876755679, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.267, "step": 5492 }, { "epoch": 0.623604414785487, "grad_norm": 0.384765625, "learning_rate": 0.002, "loss": 5.253, "step": 5493 }, { "epoch": 0.6237179418954061, "grad_norm": 0.400390625, "learning_rate": 0.002, "loss": 5.25, "step": 5494 }, { "epoch": 0.6238314690053252, "grad_norm": 0.39453125, "learning_rate": 0.002, "loss": 5.2679, "step": 5495 }, { "epoch": 0.6239449961152442, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.2804, "step": 5496 }, { "epoch": 0.6240585232251633, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.2516, "step": 5497 }, { "epoch": 0.6241720503350824, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.236, "step": 5498 }, { "epoch": 0.6242855774450015, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2427, "step": 5499 }, { "epoch": 0.6243991045549205, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2578, "step": 5500 }, { "epoch": 0.6245126316648396, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2594, "step": 5501 }, { "epoch": 0.6246261587747587, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.2399, "step": 5502 }, { "epoch": 0.6247396858846778, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.2344, "step": 5503 }, { "epoch": 0.6248532129945968, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.2543, "step": 5504 }, { "epoch": 0.6249667401045159, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.2599, "step": 5505 }, { "epoch": 0.625080267214435, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2656, "step": 5506 }, { "epoch": 0.6251937943243541, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.2475, "step": 5507 }, { "epoch": 0.6253073214342731, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.2548, "step": 5508 }, { "epoch": 0.6254208485441922, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2381, "step": 5509 }, { "epoch": 0.6255343756541113, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2639, "step": 5510 }, { "epoch": 0.6256479027640304, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2541, "step": 5511 }, { "epoch": 0.6257614298739494, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2394, "step": 5512 }, { "epoch": 0.6258749569838685, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.251, "step": 5513 }, { "epoch": 0.6259884840937876, "grad_norm": 0.384765625, "learning_rate": 0.002, "loss": 5.2513, "step": 5514 }, { "epoch": 0.6261020112037067, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.2463, "step": 5515 }, { "epoch": 0.6262155383136258, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.2692, "step": 5516 }, { "epoch": 0.6263290654235448, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.2495, "step": 5517 }, { "epoch": 0.6264425925334639, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2588, "step": 5518 }, { "epoch": 0.626556119643383, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2438, "step": 5519 }, { "epoch": 0.6266696467533021, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2592, "step": 5520 }, { "epoch": 0.6267831738632211, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2738, "step": 5521 }, { "epoch": 0.6268967009731402, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2529, "step": 5522 }, { "epoch": 0.6270102280830593, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2675, "step": 5523 }, { "epoch": 0.6271237551929784, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2409, "step": 5524 }, { "epoch": 0.6272372823028974, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2557, "step": 5525 }, { "epoch": 0.6273508094128165, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.2528, "step": 5526 }, { "epoch": 0.6274643365227356, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2369, "step": 5527 }, { "epoch": 0.6275778636326547, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.2477, "step": 5528 }, { "epoch": 0.6276913907425737, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2557, "step": 5529 }, { "epoch": 0.6278049178524928, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2609, "step": 5530 }, { "epoch": 0.6279184449624119, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2338, "step": 5531 }, { "epoch": 0.628031972072331, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2351, "step": 5532 }, { "epoch": 0.62814549918225, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.245, "step": 5533 }, { "epoch": 0.6282590262921691, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2501, "step": 5534 }, { "epoch": 0.6283725534020882, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.26, "step": 5535 }, { "epoch": 0.6284860805120073, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2596, "step": 5536 }, { "epoch": 0.6285996076219263, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2456, "step": 5537 }, { "epoch": 0.6287131347318454, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.2576, "step": 5538 }, { "epoch": 0.6288266618417645, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2544, "step": 5539 }, { "epoch": 0.6289401889516836, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.252, "step": 5540 }, { "epoch": 0.6290537160616027, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.2464, "step": 5541 }, { "epoch": 0.6291672431715217, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.2549, "step": 5542 }, { "epoch": 0.6292807702814408, "grad_norm": 0.38671875, "learning_rate": 0.002, "loss": 5.2724, "step": 5543 }, { "epoch": 0.6293942973913599, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.2313, "step": 5544 }, { "epoch": 0.629507824501279, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.2623, "step": 5545 }, { "epoch": 0.629621351611198, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2549, "step": 5546 }, { "epoch": 0.6297348787211171, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2753, "step": 5547 }, { "epoch": 0.6298484058310362, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.2642, "step": 5548 }, { "epoch": 0.6299619329409553, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.2473, "step": 5549 }, { "epoch": 0.6300754600508743, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2174, "step": 5550 }, { "epoch": 0.6301889871607934, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.2524, "step": 5551 }, { "epoch": 0.6303025142707125, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.2636, "step": 5552 }, { "epoch": 0.6304160413806316, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.2374, "step": 5553 }, { "epoch": 0.6305295684905506, "grad_norm": 0.40625, "learning_rate": 0.002, "loss": 5.2524, "step": 5554 }, { "epoch": 0.6306430956004697, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.249, "step": 5555 }, { "epoch": 0.6307566227103888, "grad_norm": 0.37109375, "learning_rate": 0.002, "loss": 5.2418, "step": 5556 }, { "epoch": 0.6308701498203079, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2524, "step": 5557 }, { "epoch": 0.6309836769302269, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2499, "step": 5558 }, { "epoch": 0.631097204040146, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.2548, "step": 5559 }, { "epoch": 0.6312107311500651, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.2683, "step": 5560 }, { "epoch": 0.6313242582599842, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.2556, "step": 5561 }, { "epoch": 0.6314377853699032, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2494, "step": 5562 }, { "epoch": 0.6315513124798223, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2292, "step": 5563 }, { "epoch": 0.6316648395897414, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2361, "step": 5564 }, { "epoch": 0.6317783666996605, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.2515, "step": 5565 }, { "epoch": 0.6318918938095796, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.267, "step": 5566 }, { "epoch": 0.6320054209194986, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.254, "step": 5567 }, { "epoch": 0.6321189480294177, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2403, "step": 5568 }, { "epoch": 0.6322324751393368, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2366, "step": 5569 }, { "epoch": 0.6323460022492559, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2506, "step": 5570 }, { "epoch": 0.6324595293591749, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.26, "step": 5571 }, { "epoch": 0.632573056469094, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2518, "step": 5572 }, { "epoch": 0.6326865835790131, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2383, "step": 5573 }, { "epoch": 0.6328001106889322, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2505, "step": 5574 }, { "epoch": 0.6329136377988512, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2609, "step": 5575 }, { "epoch": 0.6330271649087703, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.235, "step": 5576 }, { "epoch": 0.6331406920186894, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.2423, "step": 5577 }, { "epoch": 0.6332542191286085, "grad_norm": 0.392578125, "learning_rate": 0.002, "loss": 5.2496, "step": 5578 }, { "epoch": 0.6333677462385275, "grad_norm": 0.40625, "learning_rate": 0.002, "loss": 5.2495, "step": 5579 }, { "epoch": 0.6334812733484466, "grad_norm": 0.419921875, "learning_rate": 0.002, "loss": 5.2522, "step": 5580 }, { "epoch": 0.6335948004583657, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.2457, "step": 5581 }, { "epoch": 0.6337083275682848, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.2686, "step": 5582 }, { "epoch": 0.6338218546782038, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2627, "step": 5583 }, { "epoch": 0.6339353817881229, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2568, "step": 5584 }, { "epoch": 0.634048908898042, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2353, "step": 5585 }, { "epoch": 0.6341624360079611, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2608, "step": 5586 }, { "epoch": 0.6342759631178801, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.242, "step": 5587 }, { "epoch": 0.6343894902277992, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2517, "step": 5588 }, { "epoch": 0.6345030173377183, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2244, "step": 5589 }, { "epoch": 0.6346165444476374, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2546, "step": 5590 }, { "epoch": 0.6347300715575565, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2615, "step": 5591 }, { "epoch": 0.6348435986674755, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2606, "step": 5592 }, { "epoch": 0.6349571257773946, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.2555, "step": 5593 }, { "epoch": 0.6350706528873137, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.2448, "step": 5594 }, { "epoch": 0.6351841799972328, "grad_norm": 0.2490234375, "learning_rate": 0.002, "loss": 5.2369, "step": 5595 }, { "epoch": 0.6352977071071518, "grad_norm": 0.244140625, "learning_rate": 0.002, "loss": 5.223, "step": 5596 }, { "epoch": 0.6354112342170709, "grad_norm": 0.2431640625, "learning_rate": 0.002, "loss": 5.2474, "step": 5597 }, { "epoch": 0.63552476132699, "grad_norm": 0.2412109375, "learning_rate": 0.002, "loss": 5.2539, "step": 5598 }, { "epoch": 0.6356382884369091, "grad_norm": 0.248046875, "learning_rate": 0.002, "loss": 5.2579, "step": 5599 }, { "epoch": 0.6357518155468281, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2614, "step": 5600 }, { "epoch": 0.6358653426567472, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.25, "step": 5601 }, { "epoch": 0.6359788697666663, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.2399, "step": 5602 }, { "epoch": 0.6360923968765854, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.2465, "step": 5603 }, { "epoch": 0.6362059239865044, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.2382, "step": 5604 }, { "epoch": 0.6363194510964235, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2258, "step": 5605 }, { "epoch": 0.6364329782063426, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2418, "step": 5606 }, { "epoch": 0.6365465053162617, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2521, "step": 5607 }, { "epoch": 0.6366600324261807, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2518, "step": 5608 }, { "epoch": 0.6367735595360998, "grad_norm": 0.25, "learning_rate": 0.002, "loss": 5.2547, "step": 5609 }, { "epoch": 0.6368870866460189, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.2609, "step": 5610 }, { "epoch": 0.637000613755938, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.2567, "step": 5611 }, { "epoch": 0.637114140865857, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.2522, "step": 5612 }, { "epoch": 0.6372276679757761, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.2504, "step": 5613 }, { "epoch": 0.6373411950856952, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.2562, "step": 5614 }, { "epoch": 0.6374547221956143, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2664, "step": 5615 }, { "epoch": 0.6375682493055334, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.2328, "step": 5616 }, { "epoch": 0.6376817764154524, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2663, "step": 5617 }, { "epoch": 0.6377953035253715, "grad_norm": 0.2470703125, "learning_rate": 0.002, "loss": 5.2565, "step": 5618 }, { "epoch": 0.6379088306352906, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.2438, "step": 5619 }, { "epoch": 0.6380223577452097, "grad_norm": 0.232421875, "learning_rate": 0.002, "loss": 5.235, "step": 5620 }, { "epoch": 0.6381358848551287, "grad_norm": 0.2392578125, "learning_rate": 0.002, "loss": 5.2359, "step": 5621 }, { "epoch": 0.6382494119650478, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.2668, "step": 5622 }, { "epoch": 0.6383629390749669, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2582, "step": 5623 }, { "epoch": 0.638476466184886, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.265, "step": 5624 }, { "epoch": 0.638589993294805, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.2511, "step": 5625 }, { "epoch": 0.6387035204047241, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.222, "step": 5626 }, { "epoch": 0.6388170475146432, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2548, "step": 5627 }, { "epoch": 0.6389305746245623, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.2438, "step": 5628 }, { "epoch": 0.6390441017344813, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2332, "step": 5629 }, { "epoch": 0.6391576288444004, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2459, "step": 5630 }, { "epoch": 0.6392711559543195, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2318, "step": 5631 }, { "epoch": 0.6393846830642386, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2634, "step": 5632 }, { "epoch": 0.6394982101741576, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2417, "step": 5633 }, { "epoch": 0.6396117372840767, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2905, "step": 5634 }, { "epoch": 0.6397252643939958, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2594, "step": 5635 }, { "epoch": 0.6398387915039149, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2458, "step": 5636 }, { "epoch": 0.639952318613834, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.2654, "step": 5637 }, { "epoch": 0.640065845723753, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.2493, "step": 5638 }, { "epoch": 0.6401793728336721, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.2461, "step": 5639 }, { "epoch": 0.6402928999435912, "grad_norm": 0.40234375, "learning_rate": 0.002, "loss": 5.255, "step": 5640 }, { "epoch": 0.6404064270535103, "grad_norm": 0.419921875, "learning_rate": 0.002, "loss": 5.2404, "step": 5641 }, { "epoch": 0.6405199541634293, "grad_norm": 0.39453125, "learning_rate": 0.002, "loss": 5.267, "step": 5642 }, { "epoch": 0.6406334812733484, "grad_norm": 0.41796875, "learning_rate": 0.002, "loss": 5.2537, "step": 5643 }, { "epoch": 0.6407470083832675, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.2537, "step": 5644 }, { "epoch": 0.6408605354931866, "grad_norm": 0.38671875, "learning_rate": 0.002, "loss": 5.2737, "step": 5645 }, { "epoch": 0.6409740626031056, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.2442, "step": 5646 }, { "epoch": 0.6410875897130247, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.243, "step": 5647 }, { "epoch": 0.6412011168229438, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2641, "step": 5648 }, { "epoch": 0.6413146439328629, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2508, "step": 5649 }, { "epoch": 0.6414281710427819, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.2665, "step": 5650 }, { "epoch": 0.641541698152701, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2499, "step": 5651 }, { "epoch": 0.6416552252626201, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2418, "step": 5652 }, { "epoch": 0.6417687523725392, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2427, "step": 5653 }, { "epoch": 0.6418822794824582, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2678, "step": 5654 }, { "epoch": 0.6419958065923773, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.2542, "step": 5655 }, { "epoch": 0.6421093337022964, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2591, "step": 5656 }, { "epoch": 0.6422228608122155, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2515, "step": 5657 }, { "epoch": 0.6423363879221345, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.2469, "step": 5658 }, { "epoch": 0.6424499150320536, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2467, "step": 5659 }, { "epoch": 0.6425634421419727, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2342, "step": 5660 }, { "epoch": 0.6426769692518918, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2533, "step": 5661 }, { "epoch": 0.6427904963618108, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.2626, "step": 5662 }, { "epoch": 0.6429040234717299, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.2479, "step": 5663 }, { "epoch": 0.643017550581649, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.2365, "step": 5664 }, { "epoch": 0.6431310776915681, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2519, "step": 5665 }, { "epoch": 0.6432446048014872, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.2558, "step": 5666 }, { "epoch": 0.6433581319114062, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.2557, "step": 5667 }, { "epoch": 0.6434716590213253, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.2617, "step": 5668 }, { "epoch": 0.6435851861312444, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2617, "step": 5669 }, { "epoch": 0.6436987132411635, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2603, "step": 5670 }, { "epoch": 0.6438122403510825, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2355, "step": 5671 }, { "epoch": 0.6439257674610017, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2527, "step": 5672 }, { "epoch": 0.6440392945709208, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.2576, "step": 5673 }, { "epoch": 0.6441528216808399, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.229, "step": 5674 }, { "epoch": 0.644266348790759, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2531, "step": 5675 }, { "epoch": 0.644379875900678, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2536, "step": 5676 }, { "epoch": 0.6444934030105971, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2422, "step": 5677 }, { "epoch": 0.6446069301205162, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.2489, "step": 5678 }, { "epoch": 0.6447204572304353, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2607, "step": 5679 }, { "epoch": 0.6448339843403543, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.2389, "step": 5680 }, { "epoch": 0.6449475114502734, "grad_norm": 0.5234375, "learning_rate": 0.002, "loss": 5.2634, "step": 5681 }, { "epoch": 0.6450610385601925, "grad_norm": 0.494140625, "learning_rate": 0.002, "loss": 5.2509, "step": 5682 }, { "epoch": 0.6451745656701116, "grad_norm": 0.421875, "learning_rate": 0.002, "loss": 5.2313, "step": 5683 }, { "epoch": 0.6452880927800306, "grad_norm": 0.482421875, "learning_rate": 0.002, "loss": 5.2576, "step": 5684 }, { "epoch": 0.6454016198899497, "grad_norm": 0.39453125, "learning_rate": 0.002, "loss": 5.2456, "step": 5685 }, { "epoch": 0.6455151469998688, "grad_norm": 0.412109375, "learning_rate": 0.002, "loss": 5.2338, "step": 5686 }, { "epoch": 0.6456286741097879, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.2339, "step": 5687 }, { "epoch": 0.6457422012197069, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.2463, "step": 5688 }, { "epoch": 0.645855728329626, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.2319, "step": 5689 }, { "epoch": 0.6459692554395451, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.2573, "step": 5690 }, { "epoch": 0.6460827825494642, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2331, "step": 5691 }, { "epoch": 0.6461963096593832, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2396, "step": 5692 }, { "epoch": 0.6463098367693023, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2397, "step": 5693 }, { "epoch": 0.6464233638792214, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2427, "step": 5694 }, { "epoch": 0.6465368909891405, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2612, "step": 5695 }, { "epoch": 0.6466504180990595, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.2435, "step": 5696 }, { "epoch": 0.6467639452089786, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2546, "step": 5697 }, { "epoch": 0.6468774723188977, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2511, "step": 5698 }, { "epoch": 0.6469909994288168, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.2284, "step": 5699 }, { "epoch": 0.6471045265387358, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2534, "step": 5700 }, { "epoch": 0.6472180536486549, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2725, "step": 5701 }, { "epoch": 0.647331580758574, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.244, "step": 5702 }, { "epoch": 0.6474451078684931, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2709, "step": 5703 }, { "epoch": 0.6475586349784122, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2625, "step": 5704 }, { "epoch": 0.6476721620883312, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.2444, "step": 5705 }, { "epoch": 0.6477856891982503, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2418, "step": 5706 }, { "epoch": 0.6478992163081694, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.2516, "step": 5707 }, { "epoch": 0.6480127434180885, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.2276, "step": 5708 }, { "epoch": 0.6481262705280075, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2341, "step": 5709 }, { "epoch": 0.6482397976379266, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.2489, "step": 5710 }, { "epoch": 0.6483533247478457, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.2395, "step": 5711 }, { "epoch": 0.6484668518577648, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.2449, "step": 5712 }, { "epoch": 0.6485803789676838, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2309, "step": 5713 }, { "epoch": 0.6486939060776029, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.2451, "step": 5714 }, { "epoch": 0.648807433187522, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.261, "step": 5715 }, { "epoch": 0.6489209602974411, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.2472, "step": 5716 }, { "epoch": 0.6490344874073601, "grad_norm": 0.3984375, "learning_rate": 0.002, "loss": 5.2522, "step": 5717 }, { "epoch": 0.6491480145172792, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.2614, "step": 5718 }, { "epoch": 0.6492615416271983, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2594, "step": 5719 }, { "epoch": 0.6493750687371174, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2448, "step": 5720 }, { "epoch": 0.6494885958470364, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2521, "step": 5721 }, { "epoch": 0.6496021229569555, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2552, "step": 5722 }, { "epoch": 0.6497156500668746, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2395, "step": 5723 }, { "epoch": 0.6498291771767937, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2493, "step": 5724 }, { "epoch": 0.6499427042867127, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2377, "step": 5725 }, { "epoch": 0.6500562313966318, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2389, "step": 5726 }, { "epoch": 0.6501697585065509, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2565, "step": 5727 }, { "epoch": 0.65028328561647, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2521, "step": 5728 }, { "epoch": 0.650396812726389, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.2386, "step": 5729 }, { "epoch": 0.6505103398363081, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2743, "step": 5730 }, { "epoch": 0.6506238669462272, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2373, "step": 5731 }, { "epoch": 0.6507373940561463, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.2528, "step": 5732 }, { "epoch": 0.6508509211660654, "grad_norm": 0.2392578125, "learning_rate": 0.002, "loss": 5.2502, "step": 5733 }, { "epoch": 0.6509644482759844, "grad_norm": 0.22265625, "learning_rate": 0.002, "loss": 5.2644, "step": 5734 }, { "epoch": 0.6510779753859035, "grad_norm": 0.21875, "learning_rate": 0.002, "loss": 5.2405, "step": 5735 }, { "epoch": 0.6511915024958226, "grad_norm": 0.2216796875, "learning_rate": 0.002, "loss": 5.2586, "step": 5736 }, { "epoch": 0.6513050296057417, "grad_norm": 0.2421875, "learning_rate": 0.002, "loss": 5.2561, "step": 5737 }, { "epoch": 0.6514185567156607, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2477, "step": 5738 }, { "epoch": 0.6515320838255798, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.26, "step": 5739 }, { "epoch": 0.6516456109354989, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.2371, "step": 5740 }, { "epoch": 0.651759138045418, "grad_norm": 0.37109375, "learning_rate": 0.002, "loss": 5.2549, "step": 5741 }, { "epoch": 0.651872665155337, "grad_norm": 0.392578125, "learning_rate": 0.002, "loss": 5.2639, "step": 5742 }, { "epoch": 0.6519861922652561, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.2486, "step": 5743 }, { "epoch": 0.6520997193751752, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.2493, "step": 5744 }, { "epoch": 0.6522132464850943, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2502, "step": 5745 }, { "epoch": 0.6523267735950133, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2675, "step": 5746 }, { "epoch": 0.6524403007049324, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.236, "step": 5747 }, { "epoch": 0.6525538278148515, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2457, "step": 5748 }, { "epoch": 0.6526673549247706, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2436, "step": 5749 }, { "epoch": 0.6527808820346896, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.2464, "step": 5750 }, { "epoch": 0.6528944091446087, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2498, "step": 5751 }, { "epoch": 0.6530079362545278, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.2416, "step": 5752 }, { "epoch": 0.6531214633644469, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.2426, "step": 5753 }, { "epoch": 0.653234990474366, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.2726, "step": 5754 }, { "epoch": 0.653348517584285, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.269, "step": 5755 }, { "epoch": 0.6534620446942041, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.248, "step": 5756 }, { "epoch": 0.6535755718041232, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2545, "step": 5757 }, { "epoch": 0.6536890989140423, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.2515, "step": 5758 }, { "epoch": 0.6538026260239613, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.2563, "step": 5759 }, { "epoch": 0.6539161531338804, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.2698, "step": 5760 }, { "epoch": 0.6540296802437995, "grad_norm": 0.42578125, "learning_rate": 0.002, "loss": 5.2411, "step": 5761 }, { "epoch": 0.6541432073537186, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.2692, "step": 5762 }, { "epoch": 0.6542567344636376, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.2513, "step": 5763 }, { "epoch": 0.6543702615735567, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2468, "step": 5764 }, { "epoch": 0.6544837886834758, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.2363, "step": 5765 }, { "epoch": 0.6545973157933949, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.254, "step": 5766 }, { "epoch": 0.6547108429033139, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2767, "step": 5767 }, { "epoch": 0.654824370013233, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.2168, "step": 5768 }, { "epoch": 0.6549378971231521, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.2608, "step": 5769 }, { "epoch": 0.6550514242330712, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2596, "step": 5770 }, { "epoch": 0.6551649513429902, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2526, "step": 5771 }, { "epoch": 0.6552784784529093, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.2665, "step": 5772 }, { "epoch": 0.6553920055628284, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.2638, "step": 5773 }, { "epoch": 0.6555055326727475, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.2483, "step": 5774 }, { "epoch": 0.6556190597826665, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.2619, "step": 5775 }, { "epoch": 0.6557325868925856, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2591, "step": 5776 }, { "epoch": 0.6558461140025047, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2255, "step": 5777 }, { "epoch": 0.6559596411124238, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2452, "step": 5778 }, { "epoch": 0.6560731682223429, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.2466, "step": 5779 }, { "epoch": 0.6561866953322619, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2522, "step": 5780 }, { "epoch": 0.656300222442181, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2565, "step": 5781 }, { "epoch": 0.6564137495521001, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2356, "step": 5782 }, { "epoch": 0.6565272766620192, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.2484, "step": 5783 }, { "epoch": 0.6566408037719382, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2446, "step": 5784 }, { "epoch": 0.6567543308818573, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.2522, "step": 5785 }, { "epoch": 0.6568678579917764, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2751, "step": 5786 }, { "epoch": 0.6569813851016955, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2499, "step": 5787 }, { "epoch": 0.6570949122116145, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.257, "step": 5788 }, { "epoch": 0.6572084393215336, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.275, "step": 5789 }, { "epoch": 0.6573219664314527, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.2461, "step": 5790 }, { "epoch": 0.6574354935413718, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.2315, "step": 5791 }, { "epoch": 0.6575490206512908, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.2415, "step": 5792 }, { "epoch": 0.6576625477612099, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2316, "step": 5793 }, { "epoch": 0.657776074871129, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2361, "step": 5794 }, { "epoch": 0.6578896019810481, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.2401, "step": 5795 }, { "epoch": 0.6580031290909671, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2528, "step": 5796 }, { "epoch": 0.6581166562008862, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2531, "step": 5797 }, { "epoch": 0.6582301833108053, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2496, "step": 5798 }, { "epoch": 0.6583437104207244, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2559, "step": 5799 }, { "epoch": 0.6584572375306434, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.2464, "step": 5800 }, { "epoch": 0.6585707646405625, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2596, "step": 5801 }, { "epoch": 0.6586842917504816, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.285, "step": 5802 }, { "epoch": 0.6587978188604007, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.245, "step": 5803 }, { "epoch": 0.6589113459703198, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.2498, "step": 5804 }, { "epoch": 0.6590248730802388, "grad_norm": 0.4140625, "learning_rate": 0.002, "loss": 5.2349, "step": 5805 }, { "epoch": 0.6591384001901579, "grad_norm": 0.421875, "learning_rate": 0.002, "loss": 5.2382, "step": 5806 }, { "epoch": 0.659251927300077, "grad_norm": 0.42578125, "learning_rate": 0.002, "loss": 5.2642, "step": 5807 }, { "epoch": 0.659365454409996, "grad_norm": 0.4140625, "learning_rate": 0.002, "loss": 5.2397, "step": 5808 }, { "epoch": 0.6594789815199151, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.24, "step": 5809 }, { "epoch": 0.6595925086298342, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.2341, "step": 5810 }, { "epoch": 0.6597060357397533, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2494, "step": 5811 }, { "epoch": 0.6598195628496724, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.2617, "step": 5812 }, { "epoch": 0.6599330899595914, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2566, "step": 5813 }, { "epoch": 0.6600466170695105, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.2492, "step": 5814 }, { "epoch": 0.6601601441794296, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2523, "step": 5815 }, { "epoch": 0.6602736712893487, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2357, "step": 5816 }, { "epoch": 0.6603871983992677, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.234, "step": 5817 }, { "epoch": 0.6605007255091868, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2466, "step": 5818 }, { "epoch": 0.6606142526191059, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2188, "step": 5819 }, { "epoch": 0.660727779729025, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.2533, "step": 5820 }, { "epoch": 0.660841306838944, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2456, "step": 5821 }, { "epoch": 0.6609548339488631, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2541, "step": 5822 }, { "epoch": 0.6610683610587822, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.258, "step": 5823 }, { "epoch": 0.6611818881687013, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2659, "step": 5824 }, { "epoch": 0.6612954152786203, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2447, "step": 5825 }, { "epoch": 0.6614089423885394, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2445, "step": 5826 }, { "epoch": 0.6615224694984585, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2328, "step": 5827 }, { "epoch": 0.6616359966083776, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2688, "step": 5828 }, { "epoch": 0.6617495237182967, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2465, "step": 5829 }, { "epoch": 0.6618630508282157, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2442, "step": 5830 }, { "epoch": 0.6619765779381348, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2527, "step": 5831 }, { "epoch": 0.6620901050480539, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2277, "step": 5832 }, { "epoch": 0.662203632157973, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.2274, "step": 5833 }, { "epoch": 0.662317159267892, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.2492, "step": 5834 }, { "epoch": 0.6624306863778111, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.2514, "step": 5835 }, { "epoch": 0.6625442134877302, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2623, "step": 5836 }, { "epoch": 0.6626577405976493, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2227, "step": 5837 }, { "epoch": 0.6627712677075683, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2543, "step": 5838 }, { "epoch": 0.6628847948174874, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2362, "step": 5839 }, { "epoch": 0.6629983219274065, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.2457, "step": 5840 }, { "epoch": 0.6631118490373256, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.2402, "step": 5841 }, { "epoch": 0.6632253761472446, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2534, "step": 5842 }, { "epoch": 0.6633389032571637, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2417, "step": 5843 }, { "epoch": 0.6634524303670828, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.261, "step": 5844 }, { "epoch": 0.6635659574770019, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2506, "step": 5845 }, { "epoch": 0.6636794845869209, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2303, "step": 5846 }, { "epoch": 0.66379301169684, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2469, "step": 5847 }, { "epoch": 0.6639065388067591, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2291, "step": 5848 }, { "epoch": 0.6640200659166782, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2097, "step": 5849 }, { "epoch": 0.6641335930265972, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2439, "step": 5850 }, { "epoch": 0.6642471201365163, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2527, "step": 5851 }, { "epoch": 0.6643606472464354, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2313, "step": 5852 }, { "epoch": 0.6644741743563545, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2552, "step": 5853 }, { "epoch": 0.6645877014662736, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.261, "step": 5854 }, { "epoch": 0.6647012285761926, "grad_norm": 0.443359375, "learning_rate": 0.002, "loss": 5.2332, "step": 5855 }, { "epoch": 0.6648147556861117, "grad_norm": 0.4296875, "learning_rate": 0.002, "loss": 5.2473, "step": 5856 }, { "epoch": 0.6649282827960308, "grad_norm": 0.3828125, "learning_rate": 0.002, "loss": 5.2228, "step": 5857 }, { "epoch": 0.6650418099059499, "grad_norm": 0.388671875, "learning_rate": 0.002, "loss": 5.2657, "step": 5858 }, { "epoch": 0.6651553370158689, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2418, "step": 5859 }, { "epoch": 0.665268864125788, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2578, "step": 5860 }, { "epoch": 0.6653823912357071, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.2593, "step": 5861 }, { "epoch": 0.6654959183456262, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.2296, "step": 5862 }, { "epoch": 0.6656094454555452, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2287, "step": 5863 }, { "epoch": 0.6657229725654643, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2352, "step": 5864 }, { "epoch": 0.6658364996753834, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.2523, "step": 5865 }, { "epoch": 0.6659500267853025, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.2488, "step": 5866 }, { "epoch": 0.6660635538952215, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.2259, "step": 5867 }, { "epoch": 0.6661770810051406, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2569, "step": 5868 }, { "epoch": 0.6662906081150597, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2565, "step": 5869 }, { "epoch": 0.6664041352249788, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2087, "step": 5870 }, { "epoch": 0.6665176623348978, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.2486, "step": 5871 }, { "epoch": 0.6666311894448169, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.2596, "step": 5872 }, { "epoch": 0.666744716554736, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.2466, "step": 5873 }, { "epoch": 0.6668582436646551, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.2561, "step": 5874 }, { "epoch": 0.6669717707745741, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2672, "step": 5875 }, { "epoch": 0.6670852978844932, "grad_norm": 0.515625, "learning_rate": 0.002, "loss": 5.2672, "step": 5876 }, { "epoch": 0.6671988249944123, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.233, "step": 5877 }, { "epoch": 0.6673123521043314, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2522, "step": 5878 }, { "epoch": 0.6674258792142505, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2337, "step": 5879 }, { "epoch": 0.6675394063241695, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.237, "step": 5880 }, { "epoch": 0.6676529334340886, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2511, "step": 5881 }, { "epoch": 0.6677664605440077, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2521, "step": 5882 }, { "epoch": 0.6678799876539268, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.2497, "step": 5883 }, { "epoch": 0.6679935147638458, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.2291, "step": 5884 }, { "epoch": 0.6681070418737649, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2697, "step": 5885 }, { "epoch": 0.668220568983684, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.2636, "step": 5886 }, { "epoch": 0.6683340960936031, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.2371, "step": 5887 }, { "epoch": 0.6684476232035221, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2477, "step": 5888 }, { "epoch": 0.6685611503134412, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2298, "step": 5889 }, { "epoch": 0.6686746774233603, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.248, "step": 5890 }, { "epoch": 0.6687882045332794, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2366, "step": 5891 }, { "epoch": 0.6689017316431984, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.2533, "step": 5892 }, { "epoch": 0.6690152587531175, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2462, "step": 5893 }, { "epoch": 0.6691287858630366, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.234, "step": 5894 }, { "epoch": 0.6692423129729557, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.2532, "step": 5895 }, { "epoch": 0.6693558400828747, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.2467, "step": 5896 }, { "epoch": 0.6694693671927938, "grad_norm": 0.2470703125, "learning_rate": 0.002, "loss": 5.2415, "step": 5897 }, { "epoch": 0.6695828943027129, "grad_norm": 0.2470703125, "learning_rate": 0.002, "loss": 5.235, "step": 5898 }, { "epoch": 0.669696421412632, "grad_norm": 0.2353515625, "learning_rate": 0.002, "loss": 5.2393, "step": 5899 }, { "epoch": 0.669809948522551, "grad_norm": 0.2373046875, "learning_rate": 0.002, "loss": 5.2381, "step": 5900 }, { "epoch": 0.6699234756324701, "grad_norm": 0.25, "learning_rate": 0.002, "loss": 5.2339, "step": 5901 }, { "epoch": 0.6700370027423892, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.2339, "step": 5902 }, { "epoch": 0.6701505298523083, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2733, "step": 5903 }, { "epoch": 0.6702640569622274, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2526, "step": 5904 }, { "epoch": 0.6703775840721464, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.2446, "step": 5905 }, { "epoch": 0.6704911111820655, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.2448, "step": 5906 }, { "epoch": 0.6706046382919846, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.2374, "step": 5907 }, { "epoch": 0.6707181654019037, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.2521, "step": 5908 }, { "epoch": 0.6708316925118227, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2458, "step": 5909 }, { "epoch": 0.6709452196217418, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2434, "step": 5910 }, { "epoch": 0.6710587467316609, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2533, "step": 5911 }, { "epoch": 0.67117227384158, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2413, "step": 5912 }, { "epoch": 0.6712858009514991, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2272, "step": 5913 }, { "epoch": 0.6713993280614182, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.239, "step": 5914 }, { "epoch": 0.6715128551713373, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.2312, "step": 5915 }, { "epoch": 0.6716263822812564, "grad_norm": 0.2451171875, "learning_rate": 0.002, "loss": 5.2384, "step": 5916 }, { "epoch": 0.6717399093911754, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2516, "step": 5917 }, { "epoch": 0.6718534365010945, "grad_norm": 0.46875, "learning_rate": 0.002, "loss": 5.2672, "step": 5918 }, { "epoch": 0.6719669636110136, "grad_norm": 0.443359375, "learning_rate": 0.002, "loss": 5.2504, "step": 5919 }, { "epoch": 0.6720804907209327, "grad_norm": 0.412109375, "learning_rate": 0.002, "loss": 5.2494, "step": 5920 }, { "epoch": 0.6721940178308518, "grad_norm": 0.46484375, "learning_rate": 0.002, "loss": 5.2597, "step": 5921 }, { "epoch": 0.6723075449407708, "grad_norm": 0.412109375, "learning_rate": 0.002, "loss": 5.2467, "step": 5922 }, { "epoch": 0.6724210720506899, "grad_norm": 0.390625, "learning_rate": 0.002, "loss": 5.2595, "step": 5923 }, { "epoch": 0.672534599160609, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.2413, "step": 5924 }, { "epoch": 0.6726481262705281, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.2274, "step": 5925 }, { "epoch": 0.6727616533804471, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2361, "step": 5926 }, { "epoch": 0.6728751804903662, "grad_norm": 0.236328125, "learning_rate": 0.002, "loss": 5.2595, "step": 5927 }, { "epoch": 0.6729887076002853, "grad_norm": 0.2470703125, "learning_rate": 0.002, "loss": 5.2586, "step": 5928 }, { "epoch": 0.6731022347102044, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.2615, "step": 5929 }, { "epoch": 0.6732157618201234, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2372, "step": 5930 }, { "epoch": 0.6733292889300425, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.2443, "step": 5931 }, { "epoch": 0.6734428160399616, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2524, "step": 5932 }, { "epoch": 0.6735563431498807, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.2573, "step": 5933 }, { "epoch": 0.6736698702597997, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2465, "step": 5934 }, { "epoch": 0.6737833973697188, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.2429, "step": 5935 }, { "epoch": 0.6738969244796379, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2555, "step": 5936 }, { "epoch": 0.674010451589557, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.2504, "step": 5937 }, { "epoch": 0.674123978699476, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.2389, "step": 5938 }, { "epoch": 0.6742375058093951, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.245, "step": 5939 }, { "epoch": 0.6743510329193142, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2555, "step": 5940 }, { "epoch": 0.6744645600292333, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.2303, "step": 5941 }, { "epoch": 0.6745780871391523, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.2276, "step": 5942 }, { "epoch": 0.6746916142490714, "grad_norm": 0.25, "learning_rate": 0.002, "loss": 5.2482, "step": 5943 }, { "epoch": 0.6748051413589905, "grad_norm": 0.23828125, "learning_rate": 0.002, "loss": 5.2511, "step": 5944 }, { "epoch": 0.6749186684689096, "grad_norm": 0.23046875, "learning_rate": 0.002, "loss": 5.2159, "step": 5945 }, { "epoch": 0.6750321955788287, "grad_norm": 0.228515625, "learning_rate": 0.002, "loss": 5.2413, "step": 5946 }, { "epoch": 0.6751457226887477, "grad_norm": 0.25, "learning_rate": 0.002, "loss": 5.2412, "step": 5947 }, { "epoch": 0.6752592497986668, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2137, "step": 5948 }, { "epoch": 0.6753727769085859, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2364, "step": 5949 }, { "epoch": 0.675486304018505, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.2222, "step": 5950 }, { "epoch": 0.675599831128424, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.237, "step": 5951 }, { "epoch": 0.6757133582383431, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.2289, "step": 5952 }, { "epoch": 0.6758268853482622, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.2329, "step": 5953 }, { "epoch": 0.6759404124581813, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.245, "step": 5954 }, { "epoch": 0.6760539395681003, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2435, "step": 5955 }, { "epoch": 0.6761674666780194, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.2424, "step": 5956 }, { "epoch": 0.6762809937879385, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.2476, "step": 5957 }, { "epoch": 0.6763945208978576, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.229, "step": 5958 }, { "epoch": 0.6765080480077766, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.2421, "step": 5959 }, { "epoch": 0.6766215751176957, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2506, "step": 5960 }, { "epoch": 0.6767351022276148, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.2437, "step": 5961 }, { "epoch": 0.6768486293375339, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2622, "step": 5962 }, { "epoch": 0.676962156447453, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.2488, "step": 5963 }, { "epoch": 0.677075683557372, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.2457, "step": 5964 }, { "epoch": 0.6771892106672911, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2546, "step": 5965 }, { "epoch": 0.6773027377772102, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2479, "step": 5966 }, { "epoch": 0.6774162648871292, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.237, "step": 5967 }, { "epoch": 0.6775297919970483, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2383, "step": 5968 }, { "epoch": 0.6776433191069674, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.2344, "step": 5969 }, { "epoch": 0.6777568462168865, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2657, "step": 5970 }, { "epoch": 0.6778703733268056, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2456, "step": 5971 }, { "epoch": 0.6779839004367246, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2556, "step": 5972 }, { "epoch": 0.6780974275466437, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2474, "step": 5973 }, { "epoch": 0.6782109546565628, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2349, "step": 5974 }, { "epoch": 0.6783244817664819, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2473, "step": 5975 }, { "epoch": 0.6784380088764009, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2307, "step": 5976 }, { "epoch": 0.67855153598632, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.2411, "step": 5977 }, { "epoch": 0.6786650630962391, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.2479, "step": 5978 }, { "epoch": 0.6787785902061582, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2395, "step": 5979 }, { "epoch": 0.6788921173160772, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.2386, "step": 5980 }, { "epoch": 0.6790056444259963, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.2367, "step": 5981 }, { "epoch": 0.6791191715359154, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.2321, "step": 5982 }, { "epoch": 0.6792326986458345, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.252, "step": 5983 }, { "epoch": 0.6793462257557535, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.2381, "step": 5984 }, { "epoch": 0.6794597528656726, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.2544, "step": 5985 }, { "epoch": 0.6795732799755917, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2316, "step": 5986 }, { "epoch": 0.6796868070855108, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2453, "step": 5987 }, { "epoch": 0.6798003341954298, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.2529, "step": 5988 }, { "epoch": 0.6799138613053489, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.2416, "step": 5989 }, { "epoch": 0.680027388415268, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.2154, "step": 5990 }, { "epoch": 0.6801409155251871, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.2423, "step": 5991 }, { "epoch": 0.6802544426351061, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2402, "step": 5992 }, { "epoch": 0.6803679697450252, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.2468, "step": 5993 }, { "epoch": 0.6804814968549443, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2356, "step": 5994 }, { "epoch": 0.6805950239648634, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2426, "step": 5995 }, { "epoch": 0.6807085510747825, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.2732, "step": 5996 }, { "epoch": 0.6808220781847015, "grad_norm": 0.244140625, "learning_rate": 0.002, "loss": 5.2705, "step": 5997 }, { "epoch": 0.6809356052946206, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.2548, "step": 5998 }, { "epoch": 0.6810491324045397, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.2282, "step": 5999 }, { "epoch": 0.6811626595144588, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2515, "step": 6000 }, { "epoch": 0.6812761866243778, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2479, "step": 6001 }, { "epoch": 0.6813897137342969, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.2339, "step": 6002 }, { "epoch": 0.681503240844216, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.248, "step": 6003 }, { "epoch": 0.6816167679541351, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2485, "step": 6004 }, { "epoch": 0.6817302950640541, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2493, "step": 6005 }, { "epoch": 0.6818438221739732, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2382, "step": 6006 }, { "epoch": 0.6819573492838923, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.2447, "step": 6007 }, { "epoch": 0.6820708763938114, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.2375, "step": 6008 }, { "epoch": 0.6821844035037304, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.2495, "step": 6009 }, { "epoch": 0.6822979306136495, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.2572, "step": 6010 }, { "epoch": 0.6824114577235686, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.235, "step": 6011 }, { "epoch": 0.6825249848334877, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2647, "step": 6012 }, { "epoch": 0.6826385119434067, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2426, "step": 6013 }, { "epoch": 0.6827520390533258, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.2274, "step": 6014 }, { "epoch": 0.6828655661632449, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.2429, "step": 6015 }, { "epoch": 0.682979093273164, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2224, "step": 6016 }, { "epoch": 0.683092620383083, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.229, "step": 6017 }, { "epoch": 0.6832061474930021, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2476, "step": 6018 }, { "epoch": 0.6833196746029212, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2536, "step": 6019 }, { "epoch": 0.6834332017128403, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.261, "step": 6020 }, { "epoch": 0.6835467288227594, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.252, "step": 6021 }, { "epoch": 0.6836602559326784, "grad_norm": 0.2431640625, "learning_rate": 0.002, "loss": 5.2217, "step": 6022 }, { "epoch": 0.6837737830425975, "grad_norm": 0.234375, "learning_rate": 0.002, "loss": 5.2421, "step": 6023 }, { "epoch": 0.6838873101525166, "grad_norm": 0.24609375, "learning_rate": 0.002, "loss": 5.2475, "step": 6024 }, { "epoch": 0.6840008372624357, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.2412, "step": 6025 }, { "epoch": 0.6841143643723547, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2385, "step": 6026 }, { "epoch": 0.6842278914822738, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2638, "step": 6027 }, { "epoch": 0.6843414185921929, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.2516, "step": 6028 }, { "epoch": 0.684454945702112, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.2472, "step": 6029 }, { "epoch": 0.684568472812031, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2436, "step": 6030 }, { "epoch": 0.6846819999219501, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.2438, "step": 6031 }, { "epoch": 0.6847955270318692, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2309, "step": 6032 }, { "epoch": 0.6849090541417883, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2346, "step": 6033 }, { "epoch": 0.6850225812517073, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.2245, "step": 6034 }, { "epoch": 0.6851361083616264, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2426, "step": 6035 }, { "epoch": 0.6852496354715455, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.2252, "step": 6036 }, { "epoch": 0.6853631625814646, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2452, "step": 6037 }, { "epoch": 0.6854766896913836, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2472, "step": 6038 }, { "epoch": 0.6855902168013027, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.2423, "step": 6039 }, { "epoch": 0.6857037439112218, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.2294, "step": 6040 }, { "epoch": 0.6858172710211409, "grad_norm": 0.3828125, "learning_rate": 0.002, "loss": 5.2474, "step": 6041 }, { "epoch": 0.68593079813106, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.2509, "step": 6042 }, { "epoch": 0.686044325240979, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.2411, "step": 6043 }, { "epoch": 0.6861578523508981, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.2347, "step": 6044 }, { "epoch": 0.6862713794608172, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.2391, "step": 6045 }, { "epoch": 0.6863849065707363, "grad_norm": 0.390625, "learning_rate": 0.002, "loss": 5.2634, "step": 6046 }, { "epoch": 0.6864984336806553, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.2518, "step": 6047 }, { "epoch": 0.6866119607905744, "grad_norm": 0.39453125, "learning_rate": 0.002, "loss": 5.2406, "step": 6048 }, { "epoch": 0.6867254879004935, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.2374, "step": 6049 }, { "epoch": 0.6868390150104126, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.2472, "step": 6050 }, { "epoch": 0.6869525421203316, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.2462, "step": 6051 }, { "epoch": 0.6870660692302507, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.2577, "step": 6052 }, { "epoch": 0.6871795963401698, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.2708, "step": 6053 }, { "epoch": 0.6872931234500889, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.223, "step": 6054 }, { "epoch": 0.6874066505600079, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.2549, "step": 6055 }, { "epoch": 0.687520177669927, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.2377, "step": 6056 }, { "epoch": 0.6876337047798461, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2208, "step": 6057 }, { "epoch": 0.6877472318897652, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.2376, "step": 6058 }, { "epoch": 0.6878607589996842, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.2575, "step": 6059 }, { "epoch": 0.6879742861096033, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.2225, "step": 6060 }, { "epoch": 0.6880878132195224, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.2431, "step": 6061 }, { "epoch": 0.6882013403294415, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2485, "step": 6062 }, { "epoch": 0.6883148674393605, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.2608, "step": 6063 }, { "epoch": 0.6884283945492796, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2388, "step": 6064 }, { "epoch": 0.6885419216591987, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2441, "step": 6065 }, { "epoch": 0.6886554487691178, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.242, "step": 6066 }, { "epoch": 0.6887689758790368, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2494, "step": 6067 }, { "epoch": 0.6888825029889559, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2346, "step": 6068 }, { "epoch": 0.688996030098875, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.2521, "step": 6069 }, { "epoch": 0.6891095572087941, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2533, "step": 6070 }, { "epoch": 0.6892230843187132, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.2626, "step": 6071 }, { "epoch": 0.6893366114286322, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.2497, "step": 6072 }, { "epoch": 0.6894501385385513, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.2533, "step": 6073 }, { "epoch": 0.6895636656484704, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2295, "step": 6074 }, { "epoch": 0.6896771927583895, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2321, "step": 6075 }, { "epoch": 0.6897907198683085, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2544, "step": 6076 }, { "epoch": 0.6899042469782276, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.2312, "step": 6077 }, { "epoch": 0.6900177740881467, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2409, "step": 6078 }, { "epoch": 0.6901313011980658, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2494, "step": 6079 }, { "epoch": 0.6902448283079848, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.2417, "step": 6080 }, { "epoch": 0.6903583554179039, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2411, "step": 6081 }, { "epoch": 0.690471882527823, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.2486, "step": 6082 }, { "epoch": 0.6905854096377421, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2362, "step": 6083 }, { "epoch": 0.6906989367476611, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2228, "step": 6084 }, { "epoch": 0.6908124638575802, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2461, "step": 6085 }, { "epoch": 0.6909259909674993, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2488, "step": 6086 }, { "epoch": 0.6910395180774184, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2303, "step": 6087 }, { "epoch": 0.6911530451873374, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.25, "step": 6088 }, { "epoch": 0.6912665722972565, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2485, "step": 6089 }, { "epoch": 0.6913800994071756, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2409, "step": 6090 }, { "epoch": 0.6914936265170947, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2462, "step": 6091 }, { "epoch": 0.6916071536270137, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.242, "step": 6092 }, { "epoch": 0.6917206807369328, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.245, "step": 6093 }, { "epoch": 0.6918342078468519, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.2544, "step": 6094 }, { "epoch": 0.691947734956771, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2309, "step": 6095 }, { "epoch": 0.69206126206669, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.2487, "step": 6096 }, { "epoch": 0.6921747891766091, "grad_norm": 0.41015625, "learning_rate": 0.002, "loss": 5.2367, "step": 6097 }, { "epoch": 0.6922883162865282, "grad_norm": 0.44140625, "learning_rate": 0.002, "loss": 5.2588, "step": 6098 }, { "epoch": 0.6924018433964473, "grad_norm": 0.4453125, "learning_rate": 0.002, "loss": 5.2361, "step": 6099 }, { "epoch": 0.6925153705063664, "grad_norm": 0.421875, "learning_rate": 0.002, "loss": 5.2485, "step": 6100 }, { "epoch": 0.6926288976162854, "grad_norm": 0.404296875, "learning_rate": 0.002, "loss": 5.2512, "step": 6101 }, { "epoch": 0.6927424247262045, "grad_norm": 0.39453125, "learning_rate": 0.002, "loss": 5.2459, "step": 6102 }, { "epoch": 0.6928559518361236, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.2693, "step": 6103 }, { "epoch": 0.6929694789460427, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2527, "step": 6104 }, { "epoch": 0.6930830060559617, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2467, "step": 6105 }, { "epoch": 0.6931965331658808, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2445, "step": 6106 }, { "epoch": 0.6933100602757999, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.2548, "step": 6107 }, { "epoch": 0.693423587385719, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2422, "step": 6108 }, { "epoch": 0.693537114495638, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.2399, "step": 6109 }, { "epoch": 0.6936506416055571, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.2513, "step": 6110 }, { "epoch": 0.6937641687154762, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2447, "step": 6111 }, { "epoch": 0.6938776958253953, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2426, "step": 6112 }, { "epoch": 0.6939912229353143, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2534, "step": 6113 }, { "epoch": 0.6941047500452334, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2575, "step": 6114 }, { "epoch": 0.6942182771551525, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2382, "step": 6115 }, { "epoch": 0.6943318042650716, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2273, "step": 6116 }, { "epoch": 0.6944453313749906, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2584, "step": 6117 }, { "epoch": 0.6945588584849097, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2405, "step": 6118 }, { "epoch": 0.6946723855948288, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2277, "step": 6119 }, { "epoch": 0.6947859127047479, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.2442, "step": 6120 }, { "epoch": 0.694899439814667, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2261, "step": 6121 }, { "epoch": 0.695012966924586, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.2474, "step": 6122 }, { "epoch": 0.6951264940345051, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.2591, "step": 6123 }, { "epoch": 0.6952400211444242, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2372, "step": 6124 }, { "epoch": 0.6953535482543433, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2371, "step": 6125 }, { "epoch": 0.6954670753642623, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2758, "step": 6126 }, { "epoch": 0.6955806024741814, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2541, "step": 6127 }, { "epoch": 0.6956941295841005, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2535, "step": 6128 }, { "epoch": 0.6958076566940196, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2459, "step": 6129 }, { "epoch": 0.6959211838039386, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2402, "step": 6130 }, { "epoch": 0.6960347109138577, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2125, "step": 6131 }, { "epoch": 0.6961482380237768, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.2431, "step": 6132 }, { "epoch": 0.6962617651336959, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.2259, "step": 6133 }, { "epoch": 0.6963752922436149, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.2648, "step": 6134 }, { "epoch": 0.696488819353534, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.249, "step": 6135 }, { "epoch": 0.6966023464634531, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2362, "step": 6136 }, { "epoch": 0.6967158735733722, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2544, "step": 6137 }, { "epoch": 0.6968294006832912, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.252, "step": 6138 }, { "epoch": 0.6969429277932103, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.2261, "step": 6139 }, { "epoch": 0.6970564549031294, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.2624, "step": 6140 }, { "epoch": 0.6971699820130485, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.2722, "step": 6141 }, { "epoch": 0.6972835091229675, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.2489, "step": 6142 }, { "epoch": 0.6973970362328866, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.2629, "step": 6143 }, { "epoch": 0.6975105633428057, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.2544, "step": 6144 }, { "epoch": 0.6976240904527248, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.2501, "step": 6145 }, { "epoch": 0.6977376175626439, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2579, "step": 6146 }, { "epoch": 0.6978511446725629, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.2549, "step": 6147 }, { "epoch": 0.697964671782482, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2363, "step": 6148 }, { "epoch": 0.6980781988924011, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2774, "step": 6149 }, { "epoch": 0.6981917260023202, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2517, "step": 6150 }, { "epoch": 0.6983052531122392, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2218, "step": 6151 }, { "epoch": 0.6984187802221583, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.2258, "step": 6152 }, { "epoch": 0.6985323073320774, "grad_norm": 0.384765625, "learning_rate": 0.002, "loss": 5.2366, "step": 6153 }, { "epoch": 0.6986458344419966, "grad_norm": 0.4375, "learning_rate": 0.002, "loss": 5.2476, "step": 6154 }, { "epoch": 0.6987593615519156, "grad_norm": 0.4375, "learning_rate": 0.002, "loss": 5.2622, "step": 6155 }, { "epoch": 0.6988728886618347, "grad_norm": 0.44921875, "learning_rate": 0.002, "loss": 5.2498, "step": 6156 }, { "epoch": 0.6989864157717538, "grad_norm": 0.404296875, "learning_rate": 0.002, "loss": 5.2578, "step": 6157 }, { "epoch": 0.6990999428816729, "grad_norm": 0.396484375, "learning_rate": 0.002, "loss": 5.2555, "step": 6158 }, { "epoch": 0.699213469991592, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.2556, "step": 6159 }, { "epoch": 0.699326997101511, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.2488, "step": 6160 }, { "epoch": 0.6994405242114301, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.2494, "step": 6161 }, { "epoch": 0.6995540513213492, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2434, "step": 6162 }, { "epoch": 0.6996675784312683, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.2354, "step": 6163 }, { "epoch": 0.6997811055411873, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.2331, "step": 6164 }, { "epoch": 0.6998946326511064, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.2576, "step": 6165 }, { "epoch": 0.7000081597610255, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.2407, "step": 6166 }, { "epoch": 0.7001216868709446, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.2423, "step": 6167 }, { "epoch": 0.7002352139808636, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.2476, "step": 6168 }, { "epoch": 0.7003487410907827, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.2273, "step": 6169 }, { "epoch": 0.7004622682007018, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.2359, "step": 6170 }, { "epoch": 0.7005757953106209, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.2311, "step": 6171 }, { "epoch": 0.7006893224205399, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.2383, "step": 6172 }, { "epoch": 0.700802849530459, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.2472, "step": 6173 }, { "epoch": 0.7009163766403781, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.2446, "step": 6174 }, { "epoch": 0.7010299037502972, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.2264, "step": 6175 }, { "epoch": 0.7011434308602162, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2312, "step": 6176 }, { "epoch": 0.7012569579701353, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2238, "step": 6177 }, { "epoch": 0.7013704850800544, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2237, "step": 6178 }, { "epoch": 0.7014840121899735, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2507, "step": 6179 }, { "epoch": 0.7015975392998925, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.2243, "step": 6180 }, { "epoch": 0.7017110664098116, "grad_norm": 0.2451171875, "learning_rate": 0.002, "loss": 5.2432, "step": 6181 }, { "epoch": 0.7018245935197307, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2354, "step": 6182 }, { "epoch": 0.7019381206296498, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.249, "step": 6183 }, { "epoch": 0.7020516477395689, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.2265, "step": 6184 }, { "epoch": 0.7021651748494879, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.2604, "step": 6185 }, { "epoch": 0.702278701959407, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2491, "step": 6186 }, { "epoch": 0.7023922290693261, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.2426, "step": 6187 }, { "epoch": 0.7025057561792452, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2493, "step": 6188 }, { "epoch": 0.7026192832891642, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.229, "step": 6189 }, { "epoch": 0.7027328103990833, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2454, "step": 6190 }, { "epoch": 0.7028463375090024, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2199, "step": 6191 }, { "epoch": 0.7029598646189215, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2349, "step": 6192 }, { "epoch": 0.7030733917288405, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2488, "step": 6193 }, { "epoch": 0.7031869188387596, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2423, "step": 6194 }, { "epoch": 0.7033004459486787, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.2295, "step": 6195 }, { "epoch": 0.7034139730585978, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.2474, "step": 6196 }, { "epoch": 0.7035275001685168, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2513, "step": 6197 }, { "epoch": 0.7036410272784359, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.2557, "step": 6198 }, { "epoch": 0.703754554388355, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.2479, "step": 6199 }, { "epoch": 0.7038680814982741, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2192, "step": 6200 }, { "epoch": 0.7039816086081931, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2396, "step": 6201 }, { "epoch": 0.7040951357181122, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2607, "step": 6202 }, { "epoch": 0.7042086628280313, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2301, "step": 6203 }, { "epoch": 0.7043221899379504, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.2418, "step": 6204 }, { "epoch": 0.7044357170478694, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.2536, "step": 6205 }, { "epoch": 0.7045492441577885, "grad_norm": 0.427734375, "learning_rate": 0.002, "loss": 5.2456, "step": 6206 }, { "epoch": 0.7046627712677076, "grad_norm": 0.466796875, "learning_rate": 0.002, "loss": 5.2339, "step": 6207 }, { "epoch": 0.7047762983776267, "grad_norm": 0.5078125, "learning_rate": 0.002, "loss": 5.2302, "step": 6208 }, { "epoch": 0.7048898254875458, "grad_norm": 0.44140625, "learning_rate": 0.002, "loss": 5.2312, "step": 6209 }, { "epoch": 0.7050033525974648, "grad_norm": 0.4296875, "learning_rate": 0.002, "loss": 5.2403, "step": 6210 }, { "epoch": 0.7051168797073839, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.2503, "step": 6211 }, { "epoch": 0.705230406817303, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.234, "step": 6212 }, { "epoch": 0.705343933927222, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.2318, "step": 6213 }, { "epoch": 0.7054574610371411, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.2595, "step": 6214 }, { "epoch": 0.7055709881470602, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.239, "step": 6215 }, { "epoch": 0.7056845152569793, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.2301, "step": 6216 }, { "epoch": 0.7057980423668984, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.2209, "step": 6217 }, { "epoch": 0.7059115694768174, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.2533, "step": 6218 }, { "epoch": 0.7060250965867365, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2491, "step": 6219 }, { "epoch": 0.7061386236966556, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2531, "step": 6220 }, { "epoch": 0.7062521508065747, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2298, "step": 6221 }, { "epoch": 0.7063656779164937, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2367, "step": 6222 }, { "epoch": 0.7064792050264128, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2367, "step": 6223 }, { "epoch": 0.7065927321363319, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2328, "step": 6224 }, { "epoch": 0.706706259246251, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.2607, "step": 6225 }, { "epoch": 0.70681978635617, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2371, "step": 6226 }, { "epoch": 0.7069333134660891, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2399, "step": 6227 }, { "epoch": 0.7070468405760082, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2497, "step": 6228 }, { "epoch": 0.7071603676859273, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2283, "step": 6229 }, { "epoch": 0.7072738947958463, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2552, "step": 6230 }, { "epoch": 0.7073874219057654, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.2496, "step": 6231 }, { "epoch": 0.7075009490156845, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.2524, "step": 6232 }, { "epoch": 0.7076144761256036, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2322, "step": 6233 }, { "epoch": 0.7077280032355227, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2416, "step": 6234 }, { "epoch": 0.7078415303454417, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2494, "step": 6235 }, { "epoch": 0.7079550574553608, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2507, "step": 6236 }, { "epoch": 0.7080685845652799, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2275, "step": 6237 }, { "epoch": 0.708182111675199, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2449, "step": 6238 }, { "epoch": 0.708295638785118, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2474, "step": 6239 }, { "epoch": 0.7084091658950371, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2493, "step": 6240 }, { "epoch": 0.7085226930049562, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2378, "step": 6241 }, { "epoch": 0.7086362201148753, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.2356, "step": 6242 }, { "epoch": 0.7087497472247943, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.2261, "step": 6243 }, { "epoch": 0.7088632743347134, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2489, "step": 6244 }, { "epoch": 0.7089768014446325, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2545, "step": 6245 }, { "epoch": 0.7090903285545516, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2424, "step": 6246 }, { "epoch": 0.7092038556644706, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2223, "step": 6247 }, { "epoch": 0.7093173827743897, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2622, "step": 6248 }, { "epoch": 0.7094309098843088, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.2345, "step": 6249 }, { "epoch": 0.7095444369942279, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.2589, "step": 6250 }, { "epoch": 0.7096579641041469, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.2574, "step": 6251 }, { "epoch": 0.709771491214066, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2437, "step": 6252 }, { "epoch": 0.7098850183239851, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.231, "step": 6253 }, { "epoch": 0.7099985454339042, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2562, "step": 6254 }, { "epoch": 0.7101120725438232, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2438, "step": 6255 }, { "epoch": 0.7102255996537423, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2579, "step": 6256 }, { "epoch": 0.7103391267636614, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2265, "step": 6257 }, { "epoch": 0.7104526538735805, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2682, "step": 6258 }, { "epoch": 0.7105661809834996, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.2451, "step": 6259 }, { "epoch": 0.7106797080934186, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.2502, "step": 6260 }, { "epoch": 0.7107932352033377, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.2373, "step": 6261 }, { "epoch": 0.7109067623132568, "grad_norm": 0.396484375, "learning_rate": 0.002, "loss": 5.2753, "step": 6262 }, { "epoch": 0.7110202894231759, "grad_norm": 0.4140625, "learning_rate": 0.002, "loss": 5.217, "step": 6263 }, { "epoch": 0.7111338165330949, "grad_norm": 0.427734375, "learning_rate": 0.002, "loss": 5.2446, "step": 6264 }, { "epoch": 0.711247343643014, "grad_norm": 0.396484375, "learning_rate": 0.002, "loss": 5.2324, "step": 6265 }, { "epoch": 0.7113608707529331, "grad_norm": 0.412109375, "learning_rate": 0.002, "loss": 5.2599, "step": 6266 }, { "epoch": 0.7114743978628522, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.217, "step": 6267 }, { "epoch": 0.7115879249727712, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.2671, "step": 6268 }, { "epoch": 0.7117014520826903, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2372, "step": 6269 }, { "epoch": 0.7118149791926094, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.244, "step": 6270 }, { "epoch": 0.7119285063025285, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2523, "step": 6271 }, { "epoch": 0.7120420334124475, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.2354, "step": 6272 }, { "epoch": 0.7121555605223666, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2386, "step": 6273 }, { "epoch": 0.7122690876322857, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2426, "step": 6274 }, { "epoch": 0.7123826147422048, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.2166, "step": 6275 }, { "epoch": 0.7124961418521238, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.2319, "step": 6276 }, { "epoch": 0.7126096689620429, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.25, "step": 6277 }, { "epoch": 0.712723196071962, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2544, "step": 6278 }, { "epoch": 0.7128367231818811, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2268, "step": 6279 }, { "epoch": 0.7129502502918001, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2417, "step": 6280 }, { "epoch": 0.7130637774017192, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2317, "step": 6281 }, { "epoch": 0.7131773045116383, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.2315, "step": 6282 }, { "epoch": 0.7132908316215574, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2487, "step": 6283 }, { "epoch": 0.7134043587314765, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.2278, "step": 6284 }, { "epoch": 0.7135178858413955, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2564, "step": 6285 }, { "epoch": 0.7136314129513146, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.254, "step": 6286 }, { "epoch": 0.7137449400612337, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.237, "step": 6287 }, { "epoch": 0.7138584671711528, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2461, "step": 6288 }, { "epoch": 0.7139719942810718, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2695, "step": 6289 }, { "epoch": 0.7140855213909909, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.2339, "step": 6290 }, { "epoch": 0.71419904850091, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2607, "step": 6291 }, { "epoch": 0.7143125756108291, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.2145, "step": 6292 }, { "epoch": 0.7144261027207481, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2186, "step": 6293 }, { "epoch": 0.7145396298306672, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.2618, "step": 6294 }, { "epoch": 0.7146531569405863, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2191, "step": 6295 }, { "epoch": 0.7147666840505054, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2542, "step": 6296 }, { "epoch": 0.7148802111604244, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2508, "step": 6297 }, { "epoch": 0.7149937382703435, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2608, "step": 6298 }, { "epoch": 0.7151072653802626, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2516, "step": 6299 }, { "epoch": 0.7152207924901817, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2422, "step": 6300 }, { "epoch": 0.7153343196001007, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.2375, "step": 6301 }, { "epoch": 0.7154478467100198, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.2288, "step": 6302 }, { "epoch": 0.7155613738199389, "grad_norm": 0.419921875, "learning_rate": 0.002, "loss": 5.2386, "step": 6303 }, { "epoch": 0.715674900929858, "grad_norm": 0.419921875, "learning_rate": 0.002, "loss": 5.2415, "step": 6304 }, { "epoch": 0.715788428039777, "grad_norm": 0.4296875, "learning_rate": 0.002, "loss": 5.2467, "step": 6305 }, { "epoch": 0.7159019551496961, "grad_norm": 0.427734375, "learning_rate": 0.002, "loss": 5.2426, "step": 6306 }, { "epoch": 0.7160154822596152, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.2535, "step": 6307 }, { "epoch": 0.7161290093695343, "grad_norm": 0.38671875, "learning_rate": 0.002, "loss": 5.247, "step": 6308 }, { "epoch": 0.7162425364794534, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.2234, "step": 6309 }, { "epoch": 0.7163560635893724, "grad_norm": 0.388671875, "learning_rate": 0.002, "loss": 5.2215, "step": 6310 }, { "epoch": 0.7164695906992915, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.2295, "step": 6311 }, { "epoch": 0.7165831178092106, "grad_norm": 0.38671875, "learning_rate": 0.002, "loss": 5.2615, "step": 6312 }, { "epoch": 0.7166966449191297, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.2268, "step": 6313 }, { "epoch": 0.7168101720290487, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.2612, "step": 6314 }, { "epoch": 0.7169236991389678, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.2545, "step": 6315 }, { "epoch": 0.7170372262488869, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.2335, "step": 6316 }, { "epoch": 0.717150753358806, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2281, "step": 6317 }, { "epoch": 0.717264280468725, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.209, "step": 6318 }, { "epoch": 0.7173778075786441, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2326, "step": 6319 }, { "epoch": 0.7174913346885632, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.254, "step": 6320 }, { "epoch": 0.7176048617984823, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.256, "step": 6321 }, { "epoch": 0.7177183889084013, "grad_norm": 0.240234375, "learning_rate": 0.002, "loss": 5.2453, "step": 6322 }, { "epoch": 0.7178319160183204, "grad_norm": 0.2490234375, "learning_rate": 0.002, "loss": 5.2529, "step": 6323 }, { "epoch": 0.7179454431282395, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.2234, "step": 6324 }, { "epoch": 0.7180589702381586, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2272, "step": 6325 }, { "epoch": 0.7181724973480776, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2367, "step": 6326 }, { "epoch": 0.7182860244579967, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.2425, "step": 6327 }, { "epoch": 0.7183995515679158, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.243, "step": 6328 }, { "epoch": 0.7185130786778349, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2307, "step": 6329 }, { "epoch": 0.718626605787754, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2472, "step": 6330 }, { "epoch": 0.718740132897673, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2287, "step": 6331 }, { "epoch": 0.7188536600075921, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2418, "step": 6332 }, { "epoch": 0.7189671871175112, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2604, "step": 6333 }, { "epoch": 0.7190807142274303, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.2445, "step": 6334 }, { "epoch": 0.7191942413373493, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.2534, "step": 6335 }, { "epoch": 0.7193077684472684, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2262, "step": 6336 }, { "epoch": 0.7194212955571875, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.2634, "step": 6337 }, { "epoch": 0.7195348226671066, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2402, "step": 6338 }, { "epoch": 0.7196483497770256, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2427, "step": 6339 }, { "epoch": 0.7197618768869447, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.25, "step": 6340 }, { "epoch": 0.7198754039968638, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.2438, "step": 6341 }, { "epoch": 0.7199889311067829, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2458, "step": 6342 }, { "epoch": 0.7201024582167019, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2153, "step": 6343 }, { "epoch": 0.720215985326621, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2486, "step": 6344 }, { "epoch": 0.7203295124365401, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2198, "step": 6345 }, { "epoch": 0.7204430395464592, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.2228, "step": 6346 }, { "epoch": 0.7205565666563782, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2262, "step": 6347 }, { "epoch": 0.7206700937662973, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2327, "step": 6348 }, { "epoch": 0.7207836208762164, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2519, "step": 6349 }, { "epoch": 0.7208971479861355, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.2547, "step": 6350 }, { "epoch": 0.7210106750960545, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.2459, "step": 6351 }, { "epoch": 0.7211242022059736, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.2511, "step": 6352 }, { "epoch": 0.7212377293158927, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2552, "step": 6353 }, { "epoch": 0.7213512564258118, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2613, "step": 6354 }, { "epoch": 0.7214647835357308, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2413, "step": 6355 }, { "epoch": 0.7215783106456499, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.251, "step": 6356 }, { "epoch": 0.721691837755569, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2418, "step": 6357 }, { "epoch": 0.7218053648654881, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.2294, "step": 6358 }, { "epoch": 0.7219188919754072, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.2562, "step": 6359 }, { "epoch": 0.7220324190853262, "grad_norm": 0.40234375, "learning_rate": 0.002, "loss": 5.2279, "step": 6360 }, { "epoch": 0.7221459461952453, "grad_norm": 0.396484375, "learning_rate": 0.002, "loss": 5.2671, "step": 6361 }, { "epoch": 0.7222594733051644, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.23, "step": 6362 }, { "epoch": 0.7223730004150835, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2403, "step": 6363 }, { "epoch": 0.7224865275250025, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2304, "step": 6364 }, { "epoch": 0.7226000546349216, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.253, "step": 6365 }, { "epoch": 0.7227135817448407, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.2448, "step": 6366 }, { "epoch": 0.7228271088547598, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2385, "step": 6367 }, { "epoch": 0.7229406359646788, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.2347, "step": 6368 }, { "epoch": 0.7230541630745979, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2655, "step": 6369 }, { "epoch": 0.723167690184517, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2436, "step": 6370 }, { "epoch": 0.7232812172944361, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.2494, "step": 6371 }, { "epoch": 0.7233947444043551, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.2626, "step": 6372 }, { "epoch": 0.7235082715142742, "grad_norm": 0.3828125, "learning_rate": 0.002, "loss": 5.2181, "step": 6373 }, { "epoch": 0.7236217986241933, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.2354, "step": 6374 }, { "epoch": 0.7237353257341124, "grad_norm": 0.404296875, "learning_rate": 0.002, "loss": 5.2315, "step": 6375 }, { "epoch": 0.7238488528440314, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.2571, "step": 6376 }, { "epoch": 0.7239623799539505, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.2371, "step": 6377 }, { "epoch": 0.7240759070638696, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.2275, "step": 6378 }, { "epoch": 0.7241894341737887, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2283, "step": 6379 }, { "epoch": 0.7243029612837077, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2403, "step": 6380 }, { "epoch": 0.7244164883936268, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2546, "step": 6381 }, { "epoch": 0.7245300155035459, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.2404, "step": 6382 }, { "epoch": 0.724643542613465, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.2525, "step": 6383 }, { "epoch": 0.724757069723384, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.2326, "step": 6384 }, { "epoch": 0.7248705968333031, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.2494, "step": 6385 }, { "epoch": 0.7249841239432222, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2328, "step": 6386 }, { "epoch": 0.7250976510531413, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2292, "step": 6387 }, { "epoch": 0.7252111781630604, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2334, "step": 6388 }, { "epoch": 0.7253247052729794, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.2517, "step": 6389 }, { "epoch": 0.7254382323828985, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.265, "step": 6390 }, { "epoch": 0.7255517594928176, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2655, "step": 6391 }, { "epoch": 0.7256652866027367, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.2321, "step": 6392 }, { "epoch": 0.7257788137126557, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2326, "step": 6393 }, { "epoch": 0.7258923408225748, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2581, "step": 6394 }, { "epoch": 0.7260058679324939, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.245, "step": 6395 }, { "epoch": 0.7261193950424131, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.254, "step": 6396 }, { "epoch": 0.7262329221523321, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.249, "step": 6397 }, { "epoch": 0.7263464492622512, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2758, "step": 6398 }, { "epoch": 0.7264599763721703, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.2435, "step": 6399 }, { "epoch": 0.7265735034820894, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2326, "step": 6400 }, { "epoch": 0.7266870305920085, "grad_norm": 0.396484375, "learning_rate": 0.002, "loss": 5.2434, "step": 6401 }, { "epoch": 0.7268005577019275, "grad_norm": 0.404296875, "learning_rate": 0.002, "loss": 5.2529, "step": 6402 }, { "epoch": 0.7269140848118466, "grad_norm": 0.451171875, "learning_rate": 0.002, "loss": 5.2284, "step": 6403 }, { "epoch": 0.7270276119217657, "grad_norm": 0.458984375, "learning_rate": 0.002, "loss": 5.2504, "step": 6404 }, { "epoch": 0.7271411390316848, "grad_norm": 0.42578125, "learning_rate": 0.002, "loss": 5.2474, "step": 6405 }, { "epoch": 0.7272546661416038, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.2369, "step": 6406 }, { "epoch": 0.7273681932515229, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.236, "step": 6407 }, { "epoch": 0.727481720361442, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2436, "step": 6408 }, { "epoch": 0.7275952474713611, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.2522, "step": 6409 }, { "epoch": 0.7277087745812801, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2544, "step": 6410 }, { "epoch": 0.7278223016911992, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.223, "step": 6411 }, { "epoch": 0.7279358288011183, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2486, "step": 6412 }, { "epoch": 0.7280493559110374, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2143, "step": 6413 }, { "epoch": 0.7281628830209564, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.2352, "step": 6414 }, { "epoch": 0.7282764101308755, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.2504, "step": 6415 }, { "epoch": 0.7283899372407946, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.2362, "step": 6416 }, { "epoch": 0.7285034643507137, "grad_norm": 0.240234375, "learning_rate": 0.002, "loss": 5.2259, "step": 6417 }, { "epoch": 0.7286169914606327, "grad_norm": 0.244140625, "learning_rate": 0.002, "loss": 5.2635, "step": 6418 }, { "epoch": 0.7287305185705518, "grad_norm": 0.2275390625, "learning_rate": 0.002, "loss": 5.2481, "step": 6419 }, { "epoch": 0.7288440456804709, "grad_norm": 0.2392578125, "learning_rate": 0.002, "loss": 5.2477, "step": 6420 }, { "epoch": 0.72895757279039, "grad_norm": 0.2451171875, "learning_rate": 0.002, "loss": 5.2511, "step": 6421 }, { "epoch": 0.729071099900309, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.2473, "step": 6422 }, { "epoch": 0.7291846270102281, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2467, "step": 6423 }, { "epoch": 0.7292981541201472, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2284, "step": 6424 }, { "epoch": 0.7294116812300663, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2597, "step": 6425 }, { "epoch": 0.7295252083399854, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2468, "step": 6426 }, { "epoch": 0.7296387354499044, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.2568, "step": 6427 }, { "epoch": 0.7297522625598235, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.248, "step": 6428 }, { "epoch": 0.7298657896697426, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2468, "step": 6429 }, { "epoch": 0.7299793167796617, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2374, "step": 6430 }, { "epoch": 0.7300928438895807, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2322, "step": 6431 }, { "epoch": 0.7302063709994998, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.249, "step": 6432 }, { "epoch": 0.7303198981094189, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2433, "step": 6433 }, { "epoch": 0.730433425219338, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2502, "step": 6434 }, { "epoch": 0.730546952329257, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2283, "step": 6435 }, { "epoch": 0.7306604794391761, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2358, "step": 6436 }, { "epoch": 0.7307740065490952, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2377, "step": 6437 }, { "epoch": 0.7308875336590143, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.2598, "step": 6438 }, { "epoch": 0.7310010607689333, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.2473, "step": 6439 }, { "epoch": 0.7311145878788524, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.2511, "step": 6440 }, { "epoch": 0.7312281149887715, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2519, "step": 6441 }, { "epoch": 0.7313416420986906, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2497, "step": 6442 }, { "epoch": 0.7314551692086096, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.2443, "step": 6443 }, { "epoch": 0.7315686963185287, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.2332, "step": 6444 }, { "epoch": 0.7316822234284478, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.2344, "step": 6445 }, { "epoch": 0.7317957505383669, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.2467, "step": 6446 }, { "epoch": 0.731909277648286, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2576, "step": 6447 }, { "epoch": 0.732022804758205, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.2264, "step": 6448 }, { "epoch": 0.7321363318681241, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2429, "step": 6449 }, { "epoch": 0.7322498589780432, "grad_norm": 0.38671875, "learning_rate": 0.002, "loss": 5.256, "step": 6450 }, { "epoch": 0.7323633860879623, "grad_norm": 0.390625, "learning_rate": 0.002, "loss": 5.2203, "step": 6451 }, { "epoch": 0.7324769131978813, "grad_norm": 0.3984375, "learning_rate": 0.002, "loss": 5.236, "step": 6452 }, { "epoch": 0.7325904403078004, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.2515, "step": 6453 }, { "epoch": 0.7327039674177195, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.2408, "step": 6454 }, { "epoch": 0.7328174945276386, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2276, "step": 6455 }, { "epoch": 0.7329310216375576, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.2239, "step": 6456 }, { "epoch": 0.7330445487474767, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.2579, "step": 6457 }, { "epoch": 0.7331580758573958, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.2555, "step": 6458 }, { "epoch": 0.7332716029673149, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.2549, "step": 6459 }, { "epoch": 0.7333851300772339, "grad_norm": 0.38671875, "learning_rate": 0.002, "loss": 5.2386, "step": 6460 }, { "epoch": 0.733498657187153, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.2336, "step": 6461 }, { "epoch": 0.7336121842970721, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2418, "step": 6462 }, { "epoch": 0.7337257114069912, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.2433, "step": 6463 }, { "epoch": 0.7338392385169102, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2411, "step": 6464 }, { "epoch": 0.7339527656268293, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2435, "step": 6465 }, { "epoch": 0.7340662927367484, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.235, "step": 6466 }, { "epoch": 0.7341798198466675, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.237, "step": 6467 }, { "epoch": 0.7342933469565865, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2561, "step": 6468 }, { "epoch": 0.7344068740665056, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.2398, "step": 6469 }, { "epoch": 0.7345204011764247, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.231, "step": 6470 }, { "epoch": 0.7346339282863438, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2583, "step": 6471 }, { "epoch": 0.7347474553962628, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2392, "step": 6472 }, { "epoch": 0.7348609825061819, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.2508, "step": 6473 }, { "epoch": 0.734974509616101, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2592, "step": 6474 }, { "epoch": 0.7350880367260201, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2527, "step": 6475 }, { "epoch": 0.7352015638359392, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2305, "step": 6476 }, { "epoch": 0.7353150909458582, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.246, "step": 6477 }, { "epoch": 0.7354286180557773, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2484, "step": 6478 }, { "epoch": 0.7355421451656964, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.234, "step": 6479 }, { "epoch": 0.7356556722756155, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.2496, "step": 6480 }, { "epoch": 0.7357691993855345, "grad_norm": 0.244140625, "learning_rate": 0.002, "loss": 5.2584, "step": 6481 }, { "epoch": 0.7358827264954536, "grad_norm": 0.25, "learning_rate": 0.002, "loss": 5.2369, "step": 6482 }, { "epoch": 0.7359962536053727, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.2481, "step": 6483 }, { "epoch": 0.7361097807152918, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.262, "step": 6484 }, { "epoch": 0.7362233078252108, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.2611, "step": 6485 }, { "epoch": 0.7363368349351299, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.2396, "step": 6486 }, { "epoch": 0.736450362045049, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.2505, "step": 6487 }, { "epoch": 0.7365638891549681, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.2574, "step": 6488 }, { "epoch": 0.7366774162648871, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2449, "step": 6489 }, { "epoch": 0.7367909433748062, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.2528, "step": 6490 }, { "epoch": 0.7369044704847253, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2301, "step": 6491 }, { "epoch": 0.7370179975946444, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2427, "step": 6492 }, { "epoch": 0.7371315247045634, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.2409, "step": 6493 }, { "epoch": 0.7372450518144825, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2462, "step": 6494 }, { "epoch": 0.7373585789244016, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2261, "step": 6495 }, { "epoch": 0.7374721060343207, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2608, "step": 6496 }, { "epoch": 0.7375856331442397, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2577, "step": 6497 }, { "epoch": 0.7376991602541588, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2387, "step": 6498 }, { "epoch": 0.7378126873640779, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.2269, "step": 6499 }, { "epoch": 0.737926214473997, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.2523, "step": 6500 }, { "epoch": 0.738039741583916, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2218, "step": 6501 }, { "epoch": 0.7381532686938351, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2441, "step": 6502 }, { "epoch": 0.7382667958037542, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2375, "step": 6503 }, { "epoch": 0.7383803229136733, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.2305, "step": 6504 }, { "epoch": 0.7384938500235924, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.2231, "step": 6505 }, { "epoch": 0.7386073771335114, "grad_norm": 0.427734375, "learning_rate": 0.002, "loss": 5.2292, "step": 6506 }, { "epoch": 0.7387209042434305, "grad_norm": 0.392578125, "learning_rate": 0.002, "loss": 5.2048, "step": 6507 }, { "epoch": 0.7388344313533496, "grad_norm": 0.392578125, "learning_rate": 0.002, "loss": 5.2132, "step": 6508 }, { "epoch": 0.7389479584632687, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.2315, "step": 6509 }, { "epoch": 0.7390614855731877, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.2397, "step": 6510 }, { "epoch": 0.7391750126831068, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2287, "step": 6511 }, { "epoch": 0.7392885397930259, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2405, "step": 6512 }, { "epoch": 0.739402066902945, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.2423, "step": 6513 }, { "epoch": 0.739515594012864, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.2387, "step": 6514 }, { "epoch": 0.7396291211227831, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.2253, "step": 6515 }, { "epoch": 0.7397426482327022, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2197, "step": 6516 }, { "epoch": 0.7398561753426213, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2267, "step": 6517 }, { "epoch": 0.7399697024525403, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2124, "step": 6518 }, { "epoch": 0.7400832295624594, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2582, "step": 6519 }, { "epoch": 0.7401967566723785, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.2414, "step": 6520 }, { "epoch": 0.7403102837822976, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.235, "step": 6521 }, { "epoch": 0.7404238108922166, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2377, "step": 6522 }, { "epoch": 0.7405373380021357, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2338, "step": 6523 }, { "epoch": 0.7406508651120548, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2497, "step": 6524 }, { "epoch": 0.7407643922219739, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2183, "step": 6525 }, { "epoch": 0.740877919331893, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2492, "step": 6526 }, { "epoch": 0.740991446441812, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2358, "step": 6527 }, { "epoch": 0.7411049735517311, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.2461, "step": 6528 }, { "epoch": 0.7412185006616502, "grad_norm": 0.25, "learning_rate": 0.002, "loss": 5.2402, "step": 6529 }, { "epoch": 0.7413320277715693, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.2408, "step": 6530 }, { "epoch": 0.7414455548814883, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2309, "step": 6531 }, { "epoch": 0.7415590819914074, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2614, "step": 6532 }, { "epoch": 0.7416726091013265, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2371, "step": 6533 }, { "epoch": 0.7417861362112456, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2419, "step": 6534 }, { "epoch": 0.7418996633211646, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2457, "step": 6535 }, { "epoch": 0.7420131904310837, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2353, "step": 6536 }, { "epoch": 0.7421267175410028, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2268, "step": 6537 }, { "epoch": 0.7422402446509219, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2537, "step": 6538 }, { "epoch": 0.7423537717608409, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2176, "step": 6539 }, { "epoch": 0.74246729887076, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2293, "step": 6540 }, { "epoch": 0.7425808259806791, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.2393, "step": 6541 }, { "epoch": 0.7426943530905982, "grad_norm": 0.3984375, "learning_rate": 0.002, "loss": 5.2599, "step": 6542 }, { "epoch": 0.7428078802005172, "grad_norm": 0.431640625, "learning_rate": 0.002, "loss": 5.2475, "step": 6543 }, { "epoch": 0.7429214073104363, "grad_norm": 0.41796875, "learning_rate": 0.002, "loss": 5.2291, "step": 6544 }, { "epoch": 0.7430349344203554, "grad_norm": 0.451171875, "learning_rate": 0.002, "loss": 5.244, "step": 6545 }, { "epoch": 0.7431484615302745, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.2286, "step": 6546 }, { "epoch": 0.7432619886401935, "grad_norm": 0.37109375, "learning_rate": 0.002, "loss": 5.2353, "step": 6547 }, { "epoch": 0.7433755157501126, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.2229, "step": 6548 }, { "epoch": 0.7434890428600317, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2331, "step": 6549 }, { "epoch": 0.7436025699699508, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.2244, "step": 6550 }, { "epoch": 0.7437160970798699, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.2461, "step": 6551 }, { "epoch": 0.7438296241897889, "grad_norm": 0.248046875, "learning_rate": 0.002, "loss": 5.2289, "step": 6552 }, { "epoch": 0.743943151299708, "grad_norm": 0.2451171875, "learning_rate": 0.002, "loss": 5.2444, "step": 6553 }, { "epoch": 0.7440566784096271, "grad_norm": 0.2451171875, "learning_rate": 0.002, "loss": 5.2608, "step": 6554 }, { "epoch": 0.7441702055195462, "grad_norm": 0.2412109375, "learning_rate": 0.002, "loss": 5.2246, "step": 6555 }, { "epoch": 0.7442837326294652, "grad_norm": 0.2392578125, "learning_rate": 0.002, "loss": 5.2412, "step": 6556 }, { "epoch": 0.7443972597393843, "grad_norm": 0.2412109375, "learning_rate": 0.002, "loss": 5.2227, "step": 6557 }, { "epoch": 0.7445107868493034, "grad_norm": 0.2314453125, "learning_rate": 0.002, "loss": 5.2438, "step": 6558 }, { "epoch": 0.7446243139592225, "grad_norm": 0.234375, "learning_rate": 0.002, "loss": 5.2306, "step": 6559 }, { "epoch": 0.7447378410691415, "grad_norm": 0.25, "learning_rate": 0.002, "loss": 5.2453, "step": 6560 }, { "epoch": 0.7448513681790606, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.229, "step": 6561 }, { "epoch": 0.7449648952889797, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2272, "step": 6562 }, { "epoch": 0.7450784223988988, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2371, "step": 6563 }, { "epoch": 0.7451919495088178, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2373, "step": 6564 }, { "epoch": 0.7453054766187369, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.2289, "step": 6565 }, { "epoch": 0.745419003728656, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.224, "step": 6566 }, { "epoch": 0.7455325308385751, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2315, "step": 6567 }, { "epoch": 0.7456460579484941, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2401, "step": 6568 }, { "epoch": 0.7457595850584132, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.241, "step": 6569 }, { "epoch": 0.7458731121683323, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.2505, "step": 6570 }, { "epoch": 0.7459866392782514, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.2421, "step": 6571 }, { "epoch": 0.7461001663881704, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.2558, "step": 6572 }, { "epoch": 0.7462136934980895, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2396, "step": 6573 }, { "epoch": 0.7463272206080086, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.2588, "step": 6574 }, { "epoch": 0.7464407477179277, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.2467, "step": 6575 }, { "epoch": 0.7465542748278468, "grad_norm": 0.466796875, "learning_rate": 0.002, "loss": 5.2517, "step": 6576 }, { "epoch": 0.7466678019377658, "grad_norm": 0.44921875, "learning_rate": 0.002, "loss": 5.2457, "step": 6577 }, { "epoch": 0.7467813290476849, "grad_norm": 0.4453125, "learning_rate": 0.002, "loss": 5.2204, "step": 6578 }, { "epoch": 0.746894856157604, "grad_norm": 0.4609375, "learning_rate": 0.002, "loss": 5.24, "step": 6579 }, { "epoch": 0.7470083832675231, "grad_norm": 0.40625, "learning_rate": 0.002, "loss": 5.2324, "step": 6580 }, { "epoch": 0.7471219103774421, "grad_norm": 0.3984375, "learning_rate": 0.002, "loss": 5.2342, "step": 6581 }, { "epoch": 0.7472354374873612, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2397, "step": 6582 }, { "epoch": 0.7473489645972803, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2179, "step": 6583 }, { "epoch": 0.7474624917071994, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2565, "step": 6584 }, { "epoch": 0.7475760188171184, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2235, "step": 6585 }, { "epoch": 0.7476895459270375, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2388, "step": 6586 }, { "epoch": 0.7478030730369566, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.232, "step": 6587 }, { "epoch": 0.7479166001468757, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.2668, "step": 6588 }, { "epoch": 0.7480301272567947, "grad_norm": 0.2451171875, "learning_rate": 0.002, "loss": 5.2487, "step": 6589 }, { "epoch": 0.7481436543667138, "grad_norm": 0.236328125, "learning_rate": 0.002, "loss": 5.2537, "step": 6590 }, { "epoch": 0.7482571814766329, "grad_norm": 0.240234375, "learning_rate": 0.002, "loss": 5.2132, "step": 6591 }, { "epoch": 0.748370708586552, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.2297, "step": 6592 }, { "epoch": 0.748484235696471, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2187, "step": 6593 }, { "epoch": 0.7485977628063901, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2564, "step": 6594 }, { "epoch": 0.7487112899163092, "grad_norm": 0.25, "learning_rate": 0.002, "loss": 5.2328, "step": 6595 }, { "epoch": 0.7488248170262283, "grad_norm": 0.2314453125, "learning_rate": 0.002, "loss": 5.2347, "step": 6596 }, { "epoch": 0.7489383441361473, "grad_norm": 0.23828125, "learning_rate": 0.002, "loss": 5.2322, "step": 6597 }, { "epoch": 0.7490518712460664, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.224, "step": 6598 }, { "epoch": 0.7491653983559855, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2275, "step": 6599 }, { "epoch": 0.7492789254659046, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.242, "step": 6600 }, { "epoch": 0.7493924525758237, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.2326, "step": 6601 }, { "epoch": 0.7495059796857427, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.2316, "step": 6602 }, { "epoch": 0.7496195067956618, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.2438, "step": 6603 }, { "epoch": 0.7497330339055809, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.2464, "step": 6604 }, { "epoch": 0.7498465610155, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2058, "step": 6605 }, { "epoch": 0.749960088125419, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2333, "step": 6606 }, { "epoch": 0.7500736152353381, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2363, "step": 6607 }, { "epoch": 0.7501871423452572, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2124, "step": 6608 }, { "epoch": 0.7503006694551763, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2463, "step": 6609 }, { "epoch": 0.7504141965650953, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.2565, "step": 6610 }, { "epoch": 0.7505277236750144, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.2413, "step": 6611 }, { "epoch": 0.7506412507849335, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.2392, "step": 6612 }, { "epoch": 0.7507547778948526, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2116, "step": 6613 }, { "epoch": 0.7508683050047716, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.2397, "step": 6614 }, { "epoch": 0.7509818321146907, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2337, "step": 6615 }, { "epoch": 0.7510953592246098, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2508, "step": 6616 }, { "epoch": 0.7512088863345289, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.2494, "step": 6617 }, { "epoch": 0.751322413444448, "grad_norm": 0.248046875, "learning_rate": 0.002, "loss": 5.2424, "step": 6618 }, { "epoch": 0.751435940554367, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.2054, "step": 6619 }, { "epoch": 0.7515494676642861, "grad_norm": 0.25, "learning_rate": 0.002, "loss": 5.2499, "step": 6620 }, { "epoch": 0.7516629947742052, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2385, "step": 6621 }, { "epoch": 0.7517765218841242, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2374, "step": 6622 }, { "epoch": 0.7518900489940433, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.2377, "step": 6623 }, { "epoch": 0.7520035761039624, "grad_norm": 0.384765625, "learning_rate": 0.002, "loss": 5.2351, "step": 6624 }, { "epoch": 0.7521171032138815, "grad_norm": 0.384765625, "learning_rate": 0.002, "loss": 5.227, "step": 6625 }, { "epoch": 0.7522306303238006, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.2301, "step": 6626 }, { "epoch": 0.7523441574337196, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.237, "step": 6627 }, { "epoch": 0.7524576845436387, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.2243, "step": 6628 }, { "epoch": 0.7525712116535578, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.257, "step": 6629 }, { "epoch": 0.7526847387634769, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.2434, "step": 6630 }, { "epoch": 0.7527982658733959, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2383, "step": 6631 }, { "epoch": 0.752911792983315, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2206, "step": 6632 }, { "epoch": 0.7530253200932341, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2558, "step": 6633 }, { "epoch": 0.7531388472031532, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2568, "step": 6634 }, { "epoch": 0.7532523743130722, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2483, "step": 6635 }, { "epoch": 0.7533659014229913, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2293, "step": 6636 }, { "epoch": 0.7534794285329105, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2456, "step": 6637 }, { "epoch": 0.7535929556428296, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2091, "step": 6638 }, { "epoch": 0.7537064827527487, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2541, "step": 6639 }, { "epoch": 0.7538200098626677, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2416, "step": 6640 }, { "epoch": 0.7539335369725868, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2353, "step": 6641 }, { "epoch": 0.7540470640825059, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.2472, "step": 6642 }, { "epoch": 0.754160591192425, "grad_norm": 0.25, "learning_rate": 0.002, "loss": 5.2533, "step": 6643 }, { "epoch": 0.754274118302344, "grad_norm": 0.236328125, "learning_rate": 0.002, "loss": 5.2283, "step": 6644 }, { "epoch": 0.7543876454122631, "grad_norm": 0.2373046875, "learning_rate": 0.002, "loss": 5.2423, "step": 6645 }, { "epoch": 0.7545011725221822, "grad_norm": 0.240234375, "learning_rate": 0.002, "loss": 5.2412, "step": 6646 }, { "epoch": 0.7546146996321013, "grad_norm": 0.236328125, "learning_rate": 0.002, "loss": 5.2557, "step": 6647 }, { "epoch": 0.7547282267420203, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.229, "step": 6648 }, { "epoch": 0.7548417538519394, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.2573, "step": 6649 }, { "epoch": 0.7549552809618585, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2189, "step": 6650 }, { "epoch": 0.7550688080717776, "grad_norm": 0.35546875, "learning_rate": 0.002, "loss": 5.2383, "step": 6651 }, { "epoch": 0.7551823351816966, "grad_norm": 0.40625, "learning_rate": 0.002, "loss": 5.2595, "step": 6652 }, { "epoch": 0.7552958622916157, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.2155, "step": 6653 }, { "epoch": 0.7554093894015348, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.2134, "step": 6654 }, { "epoch": 0.7555229165114539, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.2423, "step": 6655 }, { "epoch": 0.7556364436213729, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2403, "step": 6656 }, { "epoch": 0.755749970731292, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2458, "step": 6657 }, { "epoch": 0.7558634978412111, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.2019, "step": 6658 }, { "epoch": 0.7559770249511302, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.2393, "step": 6659 }, { "epoch": 0.7560905520610492, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.2406, "step": 6660 }, { "epoch": 0.7562040791709683, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.2323, "step": 6661 }, { "epoch": 0.7563176062808874, "grad_norm": 0.25, "learning_rate": 0.002, "loss": 5.2272, "step": 6662 }, { "epoch": 0.7564311333908065, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.2457, "step": 6663 }, { "epoch": 0.7565446605007256, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.2305, "step": 6664 }, { "epoch": 0.7566581876106446, "grad_norm": 0.2490234375, "learning_rate": 0.002, "loss": 5.2457, "step": 6665 }, { "epoch": 0.7567717147205637, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.24, "step": 6666 }, { "epoch": 0.7568852418304828, "grad_norm": 0.24609375, "learning_rate": 0.002, "loss": 5.2461, "step": 6667 }, { "epoch": 0.7569987689404019, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.2423, "step": 6668 }, { "epoch": 0.7571122960503209, "grad_norm": 0.25, "learning_rate": 0.002, "loss": 5.2406, "step": 6669 }, { "epoch": 0.75722582316024, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2573, "step": 6670 }, { "epoch": 0.7573393502701591, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2423, "step": 6671 }, { "epoch": 0.7574528773800782, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2429, "step": 6672 }, { "epoch": 0.7575664044899972, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2299, "step": 6673 }, { "epoch": 0.7576799315999163, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2371, "step": 6674 }, { "epoch": 0.7577934587098354, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2248, "step": 6675 }, { "epoch": 0.7579069858197545, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2463, "step": 6676 }, { "epoch": 0.7580205129296735, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.248, "step": 6677 }, { "epoch": 0.7581340400395926, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2227, "step": 6678 }, { "epoch": 0.7582475671495117, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.2248, "step": 6679 }, { "epoch": 0.7583610942594308, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.229, "step": 6680 }, { "epoch": 0.7584746213693498, "grad_norm": 0.390625, "learning_rate": 0.002, "loss": 5.2353, "step": 6681 }, { "epoch": 0.7585881484792689, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.2508, "step": 6682 }, { "epoch": 0.758701675589188, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.2359, "step": 6683 }, { "epoch": 0.7588152026991071, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2327, "step": 6684 }, { "epoch": 0.7589287298090261, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2302, "step": 6685 }, { "epoch": 0.7590422569189452, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2441, "step": 6686 }, { "epoch": 0.7591557840288643, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.231, "step": 6687 }, { "epoch": 0.7592693111387834, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2483, "step": 6688 }, { "epoch": 0.7593828382487025, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.231, "step": 6689 }, { "epoch": 0.7594963653586215, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.2369, "step": 6690 }, { "epoch": 0.7596098924685406, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2421, "step": 6691 }, { "epoch": 0.7597234195784597, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.238, "step": 6692 }, { "epoch": 0.7598369466883788, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2282, "step": 6693 }, { "epoch": 0.7599504737982978, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2171, "step": 6694 }, { "epoch": 0.7600640009082169, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2314, "step": 6695 }, { "epoch": 0.760177528018136, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.2506, "step": 6696 }, { "epoch": 0.7602910551280551, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2679, "step": 6697 }, { "epoch": 0.7604045822379741, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2272, "step": 6698 }, { "epoch": 0.7605181093478932, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.2184, "step": 6699 }, { "epoch": 0.7606316364578123, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.2413, "step": 6700 }, { "epoch": 0.7607451635677314, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2358, "step": 6701 }, { "epoch": 0.7608586906776504, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2341, "step": 6702 }, { "epoch": 0.7609722177875695, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.2514, "step": 6703 }, { "epoch": 0.7610857448974886, "grad_norm": 0.412109375, "learning_rate": 0.002, "loss": 5.2317, "step": 6704 }, { "epoch": 0.7611992720074077, "grad_norm": 0.43359375, "learning_rate": 0.002, "loss": 5.2383, "step": 6705 }, { "epoch": 0.7613127991173267, "grad_norm": 0.43359375, "learning_rate": 0.002, "loss": 5.2513, "step": 6706 }, { "epoch": 0.7614263262272458, "grad_norm": 0.431640625, "learning_rate": 0.002, "loss": 5.2426, "step": 6707 }, { "epoch": 0.7615398533371649, "grad_norm": 0.38671875, "learning_rate": 0.002, "loss": 5.2501, "step": 6708 }, { "epoch": 0.761653380447084, "grad_norm": 0.384765625, "learning_rate": 0.002, "loss": 5.2264, "step": 6709 }, { "epoch": 0.761766907557003, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.2307, "step": 6710 }, { "epoch": 0.7618804346669221, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.2472, "step": 6711 }, { "epoch": 0.7619939617768412, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.2241, "step": 6712 }, { "epoch": 0.7621074888867603, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.2492, "step": 6713 }, { "epoch": 0.7622210159966794, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2182, "step": 6714 }, { "epoch": 0.7623345431065984, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2345, "step": 6715 }, { "epoch": 0.7624480702165175, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.235, "step": 6716 }, { "epoch": 0.7625615973264366, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2307, "step": 6717 }, { "epoch": 0.7626751244363557, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2234, "step": 6718 }, { "epoch": 0.7627886515462747, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2426, "step": 6719 }, { "epoch": 0.7629021786561938, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.237, "step": 6720 }, { "epoch": 0.7630157057661129, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.2426, "step": 6721 }, { "epoch": 0.763129232876032, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.2178, "step": 6722 }, { "epoch": 0.763242759985951, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.223, "step": 6723 }, { "epoch": 0.7633562870958701, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.2444, "step": 6724 }, { "epoch": 0.7634698142057892, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.2394, "step": 6725 }, { "epoch": 0.7635833413157083, "grad_norm": 0.25, "learning_rate": 0.002, "loss": 5.234, "step": 6726 }, { "epoch": 0.7636968684256273, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2378, "step": 6727 }, { "epoch": 0.7638103955355464, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.2362, "step": 6728 }, { "epoch": 0.7639239226454655, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.218, "step": 6729 }, { "epoch": 0.7640374497553846, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2459, "step": 6730 }, { "epoch": 0.7641509768653036, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2241, "step": 6731 }, { "epoch": 0.7642645039752227, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2279, "step": 6732 }, { "epoch": 0.7643780310851418, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2338, "step": 6733 }, { "epoch": 0.7644915581950609, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2399, "step": 6734 }, { "epoch": 0.76460508530498, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.2254, "step": 6735 }, { "epoch": 0.764718612414899, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.2402, "step": 6736 }, { "epoch": 0.7648321395248181, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.2512, "step": 6737 }, { "epoch": 0.7649456666347372, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.2268, "step": 6738 }, { "epoch": 0.7650591937446563, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2291, "step": 6739 }, { "epoch": 0.7651727208545753, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2577, "step": 6740 }, { "epoch": 0.7652862479644944, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.2588, "step": 6741 }, { "epoch": 0.7653997750744135, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.2401, "step": 6742 }, { "epoch": 0.7655133021843326, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2435, "step": 6743 }, { "epoch": 0.7656268292942516, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2362, "step": 6744 }, { "epoch": 0.7657403564041707, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.2474, "step": 6745 }, { "epoch": 0.7658538835140898, "grad_norm": 0.357421875, "learning_rate": 0.002, "loss": 5.2224, "step": 6746 }, { "epoch": 0.7659674106240089, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.2369, "step": 6747 }, { "epoch": 0.7660809377339279, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.2333, "step": 6748 }, { "epoch": 0.766194464843847, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.2431, "step": 6749 }, { "epoch": 0.7663079919537661, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2459, "step": 6750 }, { "epoch": 0.7664215190636852, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2325, "step": 6751 }, { "epoch": 0.7665350461736042, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2352, "step": 6752 }, { "epoch": 0.7666485732835233, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.2107, "step": 6753 }, { "epoch": 0.7667621003934424, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2366, "step": 6754 }, { "epoch": 0.7668756275033615, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.2143, "step": 6755 }, { "epoch": 0.7669891546132805, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.231, "step": 6756 }, { "epoch": 0.7671026817231996, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2416, "step": 6757 }, { "epoch": 0.7672162088331187, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.229, "step": 6758 }, { "epoch": 0.7673297359430378, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2533, "step": 6759 }, { "epoch": 0.7674432630529568, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2351, "step": 6760 }, { "epoch": 0.7675567901628759, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2277, "step": 6761 }, { "epoch": 0.767670317272795, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.2395, "step": 6762 }, { "epoch": 0.7677838443827141, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.242, "step": 6763 }, { "epoch": 0.7678973714926332, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.2243, "step": 6764 }, { "epoch": 0.7680108986025522, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.2223, "step": 6765 }, { "epoch": 0.7681244257124713, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.2535, "step": 6766 }, { "epoch": 0.7682379528223904, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2203, "step": 6767 }, { "epoch": 0.7683514799323095, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.223, "step": 6768 }, { "epoch": 0.7684650070422285, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2298, "step": 6769 }, { "epoch": 0.7685785341521476, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.235, "step": 6770 }, { "epoch": 0.7686920612620667, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2343, "step": 6771 }, { "epoch": 0.7688055883719858, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.222, "step": 6772 }, { "epoch": 0.7689191154819048, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2294, "step": 6773 }, { "epoch": 0.7690326425918239, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.2549, "step": 6774 }, { "epoch": 0.769146169701743, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2303, "step": 6775 }, { "epoch": 0.7692596968116621, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2419, "step": 6776 }, { "epoch": 0.7693732239215811, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2335, "step": 6777 }, { "epoch": 0.7694867510315002, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.241, "step": 6778 }, { "epoch": 0.7696002781414193, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2332, "step": 6779 }, { "epoch": 0.7697138052513384, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.237, "step": 6780 }, { "epoch": 0.7698273323612574, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2282, "step": 6781 }, { "epoch": 0.7699408594711765, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2345, "step": 6782 }, { "epoch": 0.7700543865810956, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.2493, "step": 6783 }, { "epoch": 0.7701679136910147, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2382, "step": 6784 }, { "epoch": 0.7702814408009337, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2788, "step": 6785 }, { "epoch": 0.7703949679108528, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.238, "step": 6786 }, { "epoch": 0.7705084950207719, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.244, "step": 6787 }, { "epoch": 0.770622022130691, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2475, "step": 6788 }, { "epoch": 0.77073554924061, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.2095, "step": 6789 }, { "epoch": 0.7708490763505291, "grad_norm": 0.25390625, "learning_rate": 0.002, "loss": 5.2081, "step": 6790 }, { "epoch": 0.7709626034604482, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.2202, "step": 6791 }, { "epoch": 0.7710761305703673, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2479, "step": 6792 }, { "epoch": 0.7711896576802864, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2461, "step": 6793 }, { "epoch": 0.7713031847902054, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2277, "step": 6794 }, { "epoch": 0.7714167119001245, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.234, "step": 6795 }, { "epoch": 0.7715302390100436, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.2404, "step": 6796 }, { "epoch": 0.7716437661199627, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2266, "step": 6797 }, { "epoch": 0.7717572932298817, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2344, "step": 6798 }, { "epoch": 0.7718708203398008, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2377, "step": 6799 }, { "epoch": 0.7719843474497199, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.2376, "step": 6800 }, { "epoch": 0.772097874559639, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2333, "step": 6801 }, { "epoch": 0.772211401669558, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2518, "step": 6802 }, { "epoch": 0.7723249287794771, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2407, "step": 6803 }, { "epoch": 0.7724384558893962, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2413, "step": 6804 }, { "epoch": 0.7725519829993153, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2209, "step": 6805 }, { "epoch": 0.7726655101092343, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.2597, "step": 6806 }, { "epoch": 0.7727790372191534, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2248, "step": 6807 }, { "epoch": 0.7728925643290725, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2286, "step": 6808 }, { "epoch": 0.7730060914389916, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2331, "step": 6809 }, { "epoch": 0.7731196185489106, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2193, "step": 6810 }, { "epoch": 0.7732331456588297, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.2528, "step": 6811 }, { "epoch": 0.7733466727687488, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2491, "step": 6812 }, { "epoch": 0.7734601998786679, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.2199, "step": 6813 }, { "epoch": 0.773573726988587, "grad_norm": 0.296875, "learning_rate": 0.002, "loss": 5.2408, "step": 6814 }, { "epoch": 0.773687254098506, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2329, "step": 6815 }, { "epoch": 0.7738007812084251, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.2513, "step": 6816 }, { "epoch": 0.7739143083183442, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.2356, "step": 6817 }, { "epoch": 0.7740278354282633, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.2374, "step": 6818 }, { "epoch": 0.7741413625381823, "grad_norm": 0.380859375, "learning_rate": 0.002, "loss": 5.2206, "step": 6819 }, { "epoch": 0.7742548896481014, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.2301, "step": 6820 }, { "epoch": 0.7743684167580205, "grad_norm": 0.390625, "learning_rate": 0.002, "loss": 5.2602, "step": 6821 }, { "epoch": 0.7744819438679396, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2196, "step": 6822 }, { "epoch": 0.7745954709778586, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.2355, "step": 6823 }, { "epoch": 0.7747089980877777, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2368, "step": 6824 }, { "epoch": 0.7748225251976968, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2678, "step": 6825 }, { "epoch": 0.7749360523076159, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.2182, "step": 6826 }, { "epoch": 0.7750495794175349, "grad_norm": 0.38671875, "learning_rate": 0.002, "loss": 5.2361, "step": 6827 }, { "epoch": 0.775163106527454, "grad_norm": 0.41796875, "learning_rate": 0.002, "loss": 5.232, "step": 6828 }, { "epoch": 0.7752766336373731, "grad_norm": 0.416015625, "learning_rate": 0.002, "loss": 5.2196, "step": 6829 }, { "epoch": 0.7753901607472922, "grad_norm": 0.4140625, "learning_rate": 0.002, "loss": 5.2364, "step": 6830 }, { "epoch": 0.7755036878572112, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.258, "step": 6831 }, { "epoch": 0.7756172149671303, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.2222, "step": 6832 }, { "epoch": 0.7757307420770494, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2422, "step": 6833 }, { "epoch": 0.7758442691869685, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2332, "step": 6834 }, { "epoch": 0.7759577962968875, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2299, "step": 6835 }, { "epoch": 0.7760713234068066, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.2432, "step": 6836 }, { "epoch": 0.7761848505167257, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2389, "step": 6837 }, { "epoch": 0.7762983776266448, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2381, "step": 6838 }, { "epoch": 0.7764119047365639, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2311, "step": 6839 }, { "epoch": 0.7765254318464829, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2197, "step": 6840 }, { "epoch": 0.776638958956402, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2363, "step": 6841 }, { "epoch": 0.7767524860663211, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.245, "step": 6842 }, { "epoch": 0.7768660131762402, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.2618, "step": 6843 }, { "epoch": 0.7769795402861592, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.25, "step": 6844 }, { "epoch": 0.7770930673960783, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.247, "step": 6845 }, { "epoch": 0.7772065945059974, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2188, "step": 6846 }, { "epoch": 0.7773201216159165, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.255, "step": 6847 }, { "epoch": 0.7774336487258355, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.2345, "step": 6848 }, { "epoch": 0.7775471758357546, "grad_norm": 0.3359375, "learning_rate": 0.002, "loss": 5.2509, "step": 6849 }, { "epoch": 0.7776607029456737, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2479, "step": 6850 }, { "epoch": 0.7777742300555928, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.2289, "step": 6851 }, { "epoch": 0.7778877571655118, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2379, "step": 6852 }, { "epoch": 0.7780012842754309, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.2318, "step": 6853 }, { "epoch": 0.77811481138535, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.2342, "step": 6854 }, { "epoch": 0.7782283384952691, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.271, "step": 6855 }, { "epoch": 0.7783418656051881, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2582, "step": 6856 }, { "epoch": 0.7784553927151072, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2279, "step": 6857 }, { "epoch": 0.7785689198250263, "grad_norm": 0.248046875, "learning_rate": 0.002, "loss": 5.2574, "step": 6858 }, { "epoch": 0.7786824469349454, "grad_norm": 0.2431640625, "learning_rate": 0.002, "loss": 5.2319, "step": 6859 }, { "epoch": 0.7787959740448644, "grad_norm": 0.2451171875, "learning_rate": 0.002, "loss": 5.2146, "step": 6860 }, { "epoch": 0.7789095011547835, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2272, "step": 6861 }, { "epoch": 0.7790230282647026, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2288, "step": 6862 }, { "epoch": 0.7791365553746217, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.2431, "step": 6863 }, { "epoch": 0.7792500824845408, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.2463, "step": 6864 }, { "epoch": 0.7793636095944598, "grad_norm": 0.26171875, "learning_rate": 0.002, "loss": 5.2358, "step": 6865 }, { "epoch": 0.7794771367043789, "grad_norm": 0.265625, "learning_rate": 0.002, "loss": 5.2503, "step": 6866 }, { "epoch": 0.779590663814298, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.2333, "step": 6867 }, { "epoch": 0.779704190924217, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.2463, "step": 6868 }, { "epoch": 0.7798177180341361, "grad_norm": 0.298828125, "learning_rate": 0.002, "loss": 5.232, "step": 6869 }, { "epoch": 0.7799312451440552, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2399, "step": 6870 }, { "epoch": 0.7800447722539743, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2346, "step": 6871 }, { "epoch": 0.7801582993638934, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2398, "step": 6872 }, { "epoch": 0.7802718264738124, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2493, "step": 6873 }, { "epoch": 0.7803853535837315, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2278, "step": 6874 }, { "epoch": 0.7804988806936506, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2305, "step": 6875 }, { "epoch": 0.7806124078035697, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2213, "step": 6876 }, { "epoch": 0.7807259349134887, "grad_norm": 0.37109375, "learning_rate": 0.002, "loss": 5.243, "step": 6877 }, { "epoch": 0.7808394620234079, "grad_norm": 0.55859375, "learning_rate": 0.002, "loss": 5.2457, "step": 6878 }, { "epoch": 0.780952989133327, "grad_norm": 0.50390625, "learning_rate": 0.002, "loss": 5.2353, "step": 6879 }, { "epoch": 0.7810665162432461, "grad_norm": 0.412109375, "learning_rate": 0.002, "loss": 5.2473, "step": 6880 }, { "epoch": 0.7811800433531652, "grad_norm": 0.41796875, "learning_rate": 0.002, "loss": 5.2453, "step": 6881 }, { "epoch": 0.7812935704630842, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.2442, "step": 6882 }, { "epoch": 0.7814070975730033, "grad_norm": 0.3671875, "learning_rate": 0.002, "loss": 5.2573, "step": 6883 }, { "epoch": 0.7815206246829224, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2277, "step": 6884 }, { "epoch": 0.7816341517928415, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2445, "step": 6885 }, { "epoch": 0.7817476789027605, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2392, "step": 6886 }, { "epoch": 0.7818612060126796, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2451, "step": 6887 }, { "epoch": 0.7819747331225987, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.2335, "step": 6888 }, { "epoch": 0.7820882602325178, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2373, "step": 6889 }, { "epoch": 0.7822017873424368, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2308, "step": 6890 }, { "epoch": 0.7823153144523559, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.2202, "step": 6891 }, { "epoch": 0.782428841562275, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2322, "step": 6892 }, { "epoch": 0.7825423686721941, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.2481, "step": 6893 }, { "epoch": 0.7826558957821131, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.2461, "step": 6894 }, { "epoch": 0.7827694228920322, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2435, "step": 6895 }, { "epoch": 0.7828829500019513, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2343, "step": 6896 }, { "epoch": 0.7829964771118704, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.2486, "step": 6897 }, { "epoch": 0.7831100042217894, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.2181, "step": 6898 }, { "epoch": 0.7832235313317085, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.2498, "step": 6899 }, { "epoch": 0.7833370584416276, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.2332, "step": 6900 }, { "epoch": 0.7834505855515467, "grad_norm": 0.263671875, "learning_rate": 0.002, "loss": 5.2143, "step": 6901 }, { "epoch": 0.7835641126614658, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.2363, "step": 6902 }, { "epoch": 0.7836776397713848, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.241, "step": 6903 }, { "epoch": 0.7837911668813039, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2391, "step": 6904 }, { "epoch": 0.783904693991223, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.245, "step": 6905 }, { "epoch": 0.784018221101142, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.2314, "step": 6906 }, { "epoch": 0.7841317482110611, "grad_norm": 0.40625, "learning_rate": 0.002, "loss": 5.2241, "step": 6907 }, { "epoch": 0.7842452753209802, "grad_norm": 0.38671875, "learning_rate": 0.002, "loss": 5.2169, "step": 6908 }, { "epoch": 0.7843588024308993, "grad_norm": 0.380859375, "learning_rate": 0.002, "loss": 5.2409, "step": 6909 }, { "epoch": 0.7844723295408184, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.236, "step": 6910 }, { "epoch": 0.7845858566507374, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2588, "step": 6911 }, { "epoch": 0.7846993837606565, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2417, "step": 6912 }, { "epoch": 0.7848129108705756, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2552, "step": 6913 }, { "epoch": 0.7849264379804947, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.2175, "step": 6914 }, { "epoch": 0.7850399650904137, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.2256, "step": 6915 }, { "epoch": 0.7851534922003328, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2154, "step": 6916 }, { "epoch": 0.7852670193102519, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2367, "step": 6917 }, { "epoch": 0.785380546420171, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2481, "step": 6918 }, { "epoch": 0.78549407353009, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.2453, "step": 6919 }, { "epoch": 0.7856076006400091, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.2562, "step": 6920 }, { "epoch": 0.7857211277499282, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.2279, "step": 6921 }, { "epoch": 0.7858346548598473, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2364, "step": 6922 }, { "epoch": 0.7859481819697663, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2455, "step": 6923 }, { "epoch": 0.7860617090796854, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.237, "step": 6924 }, { "epoch": 0.7861752361896045, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2437, "step": 6925 }, { "epoch": 0.7862887632995236, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2476, "step": 6926 }, { "epoch": 0.7864022904094427, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2362, "step": 6927 }, { "epoch": 0.7865158175193617, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.248, "step": 6928 }, { "epoch": 0.7866293446292808, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2252, "step": 6929 }, { "epoch": 0.7867428717391999, "grad_norm": 0.291015625, "learning_rate": 0.002, "loss": 5.2306, "step": 6930 }, { "epoch": 0.786856398849119, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 5.2265, "step": 6931 }, { "epoch": 0.786969925959038, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2217, "step": 6932 }, { "epoch": 0.7870834530689571, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2369, "step": 6933 }, { "epoch": 0.7871969801788762, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2235, "step": 6934 }, { "epoch": 0.7873105072887953, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.2451, "step": 6935 }, { "epoch": 0.7874240343987143, "grad_norm": 0.34765625, "learning_rate": 0.002, "loss": 5.2467, "step": 6936 }, { "epoch": 0.7875375615086334, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.2405, "step": 6937 }, { "epoch": 0.7876510886185525, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.2115, "step": 6938 }, { "epoch": 0.7877646157284716, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2136, "step": 6939 }, { "epoch": 0.7878781428383906, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.2299, "step": 6940 }, { "epoch": 0.7879916699483097, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.2345, "step": 6941 }, { "epoch": 0.7881051970582288, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2467, "step": 6942 }, { "epoch": 0.7882187241681479, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.2191, "step": 6943 }, { "epoch": 0.7883322512780669, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.2442, "step": 6944 }, { "epoch": 0.788445778387986, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2374, "step": 6945 }, { "epoch": 0.7885593054979051, "grad_norm": 0.353515625, "learning_rate": 0.002, "loss": 5.2301, "step": 6946 }, { "epoch": 0.7886728326078242, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.2383, "step": 6947 }, { "epoch": 0.7887863597177432, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2345, "step": 6948 }, { "epoch": 0.7888998868276623, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.2313, "step": 6949 }, { "epoch": 0.7890134139375814, "grad_norm": 0.33984375, "learning_rate": 0.002, "loss": 5.2563, "step": 6950 }, { "epoch": 0.7891269410475005, "grad_norm": 0.365234375, "learning_rate": 0.002, "loss": 5.2591, "step": 6951 }, { "epoch": 0.7892404681574196, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.2393, "step": 6952 }, { "epoch": 0.7893539952673386, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.2363, "step": 6953 }, { "epoch": 0.7894675223772577, "grad_norm": 0.373046875, "learning_rate": 0.002, "loss": 5.2446, "step": 6954 }, { "epoch": 0.7895810494871768, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.2312, "step": 6955 }, { "epoch": 0.7896945765970959, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.226, "step": 6956 }, { "epoch": 0.7898081037070149, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2148, "step": 6957 }, { "epoch": 0.789921630816934, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2506, "step": 6958 }, { "epoch": 0.7900351579268531, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.234, "step": 6959 }, { "epoch": 0.7901486850367722, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.2247, "step": 6960 }, { "epoch": 0.7902622121466912, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.2458, "step": 6961 }, { "epoch": 0.7903757392566103, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2491, "step": 6962 }, { "epoch": 0.7904892663665294, "grad_norm": 0.26953125, "learning_rate": 0.002, "loss": 5.247, "step": 6963 }, { "epoch": 0.7906027934764485, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2535, "step": 6964 }, { "epoch": 0.7907163205863675, "grad_norm": 0.28515625, "learning_rate": 0.002, "loss": 5.243, "step": 6965 }, { "epoch": 0.7908298476962866, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2401, "step": 6966 }, { "epoch": 0.7909433748062057, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2325, "step": 6967 }, { "epoch": 0.7910569019161248, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.2415, "step": 6968 }, { "epoch": 0.7911704290260438, "grad_norm": 0.2578125, "learning_rate": 0.002, "loss": 5.2203, "step": 6969 }, { "epoch": 0.7912839561359629, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.2427, "step": 6970 }, { "epoch": 0.791397483245882, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.2402, "step": 6971 }, { "epoch": 0.7915110103558011, "grad_norm": 0.39453125, "learning_rate": 0.002, "loss": 5.2275, "step": 6972 }, { "epoch": 0.7916245374657201, "grad_norm": 0.4140625, "learning_rate": 0.002, "loss": 5.2234, "step": 6973 }, { "epoch": 0.7917380645756392, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.2497, "step": 6974 }, { "epoch": 0.7918515916855583, "grad_norm": 0.361328125, "learning_rate": 0.002, "loss": 5.2208, "step": 6975 }, { "epoch": 0.7919651187954774, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.236, "step": 6976 }, { "epoch": 0.7920786459053965, "grad_norm": 0.279296875, "learning_rate": 0.002, "loss": 5.2368, "step": 6977 }, { "epoch": 0.7921921730153155, "grad_norm": 0.259765625, "learning_rate": 0.002, "loss": 5.2332, "step": 6978 }, { "epoch": 0.7923057001252346, "grad_norm": 0.2451171875, "learning_rate": 0.002, "loss": 5.2314, "step": 6979 }, { "epoch": 0.7924192272351537, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.2378, "step": 6980 }, { "epoch": 0.7925327543450728, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.2369, "step": 6981 }, { "epoch": 0.7926462814549918, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2576, "step": 6982 }, { "epoch": 0.7927598085649109, "grad_norm": 0.310546875, "learning_rate": 0.002, "loss": 5.2327, "step": 6983 }, { "epoch": 0.79287333567483, "grad_norm": 0.36328125, "learning_rate": 0.002, "loss": 5.2248, "step": 6984 }, { "epoch": 0.7929868627847491, "grad_norm": 0.376953125, "learning_rate": 0.002, "loss": 5.2396, "step": 6985 }, { "epoch": 0.7931003898946681, "grad_norm": 0.384765625, "learning_rate": 0.002, "loss": 5.2326, "step": 6986 }, { "epoch": 0.7932139170045872, "grad_norm": 0.345703125, "learning_rate": 0.002, "loss": 5.2195, "step": 6987 }, { "epoch": 0.7933274441145063, "grad_norm": 0.349609375, "learning_rate": 0.002, "loss": 5.2251, "step": 6988 }, { "epoch": 0.7934409712244254, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2463, "step": 6989 }, { "epoch": 0.7935544983343444, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2387, "step": 6990 }, { "epoch": 0.7936680254442635, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.262, "step": 6991 }, { "epoch": 0.7937815525541826, "grad_norm": 0.322265625, "learning_rate": 0.002, "loss": 5.2209, "step": 6992 }, { "epoch": 0.7938950796641017, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.223, "step": 6993 }, { "epoch": 0.7940086067740207, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.2487, "step": 6994 }, { "epoch": 0.7941221338839398, "grad_norm": 0.384765625, "learning_rate": 0.002, "loss": 5.2105, "step": 6995 }, { "epoch": 0.7942356609938589, "grad_norm": 0.369140625, "learning_rate": 0.002, "loss": 5.2564, "step": 6996 }, { "epoch": 0.794349188103778, "grad_norm": 0.37890625, "learning_rate": 0.002, "loss": 5.2414, "step": 6997 }, { "epoch": 0.794462715213697, "grad_norm": 0.341796875, "learning_rate": 0.002, "loss": 5.2365, "step": 6998 }, { "epoch": 0.7945762423236161, "grad_norm": 0.337890625, "learning_rate": 0.002, "loss": 5.2439, "step": 6999 }, { "epoch": 0.7946897694335352, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.2292, "step": 7000 }, { "epoch": 0.7948032965434543, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2461, "step": 7001 }, { "epoch": 0.7949168236533734, "grad_norm": 0.328125, "learning_rate": 0.002, "loss": 5.246, "step": 7002 }, { "epoch": 0.7950303507632924, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.2428, "step": 7003 }, { "epoch": 0.7951438778732115, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.2487, "step": 7004 }, { "epoch": 0.7952574049831306, "grad_norm": 0.318359375, "learning_rate": 0.002, "loss": 5.2452, "step": 7005 }, { "epoch": 0.7953709320930497, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2282, "step": 7006 }, { "epoch": 0.7954844592029687, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2371, "step": 7007 }, { "epoch": 0.7955979863128878, "grad_norm": 0.251953125, "learning_rate": 0.002, "loss": 5.2197, "step": 7008 }, { "epoch": 0.7957115134228069, "grad_norm": 0.2314453125, "learning_rate": 0.002, "loss": 5.216, "step": 7009 }, { "epoch": 0.795825040532726, "grad_norm": 0.2216796875, "learning_rate": 0.002, "loss": 5.2231, "step": 7010 }, { "epoch": 0.795938567642645, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2396, "step": 7011 }, { "epoch": 0.7960520947525641, "grad_norm": 0.375, "learning_rate": 0.002, "loss": 5.2239, "step": 7012 }, { "epoch": 0.7961656218624832, "grad_norm": 0.455078125, "learning_rate": 0.002, "loss": 5.2562, "step": 7013 }, { "epoch": 0.7962791489724023, "grad_norm": 0.46484375, "learning_rate": 0.002, "loss": 5.248, "step": 7014 }, { "epoch": 0.7963926760823213, "grad_norm": 0.43359375, "learning_rate": 0.002, "loss": 5.2318, "step": 7015 }, { "epoch": 0.7965062031922404, "grad_norm": 0.388671875, "learning_rate": 0.002, "loss": 5.2435, "step": 7016 }, { "epoch": 0.7966197303021595, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.2274, "step": 7017 }, { "epoch": 0.7967332574120786, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.2473, "step": 7018 }, { "epoch": 0.7968467845219976, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2277, "step": 7019 }, { "epoch": 0.7969603116319167, "grad_norm": 0.28125, "learning_rate": 0.002, "loss": 5.2257, "step": 7020 }, { "epoch": 0.7970738387418358, "grad_norm": 0.271484375, "learning_rate": 0.002, "loss": 5.2436, "step": 7021 }, { "epoch": 0.7971873658517549, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.249, "step": 7022 }, { "epoch": 0.797300892961674, "grad_norm": 0.2734375, "learning_rate": 0.002, "loss": 5.251, "step": 7023 }, { "epoch": 0.797414420071593, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.232, "step": 7024 }, { "epoch": 0.7975279471815121, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2185, "step": 7025 }, { "epoch": 0.7976414742914312, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2482, "step": 7026 }, { "epoch": 0.7977550014013502, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2369, "step": 7027 }, { "epoch": 0.7978685285112693, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.2301, "step": 7028 }, { "epoch": 0.7979820556211884, "grad_norm": 0.306640625, "learning_rate": 0.002, "loss": 5.2426, "step": 7029 }, { "epoch": 0.7980955827311075, "grad_norm": 0.29296875, "learning_rate": 0.002, "loss": 5.2391, "step": 7030 }, { "epoch": 0.7982091098410266, "grad_norm": 0.255859375, "learning_rate": 0.002, "loss": 5.217, "step": 7031 }, { "epoch": 0.7983226369509456, "grad_norm": 0.267578125, "learning_rate": 0.002, "loss": 5.2424, "step": 7032 }, { "epoch": 0.7984361640608647, "grad_norm": 0.25, "learning_rate": 0.002, "loss": 5.2188, "step": 7033 }, { "epoch": 0.7985496911707838, "grad_norm": 0.30078125, "learning_rate": 0.002, "loss": 5.2291, "step": 7034 }, { "epoch": 0.7986632182807029, "grad_norm": 0.326171875, "learning_rate": 0.002, "loss": 5.2345, "step": 7035 }, { "epoch": 0.7987767453906219, "grad_norm": 0.390625, "learning_rate": 0.002, "loss": 5.2464, "step": 7036 }, { "epoch": 0.798890272500541, "grad_norm": 0.388671875, "learning_rate": 0.002, "loss": 5.2364, "step": 7037 }, { "epoch": 0.7990037996104601, "grad_norm": 0.359375, "learning_rate": 0.002, "loss": 5.2175, "step": 7038 }, { "epoch": 0.7991173267203792, "grad_norm": 0.3515625, "learning_rate": 0.002, "loss": 5.2395, "step": 7039 }, { "epoch": 0.7992308538302982, "grad_norm": 0.3046875, "learning_rate": 0.002, "loss": 5.1986, "step": 7040 }, { "epoch": 0.7993443809402173, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2579, "step": 7041 }, { "epoch": 0.7994579080501364, "grad_norm": 0.27734375, "learning_rate": 0.002, "loss": 5.255, "step": 7042 }, { "epoch": 0.7995714351600555, "grad_norm": 0.275390625, "learning_rate": 0.002, "loss": 5.2412, "step": 7043 }, { "epoch": 0.7996849622699745, "grad_norm": 0.302734375, "learning_rate": 0.002, "loss": 5.2219, "step": 7044 }, { "epoch": 0.7997984893798936, "grad_norm": 0.314453125, "learning_rate": 0.002, "loss": 5.232, "step": 7045 }, { "epoch": 0.7999120164898127, "grad_norm": 0.33203125, "learning_rate": 0.002, "loss": 5.2227, "step": 7046 }, { "epoch": 0.8000255435997318, "grad_norm": 0.34375, "learning_rate": 0.002, "loss": 5.2288, "step": 7047 }, { "epoch": 0.8001390707096508, "grad_norm": 0.333984375, "learning_rate": 0.002, "loss": 5.2299, "step": 7048 }, { "epoch": 0.8002525978195699, "grad_norm": 0.32421875, "learning_rate": 0.002, "loss": 5.2249, "step": 7049 }, { "epoch": 0.800366124929489, "grad_norm": 0.330078125, "learning_rate": 0.002, "loss": 5.2269, "step": 7050 }, { "epoch": 0.8004796520394081, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2385, "step": 7051 }, { "epoch": 0.8005931791493271, "grad_norm": 0.31640625, "learning_rate": 0.002, "loss": 5.2385, "step": 7052 }, { "epoch": 0.8007067062592462, "grad_norm": 0.3125, "learning_rate": 0.002, "loss": 5.2264, "step": 7053 }, { "epoch": 0.8008202333691653, "grad_norm": 0.3203125, "learning_rate": 0.002, "loss": 5.2482, "step": 7054 }, { "epoch": 0.8009337604790844, "grad_norm": 0.294921875, "learning_rate": 0.002, "loss": 5.2317, "step": 7055 }, { "epoch": 0.8010472875890035, "grad_norm": 0.287109375, "learning_rate": 0.002, "loss": 5.249, "step": 7056 }, { "epoch": 0.8011608146989225, "grad_norm": 0.283203125, "learning_rate": 0.002, "loss": 5.2467, "step": 7057 }, { "epoch": 0.8012743418088416, "grad_norm": 0.2890625, "learning_rate": 0.002, "loss": 5.2354, "step": 7058 }, { "epoch": 0.8013878689187607, "grad_norm": 0.310546875, "learning_rate": 0.0019999985497728043, "loss": 5.2593, "step": 7059 }, { "epoch": 0.8015013960286798, "grad_norm": 0.31640625, "learning_rate": 0.0019999941990958897, "loss": 5.2228, "step": 7060 }, { "epoch": 0.8016149231385988, "grad_norm": 0.361328125, "learning_rate": 0.0019999869479832783, "loss": 5.2192, "step": 7061 }, { "epoch": 0.8017284502485179, "grad_norm": 0.3515625, "learning_rate": 0.0019999767964583377, "loss": 5.2464, "step": 7062 }, { "epoch": 0.801841977358437, "grad_norm": 0.3515625, "learning_rate": 0.001999963744553784, "loss": 5.235, "step": 7063 }, { "epoch": 0.8019555044683561, "grad_norm": 0.3671875, "learning_rate": 0.00199994779231168, "loss": 5.2344, "step": 7064 }, { "epoch": 0.8020690315782751, "grad_norm": 0.31640625, "learning_rate": 0.0019999289397834344, "loss": 5.251, "step": 7065 }, { "epoch": 0.8021825586881942, "grad_norm": 0.33984375, "learning_rate": 0.0019999071870298053, "loss": 5.2342, "step": 7066 }, { "epoch": 0.8022960857981133, "grad_norm": 0.326171875, "learning_rate": 0.0019998825341208943, "loss": 5.2311, "step": 7067 }, { "epoch": 0.8024096129080324, "grad_norm": 0.322265625, "learning_rate": 0.0019998549811361525, "loss": 5.2038, "step": 7068 }, { "epoch": 0.8025231400179514, "grad_norm": 0.29296875, "learning_rate": 0.0019998245281643745, "loss": 5.2382, "step": 7069 }, { "epoch": 0.8026366671278705, "grad_norm": 0.294921875, "learning_rate": 0.0019997911753037024, "loss": 5.2386, "step": 7070 }, { "epoch": 0.8027501942377896, "grad_norm": 0.30078125, "learning_rate": 0.0019997549226616236, "loss": 5.208, "step": 7071 }, { "epoch": 0.8028637213477087, "grad_norm": 0.357421875, "learning_rate": 0.00199971577035497, "loss": 5.2324, "step": 7072 }, { "epoch": 0.8029772484576277, "grad_norm": 0.39453125, "learning_rate": 0.001999673718509919, "loss": 5.2286, "step": 7073 }, { "epoch": 0.8030907755675468, "grad_norm": 0.42578125, "learning_rate": 0.0019996287672619925, "loss": 5.2365, "step": 7074 }, { "epoch": 0.8032043026774659, "grad_norm": 0.41015625, "learning_rate": 0.001999580916756055, "loss": 5.2468, "step": 7075 }, { "epoch": 0.803317829787385, "grad_norm": 0.353515625, "learning_rate": 0.0019995301671463174, "loss": 5.2252, "step": 7076 }, { "epoch": 0.803431356897304, "grad_norm": 0.357421875, "learning_rate": 0.0019994765185963304, "loss": 5.216, "step": 7077 }, { "epoch": 0.8035448840072231, "grad_norm": 0.275390625, "learning_rate": 0.001999419971278989, "loss": 5.2266, "step": 7078 }, { "epoch": 0.8036584111171422, "grad_norm": 0.275390625, "learning_rate": 0.0019993605253765304, "loss": 5.2387, "step": 7079 }, { "epoch": 0.8037719382270613, "grad_norm": 0.23828125, "learning_rate": 0.0019992981810805317, "loss": 5.205, "step": 7080 }, { "epoch": 0.8038854653369804, "grad_norm": 0.2265625, "learning_rate": 0.0019992329385919117, "loss": 5.2228, "step": 7081 }, { "epoch": 0.8039989924468994, "grad_norm": 0.2353515625, "learning_rate": 0.00199916479812093, "loss": 5.2332, "step": 7082 }, { "epoch": 0.8041125195568185, "grad_norm": 0.2412109375, "learning_rate": 0.001999093759887183, "loss": 5.2142, "step": 7083 }, { "epoch": 0.8042260466667376, "grad_norm": 0.255859375, "learning_rate": 0.0019990198241196092, "loss": 5.2272, "step": 7084 }, { "epoch": 0.8043395737766567, "grad_norm": 0.255859375, "learning_rate": 0.0019989429910564826, "loss": 5.248, "step": 7085 }, { "epoch": 0.8044531008865757, "grad_norm": 0.2578125, "learning_rate": 0.001998863260945416, "loss": 5.2242, "step": 7086 }, { "epoch": 0.8045666279964948, "grad_norm": 0.2470703125, "learning_rate": 0.0019987806340433564, "loss": 5.2396, "step": 7087 }, { "epoch": 0.8046801551064139, "grad_norm": 0.24609375, "learning_rate": 0.001998695110616589, "loss": 5.2022, "step": 7088 }, { "epoch": 0.804793682216333, "grad_norm": 0.255859375, "learning_rate": 0.0019986066909407317, "loss": 5.2301, "step": 7089 }, { "epoch": 0.804907209326252, "grad_norm": 0.28125, "learning_rate": 0.0019985153753007373, "loss": 5.213, "step": 7090 }, { "epoch": 0.8050207364361711, "grad_norm": 0.30859375, "learning_rate": 0.0019984211639908914, "loss": 5.2323, "step": 7091 }, { "epoch": 0.8051342635460902, "grad_norm": 0.353515625, "learning_rate": 0.001998324057314811, "loss": 5.25, "step": 7092 }, { "epoch": 0.8052477906560093, "grad_norm": 0.359375, "learning_rate": 0.0019982240555854445, "loss": 5.2288, "step": 7093 }, { "epoch": 0.8053613177659283, "grad_norm": 0.375, "learning_rate": 0.00199812115912507, "loss": 5.2443, "step": 7094 }, { "epoch": 0.8054748448758474, "grad_norm": 0.3515625, "learning_rate": 0.001998015368265295, "loss": 5.2326, "step": 7095 }, { "epoch": 0.8055883719857665, "grad_norm": 0.33203125, "learning_rate": 0.001997906683347055, "loss": 5.2554, "step": 7096 }, { "epoch": 0.8057018990956856, "grad_norm": 0.294921875, "learning_rate": 0.0019977951047206108, "loss": 5.2406, "step": 7097 }, { "epoch": 0.8058154262056046, "grad_norm": 0.298828125, "learning_rate": 0.0019976806327455508, "loss": 5.2463, "step": 7098 }, { "epoch": 0.8059289533155237, "grad_norm": 0.294921875, "learning_rate": 0.001997563267790786, "loss": 5.2427, "step": 7099 }, { "epoch": 0.8060424804254428, "grad_norm": 0.3359375, "learning_rate": 0.0019974430102345526, "loss": 5.2501, "step": 7100 }, { "epoch": 0.8061560075353619, "grad_norm": 0.384765625, "learning_rate": 0.001997319860464407, "loss": 5.2372, "step": 7101 }, { "epoch": 0.806269534645281, "grad_norm": 0.41015625, "learning_rate": 0.001997193818877228, "loss": 5.2439, "step": 7102 }, { "epoch": 0.8063830617552, "grad_norm": 0.396484375, "learning_rate": 0.001997064885879213, "loss": 5.2292, "step": 7103 }, { "epoch": 0.8064965888651191, "grad_norm": 0.375, "learning_rate": 0.0019969330618858777, "loss": 5.2216, "step": 7104 }, { "epoch": 0.8066101159750382, "grad_norm": 0.333984375, "learning_rate": 0.0019967983473220554, "loss": 5.2187, "step": 7105 }, { "epoch": 0.8067236430849573, "grad_norm": 0.31640625, "learning_rate": 0.0019966607426218936, "loss": 5.2221, "step": 7106 }, { "epoch": 0.8068371701948763, "grad_norm": 0.296875, "learning_rate": 0.0019965202482288553, "loss": 5.2452, "step": 7107 }, { "epoch": 0.8069506973047954, "grad_norm": 0.28515625, "learning_rate": 0.001996376864595715, "loss": 5.2446, "step": 7108 }, { "epoch": 0.8070642244147145, "grad_norm": 0.27734375, "learning_rate": 0.0019962305921845595, "loss": 5.2246, "step": 7109 }, { "epoch": 0.8071777515246336, "grad_norm": 0.29296875, "learning_rate": 0.001996081431466785, "loss": 5.2344, "step": 7110 }, { "epoch": 0.8072912786345526, "grad_norm": 0.330078125, "learning_rate": 0.001995929382923095, "loss": 5.2461, "step": 7111 }, { "epoch": 0.8074048057444717, "grad_norm": 0.375, "learning_rate": 0.0019957744470435017, "loss": 5.2463, "step": 7112 }, { "epoch": 0.8075183328543908, "grad_norm": 0.392578125, "learning_rate": 0.0019956166243273203, "loss": 5.2233, "step": 7113 }, { "epoch": 0.8076318599643099, "grad_norm": 0.373046875, "learning_rate": 0.0019954559152831706, "loss": 5.2364, "step": 7114 }, { "epoch": 0.8077453870742289, "grad_norm": 0.390625, "learning_rate": 0.001995292320428973, "loss": 5.2483, "step": 7115 }, { "epoch": 0.807858914184148, "grad_norm": 0.3515625, "learning_rate": 0.001995125840291951, "loss": 5.2371, "step": 7116 }, { "epoch": 0.8079724412940671, "grad_norm": 0.36328125, "learning_rate": 0.001994956475408623, "loss": 5.2271, "step": 7117 }, { "epoch": 0.8080859684039862, "grad_norm": 0.333984375, "learning_rate": 0.0019947842263248064, "loss": 5.2429, "step": 7118 }, { "epoch": 0.8081994955139052, "grad_norm": 0.326171875, "learning_rate": 0.001994609093595613, "loss": 5.2098, "step": 7119 }, { "epoch": 0.8083130226238244, "grad_norm": 0.291015625, "learning_rate": 0.001994431077785448, "loss": 5.2377, "step": 7120 }, { "epoch": 0.8084265497337435, "grad_norm": 0.28515625, "learning_rate": 0.0019942501794680077, "loss": 5.2504, "step": 7121 }, { "epoch": 0.8085400768436626, "grad_norm": 0.251953125, "learning_rate": 0.0019940663992262775, "loss": 5.2359, "step": 7122 }, { "epoch": 0.8086536039535817, "grad_norm": 0.26171875, "learning_rate": 0.0019938797376525316, "loss": 5.2292, "step": 7123 }, { "epoch": 0.8087671310635007, "grad_norm": 0.26171875, "learning_rate": 0.001993690195348329, "loss": 5.2436, "step": 7124 }, { "epoch": 0.8088806581734198, "grad_norm": 0.2470703125, "learning_rate": 0.001993497772924513, "loss": 5.226, "step": 7125 }, { "epoch": 0.8089941852833389, "grad_norm": 0.263671875, "learning_rate": 0.0019933024710012083, "loss": 5.2094, "step": 7126 }, { "epoch": 0.809107712393258, "grad_norm": 0.263671875, "learning_rate": 0.00199310429020782, "loss": 5.2277, "step": 7127 }, { "epoch": 0.809221239503177, "grad_norm": 0.279296875, "learning_rate": 0.00199290323118303, "loss": 5.2299, "step": 7128 }, { "epoch": 0.8093347666130961, "grad_norm": 0.271484375, "learning_rate": 0.001992699294574798, "loss": 5.2419, "step": 7129 }, { "epoch": 0.8094482937230152, "grad_norm": 0.279296875, "learning_rate": 0.0019924924810403545, "loss": 5.2246, "step": 7130 }, { "epoch": 0.8095618208329343, "grad_norm": 0.283203125, "learning_rate": 0.0019922827912462046, "loss": 5.2314, "step": 7131 }, { "epoch": 0.8096753479428533, "grad_norm": 0.291015625, "learning_rate": 0.0019920702258681196, "loss": 5.2273, "step": 7132 }, { "epoch": 0.8097888750527724, "grad_norm": 0.28125, "learning_rate": 0.001991854785591141, "loss": 5.2327, "step": 7133 }, { "epoch": 0.8099024021626915, "grad_norm": 0.265625, "learning_rate": 0.0019916364711095743, "loss": 5.2324, "step": 7134 }, { "epoch": 0.8100159292726106, "grad_norm": 0.259765625, "learning_rate": 0.001991415283126986, "loss": 5.2381, "step": 7135 }, { "epoch": 0.8101294563825296, "grad_norm": 0.2431640625, "learning_rate": 0.0019911912223562057, "loss": 5.2377, "step": 7136 }, { "epoch": 0.8102429834924487, "grad_norm": 0.2734375, "learning_rate": 0.00199096428951932, "loss": 5.2477, "step": 7137 }, { "epoch": 0.8103565106023678, "grad_norm": 0.3046875, "learning_rate": 0.0019907344853476714, "loss": 5.2349, "step": 7138 }, { "epoch": 0.8104700377122869, "grad_norm": 0.318359375, "learning_rate": 0.001990501810581856, "loss": 5.2307, "step": 7139 }, { "epoch": 0.810583564822206, "grad_norm": 0.333984375, "learning_rate": 0.001990266265971721, "loss": 5.2297, "step": 7140 }, { "epoch": 0.810697091932125, "grad_norm": 0.3046875, "learning_rate": 0.0019900278522763624, "loss": 5.2133, "step": 7141 }, { "epoch": 0.8108106190420441, "grad_norm": 0.30078125, "learning_rate": 0.0019897865702641227, "loss": 5.244, "step": 7142 }, { "epoch": 0.8109241461519632, "grad_norm": 0.271484375, "learning_rate": 0.001989542420712588, "loss": 5.2312, "step": 7143 }, { "epoch": 0.8110376732618823, "grad_norm": 0.2578125, "learning_rate": 0.001989295404408585, "loss": 5.2455, "step": 7144 }, { "epoch": 0.8111512003718013, "grad_norm": 0.2421875, "learning_rate": 0.0019890455221481806, "loss": 5.2388, "step": 7145 }, { "epoch": 0.8112647274817204, "grad_norm": 0.2451171875, "learning_rate": 0.001988792774736677, "loss": 5.2303, "step": 7146 }, { "epoch": 0.8113782545916395, "grad_norm": 0.23828125, "learning_rate": 0.00198853716298861, "loss": 5.2365, "step": 7147 }, { "epoch": 0.8114917817015586, "grad_norm": 0.24609375, "learning_rate": 0.0019882786877277463, "loss": 5.2388, "step": 7148 }, { "epoch": 0.8116053088114776, "grad_norm": 0.251953125, "learning_rate": 0.001988017349787081, "loss": 5.2471, "step": 7149 }, { "epoch": 0.8117188359213967, "grad_norm": 0.275390625, "learning_rate": 0.001987753150008836, "loss": 5.2396, "step": 7150 }, { "epoch": 0.8118323630313158, "grad_norm": 0.29296875, "learning_rate": 0.0019874860892444544, "loss": 5.2562, "step": 7151 }, { "epoch": 0.8119458901412349, "grad_norm": 0.30859375, "learning_rate": 0.0019872161683545998, "loss": 5.2307, "step": 7152 }, { "epoch": 0.8120594172511539, "grad_norm": 0.349609375, "learning_rate": 0.001986943388209154, "loss": 5.2181, "step": 7153 }, { "epoch": 0.812172944361073, "grad_norm": 0.369140625, "learning_rate": 0.001986667749687213, "loss": 5.2511, "step": 7154 }, { "epoch": 0.8122864714709921, "grad_norm": 0.3984375, "learning_rate": 0.0019863892536770844, "loss": 5.2599, "step": 7155 }, { "epoch": 0.8123999985809112, "grad_norm": 0.37890625, "learning_rate": 0.001986107901076285, "loss": 5.2153, "step": 7156 }, { "epoch": 0.8125135256908302, "grad_norm": 0.345703125, "learning_rate": 0.001985823692791537, "loss": 5.2369, "step": 7157 }, { "epoch": 0.8126270528007493, "grad_norm": 0.3515625, "learning_rate": 0.0019855366297387668, "loss": 5.2278, "step": 7158 }, { "epoch": 0.8127405799106684, "grad_norm": 0.318359375, "learning_rate": 0.0019852467128430996, "loss": 5.2292, "step": 7159 }, { "epoch": 0.8128541070205875, "grad_norm": 0.34375, "learning_rate": 0.001984953943038859, "loss": 5.2276, "step": 7160 }, { "epoch": 0.8129676341305065, "grad_norm": 0.361328125, "learning_rate": 0.001984658321269562, "loss": 5.258, "step": 7161 }, { "epoch": 0.8130811612404256, "grad_norm": 0.388671875, "learning_rate": 0.001984359848487917, "loss": 5.2062, "step": 7162 }, { "epoch": 0.8131946883503447, "grad_norm": 0.3984375, "learning_rate": 0.00198405852565582, "loss": 5.2148, "step": 7163 }, { "epoch": 0.8133082154602638, "grad_norm": 0.38671875, "learning_rate": 0.0019837543537443523, "loss": 5.2288, "step": 7164 }, { "epoch": 0.8134217425701828, "grad_norm": 0.3359375, "learning_rate": 0.0019834473337337773, "loss": 5.243, "step": 7165 }, { "epoch": 0.8135352696801019, "grad_norm": 0.33203125, "learning_rate": 0.0019831374666135363, "loss": 5.2008, "step": 7166 }, { "epoch": 0.813648796790021, "grad_norm": 0.326171875, "learning_rate": 0.0019828247533822466, "loss": 5.215, "step": 7167 }, { "epoch": 0.8137623238999401, "grad_norm": 0.326171875, "learning_rate": 0.001982509195047698, "loss": 5.2385, "step": 7168 }, { "epoch": 0.8138758510098592, "grad_norm": 0.30859375, "learning_rate": 0.0019821907926268487, "loss": 5.2461, "step": 7169 }, { "epoch": 0.8139893781197782, "grad_norm": 0.31640625, "learning_rate": 0.001981869547145822, "loss": 5.2426, "step": 7170 }, { "epoch": 0.8141029052296973, "grad_norm": 0.294921875, "learning_rate": 0.001981545459639906, "loss": 5.2326, "step": 7171 }, { "epoch": 0.8142164323396164, "grad_norm": 0.296875, "learning_rate": 0.0019812185311535446, "loss": 5.2196, "step": 7172 }, { "epoch": 0.8143299594495355, "grad_norm": 0.287109375, "learning_rate": 0.0019808887627403406, "loss": 5.2386, "step": 7173 }, { "epoch": 0.8144434865594545, "grad_norm": 0.27734375, "learning_rate": 0.001980556155463046, "loss": 5.256, "step": 7174 }, { "epoch": 0.8145570136693736, "grad_norm": 0.287109375, "learning_rate": 0.0019802207103935647, "loss": 5.2262, "step": 7175 }, { "epoch": 0.8146705407792927, "grad_norm": 0.283203125, "learning_rate": 0.0019798824286129443, "loss": 5.2309, "step": 7176 }, { "epoch": 0.8147840678892118, "grad_norm": 0.30078125, "learning_rate": 0.0019795413112113744, "loss": 5.2336, "step": 7177 }, { "epoch": 0.8148975949991308, "grad_norm": 0.326171875, "learning_rate": 0.001979197359288183, "loss": 5.2155, "step": 7178 }, { "epoch": 0.8150111221090499, "grad_norm": 0.31640625, "learning_rate": 0.001978850573951834, "loss": 5.2059, "step": 7179 }, { "epoch": 0.815124649218969, "grad_norm": 0.31640625, "learning_rate": 0.001978500956319921, "loss": 5.246, "step": 7180 }, { "epoch": 0.8152381763288881, "grad_norm": 0.318359375, "learning_rate": 0.0019781485075191668, "loss": 5.2322, "step": 7181 }, { "epoch": 0.8153517034388071, "grad_norm": 0.283203125, "learning_rate": 0.0019777932286854185, "loss": 5.2339, "step": 7182 }, { "epoch": 0.8154652305487262, "grad_norm": 0.283203125, "learning_rate": 0.001977435120963641, "loss": 5.2343, "step": 7183 }, { "epoch": 0.8155787576586453, "grad_norm": 0.2490234375, "learning_rate": 0.0019770741855079195, "loss": 5.2315, "step": 7184 }, { "epoch": 0.8156922847685644, "grad_norm": 0.248046875, "learning_rate": 0.001976710423481449, "loss": 5.2266, "step": 7185 }, { "epoch": 0.8158058118784834, "grad_norm": 0.23828125, "learning_rate": 0.0019763438360565367, "loss": 5.238, "step": 7186 }, { "epoch": 0.8159193389884025, "grad_norm": 0.2451171875, "learning_rate": 0.0019759744244145926, "loss": 5.2134, "step": 7187 }, { "epoch": 0.8160328660983216, "grad_norm": 0.2431640625, "learning_rate": 0.0019756021897461306, "loss": 5.2253, "step": 7188 }, { "epoch": 0.8161463932082407, "grad_norm": 0.255859375, "learning_rate": 0.0019752271332507607, "loss": 5.2367, "step": 7189 }, { "epoch": 0.8162599203181597, "grad_norm": 0.271484375, "learning_rate": 0.0019748492561371877, "loss": 5.246, "step": 7190 }, { "epoch": 0.8163734474280788, "grad_norm": 0.32421875, "learning_rate": 0.0019744685596232068, "loss": 5.213, "step": 7191 }, { "epoch": 0.8164869745379979, "grad_norm": 0.349609375, "learning_rate": 0.0019740850449356984, "loss": 5.2335, "step": 7192 }, { "epoch": 0.816600501647917, "grad_norm": 0.36328125, "learning_rate": 0.001973698713310626, "loss": 5.2367, "step": 7193 }, { "epoch": 0.816714028757836, "grad_norm": 0.353515625, "learning_rate": 0.001973309565993032, "loss": 5.2401, "step": 7194 }, { "epoch": 0.8168275558677551, "grad_norm": 0.349609375, "learning_rate": 0.00197291760423703, "loss": 5.2281, "step": 7195 }, { "epoch": 0.8169410829776742, "grad_norm": 0.322265625, "learning_rate": 0.0019725228293058076, "loss": 5.2098, "step": 7196 }, { "epoch": 0.8170546100875933, "grad_norm": 0.34375, "learning_rate": 0.001972125242471615, "loss": 5.2306, "step": 7197 }, { "epoch": 0.8171681371975124, "grad_norm": 0.33203125, "learning_rate": 0.001971724845015768, "loss": 5.2341, "step": 7198 }, { "epoch": 0.8172816643074314, "grad_norm": 0.345703125, "learning_rate": 0.001971321638228637, "loss": 5.2291, "step": 7199 }, { "epoch": 0.8173951914173505, "grad_norm": 0.3515625, "learning_rate": 0.001970915623409648, "loss": 5.2319, "step": 7200 }, { "epoch": 0.8175087185272696, "grad_norm": 0.36328125, "learning_rate": 0.0019705068018672747, "loss": 5.2428, "step": 7201 }, { "epoch": 0.8176222456371887, "grad_norm": 0.326171875, "learning_rate": 0.001970095174919039, "loss": 5.2308, "step": 7202 }, { "epoch": 0.8177357727471077, "grad_norm": 0.328125, "learning_rate": 0.0019696807438915015, "loss": 5.2295, "step": 7203 }, { "epoch": 0.8178492998570268, "grad_norm": 0.31640625, "learning_rate": 0.0019692635101202604, "loss": 5.2513, "step": 7204 }, { "epoch": 0.8179628269669459, "grad_norm": 0.294921875, "learning_rate": 0.0019688434749499466, "loss": 5.2363, "step": 7205 }, { "epoch": 0.818076354076865, "grad_norm": 0.2734375, "learning_rate": 0.001968420639734218, "loss": 5.2003, "step": 7206 }, { "epoch": 0.818189881186784, "grad_norm": 0.27734375, "learning_rate": 0.0019679950058357583, "loss": 5.2274, "step": 7207 }, { "epoch": 0.8183034082967031, "grad_norm": 0.27734375, "learning_rate": 0.001967566574626268, "loss": 5.2399, "step": 7208 }, { "epoch": 0.8184169354066222, "grad_norm": 0.279296875, "learning_rate": 0.001967135347486465, "loss": 5.2159, "step": 7209 }, { "epoch": 0.8185304625165413, "grad_norm": 0.29296875, "learning_rate": 0.001966701325806076, "loss": 5.2317, "step": 7210 }, { "epoch": 0.8186439896264603, "grad_norm": 0.30859375, "learning_rate": 0.0019662645109838352, "loss": 5.2416, "step": 7211 }, { "epoch": 0.8187575167363794, "grad_norm": 0.2890625, "learning_rate": 0.001965824904427477, "loss": 5.2259, "step": 7212 }, { "epoch": 0.8188710438462985, "grad_norm": 0.283203125, "learning_rate": 0.001965382507553734, "loss": 5.2457, "step": 7213 }, { "epoch": 0.8189845709562176, "grad_norm": 0.28125, "learning_rate": 0.00196493732178833, "loss": 5.2565, "step": 7214 }, { "epoch": 0.8190980980661366, "grad_norm": 0.26953125, "learning_rate": 0.001964489348565977, "loss": 5.2399, "step": 7215 }, { "epoch": 0.8192116251760557, "grad_norm": 0.26953125, "learning_rate": 0.0019640385893303705, "loss": 5.2222, "step": 7216 }, { "epoch": 0.8193251522859748, "grad_norm": 0.259765625, "learning_rate": 0.0019635850455341857, "loss": 5.228, "step": 7217 }, { "epoch": 0.8194386793958939, "grad_norm": 0.255859375, "learning_rate": 0.001963128718639069, "loss": 5.229, "step": 7218 }, { "epoch": 0.819552206505813, "grad_norm": 0.251953125, "learning_rate": 0.0019626696101156384, "loss": 5.2476, "step": 7219 }, { "epoch": 0.819665733615732, "grad_norm": 0.248046875, "learning_rate": 0.001962207721443475, "loss": 5.2394, "step": 7220 }, { "epoch": 0.8197792607256511, "grad_norm": 0.25, "learning_rate": 0.00196174305411112, "loss": 5.2235, "step": 7221 }, { "epoch": 0.8198927878355702, "grad_norm": 0.25390625, "learning_rate": 0.0019612756096160687, "loss": 5.2381, "step": 7222 }, { "epoch": 0.8200063149454893, "grad_norm": 0.267578125, "learning_rate": 0.0019608053894647685, "loss": 5.2386, "step": 7223 }, { "epoch": 0.8201198420554083, "grad_norm": 0.271484375, "learning_rate": 0.00196033239517261, "loss": 5.2171, "step": 7224 }, { "epoch": 0.8202333691653274, "grad_norm": 0.2890625, "learning_rate": 0.0019598566282639242, "loss": 5.253, "step": 7225 }, { "epoch": 0.8203468962752465, "grad_norm": 0.30859375, "learning_rate": 0.001959378090271979, "loss": 5.2352, "step": 7226 }, { "epoch": 0.8204604233851656, "grad_norm": 0.3046875, "learning_rate": 0.0019588967827389703, "loss": 5.233, "step": 7227 }, { "epoch": 0.8205739504950846, "grad_norm": 0.296875, "learning_rate": 0.001958412707216023, "loss": 5.2228, "step": 7228 }, { "epoch": 0.8206874776050037, "grad_norm": 0.263671875, "learning_rate": 0.001957925865263179, "loss": 5.2065, "step": 7229 }, { "epoch": 0.8208010047149228, "grad_norm": 0.275390625, "learning_rate": 0.0019574362584493968, "loss": 5.2348, "step": 7230 }, { "epoch": 0.8209145318248419, "grad_norm": 0.3046875, "learning_rate": 0.0019569438883525465, "loss": 5.2241, "step": 7231 }, { "epoch": 0.8210280589347609, "grad_norm": 0.3515625, "learning_rate": 0.001956448756559402, "loss": 5.2316, "step": 7232 }, { "epoch": 0.82114158604468, "grad_norm": 0.396484375, "learning_rate": 0.0019559508646656383, "loss": 5.2187, "step": 7233 }, { "epoch": 0.8212551131545991, "grad_norm": 0.412109375, "learning_rate": 0.0019554502142758246, "loss": 5.242, "step": 7234 }, { "epoch": 0.8213686402645182, "grad_norm": 0.37109375, "learning_rate": 0.001954946807003421, "loss": 5.2443, "step": 7235 }, { "epoch": 0.8214821673744372, "grad_norm": 0.345703125, "learning_rate": 0.0019544406444707715, "loss": 5.2319, "step": 7236 }, { "epoch": 0.8215956944843563, "grad_norm": 0.328125, "learning_rate": 0.0019539317283091, "loss": 5.2334, "step": 7237 }, { "epoch": 0.8217092215942754, "grad_norm": 0.302734375, "learning_rate": 0.0019534200601585036, "loss": 5.2381, "step": 7238 }, { "epoch": 0.8218227487041945, "grad_norm": 0.287109375, "learning_rate": 0.0019529056416679506, "loss": 5.243, "step": 7239 }, { "epoch": 0.8219362758141135, "grad_norm": 0.28515625, "learning_rate": 0.0019523884744952701, "loss": 5.2346, "step": 7240 }, { "epoch": 0.8220498029240326, "grad_norm": 0.28125, "learning_rate": 0.0019518685603071512, "loss": 5.2412, "step": 7241 }, { "epoch": 0.8221633300339517, "grad_norm": 0.28515625, "learning_rate": 0.0019513459007791355, "loss": 5.2575, "step": 7242 }, { "epoch": 0.8222768571438708, "grad_norm": 0.271484375, "learning_rate": 0.0019508204975956119, "loss": 5.2204, "step": 7243 }, { "epoch": 0.8223903842537899, "grad_norm": 0.2734375, "learning_rate": 0.001950292352449812, "loss": 5.2482, "step": 7244 }, { "epoch": 0.8225039113637089, "grad_norm": 0.255859375, "learning_rate": 0.0019497614670438027, "loss": 5.2401, "step": 7245 }, { "epoch": 0.822617438473628, "grad_norm": 0.27734375, "learning_rate": 0.0019492278430884833, "loss": 5.2485, "step": 7246 }, { "epoch": 0.8227309655835471, "grad_norm": 0.265625, "learning_rate": 0.0019486914823035782, "loss": 5.2242, "step": 7247 }, { "epoch": 0.8228444926934662, "grad_norm": 0.30078125, "learning_rate": 0.0019481523864176313, "loss": 5.2347, "step": 7248 }, { "epoch": 0.8229580198033852, "grad_norm": 0.28125, "learning_rate": 0.001947610557168002, "loss": 5.2267, "step": 7249 }, { "epoch": 0.8230715469133043, "grad_norm": 0.28125, "learning_rate": 0.0019470659963008582, "loss": 5.231, "step": 7250 }, { "epoch": 0.8231850740232234, "grad_norm": 0.283203125, "learning_rate": 0.0019465187055711707, "loss": 5.2222, "step": 7251 }, { "epoch": 0.8232986011331425, "grad_norm": 0.26171875, "learning_rate": 0.001945968686742708, "loss": 5.2463, "step": 7252 }, { "epoch": 0.8234121282430615, "grad_norm": 0.2734375, "learning_rate": 0.0019454159415880313, "loss": 5.2299, "step": 7253 }, { "epoch": 0.8235256553529806, "grad_norm": 0.26171875, "learning_rate": 0.0019448604718884867, "loss": 5.2249, "step": 7254 }, { "epoch": 0.8236391824628997, "grad_norm": 0.283203125, "learning_rate": 0.0019443022794342019, "loss": 5.2376, "step": 7255 }, { "epoch": 0.8237527095728188, "grad_norm": 0.302734375, "learning_rate": 0.0019437413660240786, "loss": 5.2104, "step": 7256 }, { "epoch": 0.8238662366827378, "grad_norm": 0.34375, "learning_rate": 0.001943177733465788, "loss": 5.2189, "step": 7257 }, { "epoch": 0.8239797637926569, "grad_norm": 0.37890625, "learning_rate": 0.0019426113835757635, "loss": 5.2393, "step": 7258 }, { "epoch": 0.824093290902576, "grad_norm": 0.400390625, "learning_rate": 0.0019420423181791967, "loss": 5.2351, "step": 7259 }, { "epoch": 0.8242068180124951, "grad_norm": 0.396484375, "learning_rate": 0.0019414705391100299, "loss": 5.2582, "step": 7260 }, { "epoch": 0.8243203451224141, "grad_norm": 0.431640625, "learning_rate": 0.001940896048210951, "loss": 5.2146, "step": 7261 }, { "epoch": 0.8244338722323332, "grad_norm": 0.38671875, "learning_rate": 0.0019403188473333872, "loss": 5.2421, "step": 7262 }, { "epoch": 0.8245473993422523, "grad_norm": 0.439453125, "learning_rate": 0.0019397389383375, "loss": 5.2155, "step": 7263 }, { "epoch": 0.8246609264521714, "grad_norm": 0.39453125, "learning_rate": 0.001939156323092177, "loss": 5.2283, "step": 7264 }, { "epoch": 0.8247744535620904, "grad_norm": 0.380859375, "learning_rate": 0.0019385710034750287, "loss": 5.2023, "step": 7265 }, { "epoch": 0.8248879806720095, "grad_norm": 0.37890625, "learning_rate": 0.0019379829813723807, "loss": 5.2374, "step": 7266 }, { "epoch": 0.8250015077819286, "grad_norm": 0.36328125, "learning_rate": 0.0019373922586792673, "loss": 5.2174, "step": 7267 }, { "epoch": 0.8251150348918477, "grad_norm": 0.318359375, "learning_rate": 0.0019367988372994264, "loss": 5.2171, "step": 7268 }, { "epoch": 0.8252285620017668, "grad_norm": 0.3203125, "learning_rate": 0.0019362027191452936, "loss": 5.2411, "step": 7269 }, { "epoch": 0.8253420891116858, "grad_norm": 0.28125, "learning_rate": 0.0019356039061379949, "loss": 5.2315, "step": 7270 }, { "epoch": 0.8254556162216049, "grad_norm": 0.2890625, "learning_rate": 0.0019350024002073411, "loss": 5.2372, "step": 7271 }, { "epoch": 0.825569143331524, "grad_norm": 0.26171875, "learning_rate": 0.0019343982032918217, "loss": 5.2191, "step": 7272 }, { "epoch": 0.8256826704414431, "grad_norm": 0.26953125, "learning_rate": 0.0019337913173385988, "loss": 5.2386, "step": 7273 }, { "epoch": 0.8257961975513621, "grad_norm": 0.263671875, "learning_rate": 0.0019331817443034999, "loss": 5.2264, "step": 7274 }, { "epoch": 0.8259097246612812, "grad_norm": 0.25390625, "learning_rate": 0.0019325694861510121, "loss": 5.2239, "step": 7275 }, { "epoch": 0.8260232517712003, "grad_norm": 0.267578125, "learning_rate": 0.0019319545448542772, "loss": 5.2328, "step": 7276 }, { "epoch": 0.8261367788811194, "grad_norm": 0.27734375, "learning_rate": 0.001931336922395083, "loss": 5.228, "step": 7277 }, { "epoch": 0.8262503059910384, "grad_norm": 0.3046875, "learning_rate": 0.0019307166207638577, "loss": 5.2125, "step": 7278 }, { "epoch": 0.8263638331009575, "grad_norm": 0.314453125, "learning_rate": 0.0019300936419596644, "loss": 5.2516, "step": 7279 }, { "epoch": 0.8264773602108766, "grad_norm": 0.298828125, "learning_rate": 0.0019294679879901938, "loss": 5.2344, "step": 7280 }, { "epoch": 0.8265908873207957, "grad_norm": 0.275390625, "learning_rate": 0.0019288396608717578, "loss": 5.2307, "step": 7281 }, { "epoch": 0.8267044144307147, "grad_norm": 0.255859375, "learning_rate": 0.0019282086626292833, "loss": 5.2337, "step": 7282 }, { "epoch": 0.8268179415406338, "grad_norm": 0.2578125, "learning_rate": 0.0019275749952963057, "loss": 5.2255, "step": 7283 }, { "epoch": 0.8269314686505529, "grad_norm": 0.25390625, "learning_rate": 0.0019269386609149612, "loss": 5.2403, "step": 7284 }, { "epoch": 0.827044995760472, "grad_norm": 0.275390625, "learning_rate": 0.001926299661535983, "loss": 5.2156, "step": 7285 }, { "epoch": 0.827158522870391, "grad_norm": 0.27734375, "learning_rate": 0.0019256579992186905, "loss": 5.2405, "step": 7286 }, { "epoch": 0.8272720499803101, "grad_norm": 0.29296875, "learning_rate": 0.0019250136760309868, "loss": 5.2254, "step": 7287 }, { "epoch": 0.8273855770902292, "grad_norm": 0.287109375, "learning_rate": 0.0019243666940493497, "loss": 5.2372, "step": 7288 }, { "epoch": 0.8274991042001483, "grad_norm": 0.279296875, "learning_rate": 0.001923717055358826, "loss": 5.2192, "step": 7289 }, { "epoch": 0.8276126313100673, "grad_norm": 0.263671875, "learning_rate": 0.0019230647620530228, "loss": 5.2281, "step": 7290 }, { "epoch": 0.8277261584199864, "grad_norm": 0.255859375, "learning_rate": 0.0019224098162341043, "loss": 5.2192, "step": 7291 }, { "epoch": 0.8278396855299055, "grad_norm": 0.255859375, "learning_rate": 0.0019217522200127822, "loss": 5.2353, "step": 7292 }, { "epoch": 0.8279532126398246, "grad_norm": 0.2578125, "learning_rate": 0.0019210919755083089, "loss": 5.2337, "step": 7293 }, { "epoch": 0.8280667397497437, "grad_norm": 0.2578125, "learning_rate": 0.0019204290848484728, "loss": 5.236, "step": 7294 }, { "epoch": 0.8281802668596627, "grad_norm": 0.24609375, "learning_rate": 0.00191976355016959, "loss": 5.2139, "step": 7295 }, { "epoch": 0.8282937939695818, "grad_norm": 0.259765625, "learning_rate": 0.0019190953736164963, "loss": 5.2483, "step": 7296 }, { "epoch": 0.8284073210795009, "grad_norm": 0.26953125, "learning_rate": 0.0019184245573425425, "loss": 5.2305, "step": 7297 }, { "epoch": 0.82852084818942, "grad_norm": 0.30859375, "learning_rate": 0.001917751103509587, "loss": 5.2192, "step": 7298 }, { "epoch": 0.828634375299339, "grad_norm": 0.32421875, "learning_rate": 0.0019170750142879873, "loss": 5.203, "step": 7299 }, { "epoch": 0.8287479024092581, "grad_norm": 0.359375, "learning_rate": 0.0019163962918565946, "loss": 5.223, "step": 7300 }, { "epoch": 0.8288614295191772, "grad_norm": 0.373046875, "learning_rate": 0.0019157149384027457, "loss": 5.2191, "step": 7301 }, { "epoch": 0.8289749566290963, "grad_norm": 0.388671875, "learning_rate": 0.0019150309561222574, "loss": 5.2087, "step": 7302 }, { "epoch": 0.8290884837390153, "grad_norm": 0.380859375, "learning_rate": 0.0019143443472194178, "loss": 5.2119, "step": 7303 }, { "epoch": 0.8292020108489344, "grad_norm": 0.37890625, "learning_rate": 0.0019136551139069793, "loss": 5.2168, "step": 7304 }, { "epoch": 0.8293155379588535, "grad_norm": 0.34375, "learning_rate": 0.0019129632584061538, "loss": 5.2104, "step": 7305 }, { "epoch": 0.8294290650687726, "grad_norm": 0.33203125, "learning_rate": 0.0019122687829466023, "loss": 5.2178, "step": 7306 }, { "epoch": 0.8295425921786916, "grad_norm": 0.310546875, "learning_rate": 0.0019115716897664299, "loss": 5.2493, "step": 7307 }, { "epoch": 0.8296561192886107, "grad_norm": 0.310546875, "learning_rate": 0.0019108719811121771, "loss": 5.2356, "step": 7308 }, { "epoch": 0.8297696463985298, "grad_norm": 0.28515625, "learning_rate": 0.0019101696592388148, "loss": 5.201, "step": 7309 }, { "epoch": 0.8298831735084489, "grad_norm": 0.2734375, "learning_rate": 0.001909464726409734, "loss": 5.219, "step": 7310 }, { "epoch": 0.8299967006183679, "grad_norm": 0.25390625, "learning_rate": 0.001908757184896741, "loss": 5.2147, "step": 7311 }, { "epoch": 0.830110227728287, "grad_norm": 0.2490234375, "learning_rate": 0.0019080470369800494, "loss": 5.2397, "step": 7312 }, { "epoch": 0.8302237548382061, "grad_norm": 0.2421875, "learning_rate": 0.0019073342849482717, "loss": 5.2212, "step": 7313 }, { "epoch": 0.8303372819481252, "grad_norm": 0.2451171875, "learning_rate": 0.0019066189310984135, "loss": 5.2484, "step": 7314 }, { "epoch": 0.8304508090580442, "grad_norm": 0.2578125, "learning_rate": 0.001905900977735865, "loss": 5.2294, "step": 7315 }, { "epoch": 0.8305643361679633, "grad_norm": 0.25390625, "learning_rate": 0.0019051804271743935, "loss": 5.2219, "step": 7316 }, { "epoch": 0.8306778632778824, "grad_norm": 0.255859375, "learning_rate": 0.0019044572817361375, "loss": 5.2292, "step": 7317 }, { "epoch": 0.8307913903878015, "grad_norm": 0.263671875, "learning_rate": 0.001903731543751597, "loss": 5.2066, "step": 7318 }, { "epoch": 0.8309049174977206, "grad_norm": 0.27734375, "learning_rate": 0.0019030032155596276, "loss": 5.2262, "step": 7319 }, { "epoch": 0.8310184446076396, "grad_norm": 0.27734375, "learning_rate": 0.0019022722995074319, "loss": 5.2266, "step": 7320 }, { "epoch": 0.8311319717175587, "grad_norm": 0.271484375, "learning_rate": 0.0019015387979505536, "loss": 5.245, "step": 7321 }, { "epoch": 0.8312454988274778, "grad_norm": 0.26953125, "learning_rate": 0.0019008027132528676, "loss": 5.2294, "step": 7322 }, { "epoch": 0.8313590259373969, "grad_norm": 0.294921875, "learning_rate": 0.0019000640477865739, "loss": 5.2244, "step": 7323 }, { "epoch": 0.8314725530473159, "grad_norm": 0.31640625, "learning_rate": 0.00189932280393219, "loss": 5.239, "step": 7324 }, { "epoch": 0.831586080157235, "grad_norm": 0.34375, "learning_rate": 0.0018985789840785426, "loss": 5.2362, "step": 7325 }, { "epoch": 0.8316996072671541, "grad_norm": 0.34375, "learning_rate": 0.0018978325906227598, "loss": 5.2193, "step": 7326 }, { "epoch": 0.8318131343770732, "grad_norm": 0.357421875, "learning_rate": 0.0018970836259702643, "loss": 5.2269, "step": 7327 }, { "epoch": 0.8319266614869922, "grad_norm": 0.34765625, "learning_rate": 0.0018963320925347647, "loss": 5.2273, "step": 7328 }, { "epoch": 0.8320401885969113, "grad_norm": 0.353515625, "learning_rate": 0.001895577992738248, "loss": 5.2182, "step": 7329 }, { "epoch": 0.8321537157068304, "grad_norm": 0.3203125, "learning_rate": 0.0018948213290109722, "loss": 5.2334, "step": 7330 }, { "epoch": 0.8322672428167495, "grad_norm": 0.3125, "learning_rate": 0.001894062103791458, "loss": 5.225, "step": 7331 }, { "epoch": 0.8323807699266685, "grad_norm": 0.30078125, "learning_rate": 0.001893300319526481, "loss": 5.2291, "step": 7332 }, { "epoch": 0.8324942970365876, "grad_norm": 0.296875, "learning_rate": 0.0018925359786710642, "loss": 5.2212, "step": 7333 }, { "epoch": 0.8326078241465067, "grad_norm": 0.271484375, "learning_rate": 0.0018917690836884693, "loss": 5.2243, "step": 7334 }, { "epoch": 0.8327213512564258, "grad_norm": 0.27734375, "learning_rate": 0.0018909996370501901, "loss": 5.2375, "step": 7335 }, { "epoch": 0.8328348783663448, "grad_norm": 0.251953125, "learning_rate": 0.0018902276412359423, "loss": 5.2317, "step": 7336 }, { "epoch": 0.8329484054762639, "grad_norm": 0.259765625, "learning_rate": 0.0018894530987336585, "loss": 5.2565, "step": 7337 }, { "epoch": 0.833061932586183, "grad_norm": 0.259765625, "learning_rate": 0.0018886760120394772, "loss": 5.2575, "step": 7338 }, { "epoch": 0.8331754596961021, "grad_norm": 0.267578125, "learning_rate": 0.0018878963836577373, "loss": 5.2332, "step": 7339 }, { "epoch": 0.8332889868060211, "grad_norm": 0.275390625, "learning_rate": 0.0018871142161009677, "loss": 5.2165, "step": 7340 }, { "epoch": 0.8334025139159402, "grad_norm": 0.294921875, "learning_rate": 0.0018863295118898816, "loss": 5.2251, "step": 7341 }, { "epoch": 0.8335160410258593, "grad_norm": 0.296875, "learning_rate": 0.0018855422735533662, "loss": 5.2341, "step": 7342 }, { "epoch": 0.8336295681357784, "grad_norm": 0.298828125, "learning_rate": 0.0018847525036284761, "loss": 5.2221, "step": 7343 }, { "epoch": 0.8337430952456975, "grad_norm": 0.28515625, "learning_rate": 0.0018839602046604239, "loss": 5.2277, "step": 7344 }, { "epoch": 0.8338566223556165, "grad_norm": 0.279296875, "learning_rate": 0.0018831653792025732, "loss": 5.2105, "step": 7345 }, { "epoch": 0.8339701494655356, "grad_norm": 0.271484375, "learning_rate": 0.0018823680298164294, "loss": 5.2287, "step": 7346 }, { "epoch": 0.8340836765754547, "grad_norm": 0.28125, "learning_rate": 0.0018815681590716319, "loss": 5.2431, "step": 7347 }, { "epoch": 0.8341972036853738, "grad_norm": 0.302734375, "learning_rate": 0.001880765769545946, "loss": 5.2252, "step": 7348 }, { "epoch": 0.8343107307952928, "grad_norm": 0.349609375, "learning_rate": 0.001879960863825254, "loss": 5.2205, "step": 7349 }, { "epoch": 0.8344242579052119, "grad_norm": 0.375, "learning_rate": 0.0018791534445035472, "loss": 5.232, "step": 7350 }, { "epoch": 0.834537785015131, "grad_norm": 0.3828125, "learning_rate": 0.0018783435141829187, "loss": 5.2107, "step": 7351 }, { "epoch": 0.8346513121250501, "grad_norm": 0.361328125, "learning_rate": 0.0018775310754735517, "loss": 5.201, "step": 7352 }, { "epoch": 0.8347648392349691, "grad_norm": 0.337890625, "learning_rate": 0.0018767161309937153, "loss": 5.2314, "step": 7353 }, { "epoch": 0.8348783663448882, "grad_norm": 0.357421875, "learning_rate": 0.0018758986833697527, "loss": 5.2384, "step": 7354 }, { "epoch": 0.8349918934548073, "grad_norm": 0.33984375, "learning_rate": 0.0018750787352360746, "loss": 5.2412, "step": 7355 }, { "epoch": 0.8351054205647264, "grad_norm": 0.369140625, "learning_rate": 0.0018742562892351503, "loss": 5.2106, "step": 7356 }, { "epoch": 0.8352189476746454, "grad_norm": 0.330078125, "learning_rate": 0.0018734313480174983, "loss": 5.2153, "step": 7357 }, { "epoch": 0.8353324747845645, "grad_norm": 0.33984375, "learning_rate": 0.0018726039142416796, "loss": 5.2318, "step": 7358 }, { "epoch": 0.8354460018944836, "grad_norm": 0.32421875, "learning_rate": 0.0018717739905742873, "loss": 5.2403, "step": 7359 }, { "epoch": 0.8355595290044027, "grad_norm": 0.322265625, "learning_rate": 0.0018709415796899383, "loss": 5.2338, "step": 7360 }, { "epoch": 0.8356730561143219, "grad_norm": 0.30078125, "learning_rate": 0.0018701066842712667, "loss": 5.2164, "step": 7361 }, { "epoch": 0.8357865832242409, "grad_norm": 0.29296875, "learning_rate": 0.001869269307008912, "loss": 5.2091, "step": 7362 }, { "epoch": 0.83590011033416, "grad_norm": 0.26171875, "learning_rate": 0.0018684294506015125, "loss": 5.2272, "step": 7363 }, { "epoch": 0.8360136374440791, "grad_norm": 0.2578125, "learning_rate": 0.0018675871177556967, "loss": 5.2254, "step": 7364 }, { "epoch": 0.8361271645539982, "grad_norm": 0.267578125, "learning_rate": 0.001866742311186073, "loss": 5.2338, "step": 7365 }, { "epoch": 0.8362406916639172, "grad_norm": 0.27734375, "learning_rate": 0.0018658950336152228, "loss": 5.2252, "step": 7366 }, { "epoch": 0.8363542187738363, "grad_norm": 0.314453125, "learning_rate": 0.0018650452877736901, "loss": 5.2021, "step": 7367 }, { "epoch": 0.8364677458837554, "grad_norm": 0.333984375, "learning_rate": 0.0018641930763999743, "loss": 5.2147, "step": 7368 }, { "epoch": 0.8365812729936745, "grad_norm": 0.3515625, "learning_rate": 0.0018633384022405197, "loss": 5.2393, "step": 7369 }, { "epoch": 0.8366948001035935, "grad_norm": 0.3515625, "learning_rate": 0.0018624812680497081, "loss": 5.2228, "step": 7370 }, { "epoch": 0.8368083272135126, "grad_norm": 0.365234375, "learning_rate": 0.001861621676589849, "loss": 5.2268, "step": 7371 }, { "epoch": 0.8369218543234317, "grad_norm": 0.341796875, "learning_rate": 0.0018607596306311712, "loss": 5.2217, "step": 7372 }, { "epoch": 0.8370353814333508, "grad_norm": 0.35546875, "learning_rate": 0.0018598951329518137, "loss": 5.2215, "step": 7373 }, { "epoch": 0.8371489085432698, "grad_norm": 0.33203125, "learning_rate": 0.0018590281863378163, "loss": 5.2059, "step": 7374 }, { "epoch": 0.8372624356531889, "grad_norm": 0.34375, "learning_rate": 0.0018581587935831113, "loss": 5.2297, "step": 7375 }, { "epoch": 0.837375962763108, "grad_norm": 0.328125, "learning_rate": 0.0018572869574895144, "loss": 5.2313, "step": 7376 }, { "epoch": 0.8374894898730271, "grad_norm": 0.337890625, "learning_rate": 0.0018564126808667156, "loss": 5.2385, "step": 7377 }, { "epoch": 0.8376030169829461, "grad_norm": 0.330078125, "learning_rate": 0.0018555359665322695, "loss": 5.2205, "step": 7378 }, { "epoch": 0.8377165440928652, "grad_norm": 0.310546875, "learning_rate": 0.0018546568173115874, "loss": 5.239, "step": 7379 }, { "epoch": 0.8378300712027843, "grad_norm": 0.2734375, "learning_rate": 0.0018537752360379277, "loss": 5.225, "step": 7380 }, { "epoch": 0.8379435983127034, "grad_norm": 0.255859375, "learning_rate": 0.0018528912255523855, "loss": 5.2082, "step": 7381 }, { "epoch": 0.8380571254226225, "grad_norm": 0.236328125, "learning_rate": 0.0018520047887038858, "loss": 5.2277, "step": 7382 }, { "epoch": 0.8381706525325415, "grad_norm": 0.234375, "learning_rate": 0.0018511159283491722, "loss": 5.2383, "step": 7383 }, { "epoch": 0.8382841796424606, "grad_norm": 0.24609375, "learning_rate": 0.0018502246473528, "loss": 5.2243, "step": 7384 }, { "epoch": 0.8383977067523797, "grad_norm": 0.259765625, "learning_rate": 0.001849330948587124, "loss": 5.2148, "step": 7385 }, { "epoch": 0.8385112338622988, "grad_norm": 0.27734375, "learning_rate": 0.001848434834932291, "loss": 5.2117, "step": 7386 }, { "epoch": 0.8386247609722178, "grad_norm": 0.275390625, "learning_rate": 0.0018475363092762315, "loss": 5.2222, "step": 7387 }, { "epoch": 0.8387382880821369, "grad_norm": 0.28125, "learning_rate": 0.0018466353745146481, "loss": 5.2113, "step": 7388 }, { "epoch": 0.838851815192056, "grad_norm": 0.28125, "learning_rate": 0.0018457320335510078, "loss": 5.2312, "step": 7389 }, { "epoch": 0.8389653423019751, "grad_norm": 0.287109375, "learning_rate": 0.0018448262892965322, "loss": 5.2219, "step": 7390 }, { "epoch": 0.8390788694118941, "grad_norm": 0.255859375, "learning_rate": 0.0018439181446701875, "loss": 5.2092, "step": 7391 }, { "epoch": 0.8391923965218132, "grad_norm": 0.2490234375, "learning_rate": 0.0018430076025986763, "loss": 5.217, "step": 7392 }, { "epoch": 0.8393059236317323, "grad_norm": 0.251953125, "learning_rate": 0.0018420946660164268, "loss": 5.2323, "step": 7393 }, { "epoch": 0.8394194507416514, "grad_norm": 0.2470703125, "learning_rate": 0.0018411793378655845, "loss": 5.2479, "step": 7394 }, { "epoch": 0.8395329778515704, "grad_norm": 0.255859375, "learning_rate": 0.0018402616210960031, "loss": 5.1887, "step": 7395 }, { "epoch": 0.8396465049614895, "grad_norm": 0.255859375, "learning_rate": 0.0018393415186652323, "loss": 5.2388, "step": 7396 }, { "epoch": 0.8397600320714086, "grad_norm": 0.26171875, "learning_rate": 0.0018384190335385116, "loss": 5.2259, "step": 7397 }, { "epoch": 0.8398735591813277, "grad_norm": 0.259765625, "learning_rate": 0.0018374941686887588, "loss": 5.2321, "step": 7398 }, { "epoch": 0.8399870862912467, "grad_norm": 0.26953125, "learning_rate": 0.0018365669270965607, "loss": 5.2157, "step": 7399 }, { "epoch": 0.8401006134011658, "grad_norm": 0.251953125, "learning_rate": 0.001835637311750164, "loss": 5.2224, "step": 7400 }, { "epoch": 0.8402141405110849, "grad_norm": 0.263671875, "learning_rate": 0.0018347053256454657, "loss": 5.2302, "step": 7401 }, { "epoch": 0.840327667621004, "grad_norm": 0.2431640625, "learning_rate": 0.0018337709717860021, "loss": 5.2177, "step": 7402 }, { "epoch": 0.840441194730923, "grad_norm": 0.2734375, "learning_rate": 0.0018328342531829407, "loss": 5.227, "step": 7403 }, { "epoch": 0.8405547218408421, "grad_norm": 0.283203125, "learning_rate": 0.0018318951728550706, "loss": 5.2082, "step": 7404 }, { "epoch": 0.8406682489507612, "grad_norm": 0.326171875, "learning_rate": 0.0018309537338287904, "loss": 5.2236, "step": 7405 }, { "epoch": 0.8407817760606803, "grad_norm": 0.345703125, "learning_rate": 0.0018300099391381022, "loss": 5.2196, "step": 7406 }, { "epoch": 0.8408953031705994, "grad_norm": 0.33984375, "learning_rate": 0.001829063791824598, "loss": 5.2117, "step": 7407 }, { "epoch": 0.8410088302805184, "grad_norm": 0.330078125, "learning_rate": 0.0018281152949374527, "loss": 5.2316, "step": 7408 }, { "epoch": 0.8411223573904375, "grad_norm": 0.322265625, "learning_rate": 0.001827164451533413, "loss": 5.2309, "step": 7409 }, { "epoch": 0.8412358845003566, "grad_norm": 0.318359375, "learning_rate": 0.0018262112646767875, "loss": 5.2474, "step": 7410 }, { "epoch": 0.8413494116102757, "grad_norm": 0.28515625, "learning_rate": 0.0018252557374394374, "loss": 5.2165, "step": 7411 }, { "epoch": 0.8414629387201947, "grad_norm": 0.2734375, "learning_rate": 0.001824297872900766, "loss": 5.2043, "step": 7412 }, { "epoch": 0.8415764658301138, "grad_norm": 0.263671875, "learning_rate": 0.0018233376741477098, "loss": 5.2403, "step": 7413 }, { "epoch": 0.8416899929400329, "grad_norm": 0.263671875, "learning_rate": 0.0018223751442747271, "loss": 5.2441, "step": 7414 }, { "epoch": 0.841803520049952, "grad_norm": 0.26953125, "learning_rate": 0.001821410286383789, "loss": 5.2222, "step": 7415 }, { "epoch": 0.841917047159871, "grad_norm": 0.279296875, "learning_rate": 0.001820443103584369, "loss": 5.2222, "step": 7416 }, { "epoch": 0.8420305742697901, "grad_norm": 0.296875, "learning_rate": 0.0018194735989934337, "loss": 5.2188, "step": 7417 }, { "epoch": 0.8421441013797092, "grad_norm": 0.294921875, "learning_rate": 0.0018185017757354313, "loss": 5.2177, "step": 7418 }, { "epoch": 0.8422576284896283, "grad_norm": 0.302734375, "learning_rate": 0.0018175276369422832, "loss": 5.2055, "step": 7419 }, { "epoch": 0.8423711555995473, "grad_norm": 0.302734375, "learning_rate": 0.0018165511857533736, "loss": 5.1936, "step": 7420 }, { "epoch": 0.8424846827094664, "grad_norm": 0.3125, "learning_rate": 0.0018155724253155368, "loss": 5.2188, "step": 7421 }, { "epoch": 0.8425982098193855, "grad_norm": 0.2734375, "learning_rate": 0.0018145913587830518, "loss": 5.2243, "step": 7422 }, { "epoch": 0.8427117369293046, "grad_norm": 0.287109375, "learning_rate": 0.0018136079893176279, "loss": 5.2046, "step": 7423 }, { "epoch": 0.8428252640392236, "grad_norm": 0.259765625, "learning_rate": 0.001812622320088396, "loss": 5.2183, "step": 7424 }, { "epoch": 0.8429387911491427, "grad_norm": 0.2490234375, "learning_rate": 0.0018116343542719001, "loss": 5.1986, "step": 7425 }, { "epoch": 0.8430523182590618, "grad_norm": 0.2392578125, "learning_rate": 0.0018106440950520835, "loss": 5.2239, "step": 7426 }, { "epoch": 0.8431658453689809, "grad_norm": 0.2451171875, "learning_rate": 0.0018096515456202818, "loss": 5.2253, "step": 7427 }, { "epoch": 0.8432793724789, "grad_norm": 0.251953125, "learning_rate": 0.0018086567091752108, "loss": 5.2263, "step": 7428 }, { "epoch": 0.843392899588819, "grad_norm": 0.283203125, "learning_rate": 0.001807659588922957, "loss": 5.2151, "step": 7429 }, { "epoch": 0.8435064266987381, "grad_norm": 0.30859375, "learning_rate": 0.001806660188076967, "loss": 5.2337, "step": 7430 }, { "epoch": 0.8436199538086572, "grad_norm": 0.32421875, "learning_rate": 0.001805658509858037, "loss": 5.2317, "step": 7431 }, { "epoch": 0.8437334809185763, "grad_norm": 0.345703125, "learning_rate": 0.0018046545574943022, "loss": 5.238, "step": 7432 }, { "epoch": 0.8438470080284953, "grad_norm": 0.32421875, "learning_rate": 0.0018036483342212268, "loss": 5.2044, "step": 7433 }, { "epoch": 0.8439605351384144, "grad_norm": 0.33984375, "learning_rate": 0.0018026398432815947, "loss": 5.2297, "step": 7434 }, { "epoch": 0.8440740622483335, "grad_norm": 0.349609375, "learning_rate": 0.0018016290879254966, "loss": 5.2301, "step": 7435 }, { "epoch": 0.8441875893582526, "grad_norm": 0.37109375, "learning_rate": 0.0018006160714103213, "loss": 5.2368, "step": 7436 }, { "epoch": 0.8443011164681716, "grad_norm": 0.375, "learning_rate": 0.001799600797000744, "loss": 5.2175, "step": 7437 }, { "epoch": 0.8444146435780907, "grad_norm": 0.3828125, "learning_rate": 0.001798583267968718, "loss": 5.2314, "step": 7438 }, { "epoch": 0.8445281706880098, "grad_norm": 0.36328125, "learning_rate": 0.0017975634875934605, "loss": 5.205, "step": 7439 }, { "epoch": 0.8446416977979289, "grad_norm": 0.38671875, "learning_rate": 0.0017965414591614465, "loss": 5.2173, "step": 7440 }, { "epoch": 0.8447552249078479, "grad_norm": 0.359375, "learning_rate": 0.0017955171859663941, "loss": 5.2231, "step": 7441 }, { "epoch": 0.844868752017767, "grad_norm": 0.34765625, "learning_rate": 0.0017944906713092566, "loss": 5.2094, "step": 7442 }, { "epoch": 0.8449822791276861, "grad_norm": 0.296875, "learning_rate": 0.0017934619184982103, "loss": 5.2393, "step": 7443 }, { "epoch": 0.8450958062376052, "grad_norm": 0.279296875, "learning_rate": 0.001792430930848645, "loss": 5.2216, "step": 7444 }, { "epoch": 0.8452093333475242, "grad_norm": 0.2578125, "learning_rate": 0.0017913977116831524, "loss": 5.2167, "step": 7445 }, { "epoch": 0.8453228604574433, "grad_norm": 0.2490234375, "learning_rate": 0.0017903622643315159, "loss": 5.2135, "step": 7446 }, { "epoch": 0.8454363875673624, "grad_norm": 0.26953125, "learning_rate": 0.0017893245921307, "loss": 5.2207, "step": 7447 }, { "epoch": 0.8455499146772815, "grad_norm": 0.271484375, "learning_rate": 0.0017882846984248386, "loss": 5.2202, "step": 7448 }, { "epoch": 0.8456634417872005, "grad_norm": 0.283203125, "learning_rate": 0.0017872425865652257, "loss": 5.203, "step": 7449 }, { "epoch": 0.8457769688971196, "grad_norm": 0.265625, "learning_rate": 0.001786198259910303, "loss": 5.2125, "step": 7450 }, { "epoch": 0.8458904960070387, "grad_norm": 0.26953125, "learning_rate": 0.001785151721825651, "loss": 5.2425, "step": 7451 }, { "epoch": 0.8460040231169578, "grad_norm": 0.26953125, "learning_rate": 0.0017841029756839758, "loss": 5.2119, "step": 7452 }, { "epoch": 0.8461175502268768, "grad_norm": 0.28515625, "learning_rate": 0.0017830520248651, "loss": 5.2316, "step": 7453 }, { "epoch": 0.8462310773367959, "grad_norm": 0.275390625, "learning_rate": 0.0017819988727559514, "loss": 5.2465, "step": 7454 }, { "epoch": 0.846344604446715, "grad_norm": 0.279296875, "learning_rate": 0.0017809435227505522, "loss": 5.2205, "step": 7455 }, { "epoch": 0.8464581315566341, "grad_norm": 0.267578125, "learning_rate": 0.001779885978250007, "loss": 5.2195, "step": 7456 }, { "epoch": 0.8465716586665532, "grad_norm": 0.267578125, "learning_rate": 0.0017788262426624935, "loss": 5.2111, "step": 7457 }, { "epoch": 0.8466851857764722, "grad_norm": 0.248046875, "learning_rate": 0.00177776431940325, "loss": 5.2225, "step": 7458 }, { "epoch": 0.8467987128863913, "grad_norm": 0.244140625, "learning_rate": 0.0017767002118945652, "loss": 5.219, "step": 7459 }, { "epoch": 0.8469122399963104, "grad_norm": 0.2333984375, "learning_rate": 0.0017756339235657677, "loss": 5.22, "step": 7460 }, { "epoch": 0.8470257671062295, "grad_norm": 0.2373046875, "learning_rate": 0.0017745654578532133, "loss": 5.2136, "step": 7461 }, { "epoch": 0.8471392942161485, "grad_norm": 0.236328125, "learning_rate": 0.0017734948182002762, "loss": 5.2108, "step": 7462 }, { "epoch": 0.8472528213260676, "grad_norm": 0.263671875, "learning_rate": 0.0017724220080573348, "loss": 5.2412, "step": 7463 }, { "epoch": 0.8473663484359867, "grad_norm": 0.279296875, "learning_rate": 0.001771347030881764, "loss": 5.2288, "step": 7464 }, { "epoch": 0.8474798755459058, "grad_norm": 0.283203125, "learning_rate": 0.0017702698901379218, "loss": 5.2324, "step": 7465 }, { "epoch": 0.8475934026558248, "grad_norm": 0.2890625, "learning_rate": 0.001769190589297139, "loss": 5.2034, "step": 7466 }, { "epoch": 0.8477069297657439, "grad_norm": 0.302734375, "learning_rate": 0.0017681091318377073, "loss": 5.2271, "step": 7467 }, { "epoch": 0.847820456875663, "grad_norm": 0.294921875, "learning_rate": 0.0017670255212448693, "loss": 5.2168, "step": 7468 }, { "epoch": 0.8479339839855821, "grad_norm": 0.318359375, "learning_rate": 0.001765939761010806, "loss": 5.2402, "step": 7469 }, { "epoch": 0.8480475110955011, "grad_norm": 0.294921875, "learning_rate": 0.001764851854634627, "loss": 5.2227, "step": 7470 }, { "epoch": 0.8481610382054202, "grad_norm": 0.283203125, "learning_rate": 0.0017637618056223566, "loss": 5.2304, "step": 7471 }, { "epoch": 0.8482745653153393, "grad_norm": 0.265625, "learning_rate": 0.0017626696174869259, "loss": 5.2111, "step": 7472 }, { "epoch": 0.8483880924252584, "grad_norm": 0.26171875, "learning_rate": 0.001761575293748159, "loss": 5.2403, "step": 7473 }, { "epoch": 0.8485016195351774, "grad_norm": 0.2431640625, "learning_rate": 0.001760478837932763, "loss": 5.2097, "step": 7474 }, { "epoch": 0.8486151466450965, "grad_norm": 0.2353515625, "learning_rate": 0.0017593802535743155, "loss": 5.2026, "step": 7475 }, { "epoch": 0.8487286737550156, "grad_norm": 0.255859375, "learning_rate": 0.0017582795442132543, "loss": 5.2282, "step": 7476 }, { "epoch": 0.8488422008649347, "grad_norm": 0.240234375, "learning_rate": 0.001757176713396865, "loss": 5.2219, "step": 7477 }, { "epoch": 0.8489557279748537, "grad_norm": 0.271484375, "learning_rate": 0.0017560717646792703, "loss": 5.2334, "step": 7478 }, { "epoch": 0.8490692550847728, "grad_norm": 0.259765625, "learning_rate": 0.0017549647016214186, "loss": 5.2092, "step": 7479 }, { "epoch": 0.8491827821946919, "grad_norm": 0.25390625, "learning_rate": 0.0017538555277910725, "loss": 5.2101, "step": 7480 }, { "epoch": 0.849296309304611, "grad_norm": 0.259765625, "learning_rate": 0.0017527442467627954, "loss": 5.2334, "step": 7481 }, { "epoch": 0.84940983641453, "grad_norm": 0.275390625, "learning_rate": 0.0017516308621179438, "loss": 5.2092, "step": 7482 }, { "epoch": 0.8495233635244491, "grad_norm": 0.2734375, "learning_rate": 0.0017505153774446526, "loss": 5.219, "step": 7483 }, { "epoch": 0.8496368906343682, "grad_norm": 0.298828125, "learning_rate": 0.0017493977963378233, "loss": 5.2115, "step": 7484 }, { "epoch": 0.8497504177442873, "grad_norm": 0.2890625, "learning_rate": 0.0017482781223991163, "loss": 5.1933, "step": 7485 }, { "epoch": 0.8498639448542064, "grad_norm": 0.310546875, "learning_rate": 0.0017471563592369342, "loss": 5.2172, "step": 7486 }, { "epoch": 0.8499774719641254, "grad_norm": 0.314453125, "learning_rate": 0.0017460325104664137, "loss": 5.2017, "step": 7487 }, { "epoch": 0.8500909990740445, "grad_norm": 0.30078125, "learning_rate": 0.0017449065797094131, "loss": 5.2281, "step": 7488 }, { "epoch": 0.8502045261839636, "grad_norm": 0.283203125, "learning_rate": 0.0017437785705944992, "loss": 5.194, "step": 7489 }, { "epoch": 0.8503180532938827, "grad_norm": 0.265625, "learning_rate": 0.0017426484867569389, "loss": 5.2077, "step": 7490 }, { "epoch": 0.8504315804038017, "grad_norm": 0.24609375, "learning_rate": 0.0017415163318386822, "loss": 5.2195, "step": 7491 }, { "epoch": 0.8505451075137208, "grad_norm": 0.244140625, "learning_rate": 0.001740382109488357, "loss": 5.2201, "step": 7492 }, { "epoch": 0.8506586346236399, "grad_norm": 0.2333984375, "learning_rate": 0.0017392458233612513, "loss": 5.2055, "step": 7493 }, { "epoch": 0.850772161733559, "grad_norm": 0.2353515625, "learning_rate": 0.0017381074771193059, "loss": 5.217, "step": 7494 }, { "epoch": 0.850885688843478, "grad_norm": 0.232421875, "learning_rate": 0.0017369670744310998, "loss": 5.2222, "step": 7495 }, { "epoch": 0.8509992159533971, "grad_norm": 0.232421875, "learning_rate": 0.00173582461897184, "loss": 5.202, "step": 7496 }, { "epoch": 0.8511127430633162, "grad_norm": 0.2314453125, "learning_rate": 0.0017346801144233483, "loss": 5.2117, "step": 7497 }, { "epoch": 0.8512262701732353, "grad_norm": 0.224609375, "learning_rate": 0.0017335335644740504, "loss": 5.2163, "step": 7498 }, { "epoch": 0.8513397972831543, "grad_norm": 0.2197265625, "learning_rate": 0.0017323849728189645, "loss": 5.2031, "step": 7499 }, { "epoch": 0.8514533243930734, "grad_norm": 0.21875, "learning_rate": 0.0017312343431596874, "loss": 5.2112, "step": 7500 }, { "epoch": 0.8515668515029925, "grad_norm": 0.22265625, "learning_rate": 0.0017300816792043849, "loss": 5.2134, "step": 7501 }, { "epoch": 0.8516803786129116, "grad_norm": 0.2294921875, "learning_rate": 0.0017289269846677779, "loss": 5.1887, "step": 7502 }, { "epoch": 0.8517939057228306, "grad_norm": 0.2490234375, "learning_rate": 0.0017277702632711323, "loss": 5.2175, "step": 7503 }, { "epoch": 0.8519074328327497, "grad_norm": 0.251953125, "learning_rate": 0.0017266115187422452, "loss": 5.2027, "step": 7504 }, { "epoch": 0.8520209599426688, "grad_norm": 0.263671875, "learning_rate": 0.001725450754815434, "loss": 5.2239, "step": 7505 }, { "epoch": 0.8521344870525879, "grad_norm": 0.267578125, "learning_rate": 0.0017242879752315244, "loss": 5.2257, "step": 7506 }, { "epoch": 0.852248014162507, "grad_norm": 0.265625, "learning_rate": 0.0017231231837378377, "loss": 5.2303, "step": 7507 }, { "epoch": 0.852361541272426, "grad_norm": 0.2490234375, "learning_rate": 0.0017219563840881783, "loss": 5.1914, "step": 7508 }, { "epoch": 0.8524750683823451, "grad_norm": 0.251953125, "learning_rate": 0.0017207875800428235, "loss": 5.2349, "step": 7509 }, { "epoch": 0.8525885954922642, "grad_norm": 0.2275390625, "learning_rate": 0.00171961677536851, "loss": 5.2074, "step": 7510 }, { "epoch": 0.8527021226021833, "grad_norm": 0.234375, "learning_rate": 0.0017184439738384214, "loss": 5.203, "step": 7511 }, { "epoch": 0.8528156497121023, "grad_norm": 0.220703125, "learning_rate": 0.0017172691792321773, "loss": 5.219, "step": 7512 }, { "epoch": 0.8529291768220214, "grad_norm": 0.2265625, "learning_rate": 0.0017160923953358199, "loss": 5.2137, "step": 7513 }, { "epoch": 0.8530427039319405, "grad_norm": 0.21484375, "learning_rate": 0.0017149136259418025, "loss": 5.2038, "step": 7514 }, { "epoch": 0.8531562310418596, "grad_norm": 0.2255859375, "learning_rate": 0.001713732874848977, "loss": 5.206, "step": 7515 }, { "epoch": 0.8532697581517786, "grad_norm": 0.2333984375, "learning_rate": 0.0017125501458625826, "loss": 5.2176, "step": 7516 }, { "epoch": 0.8533832852616977, "grad_norm": 0.251953125, "learning_rate": 0.0017113654427942317, "loss": 5.2352, "step": 7517 }, { "epoch": 0.8534968123716168, "grad_norm": 0.26953125, "learning_rate": 0.0017101787694618988, "loss": 5.225, "step": 7518 }, { "epoch": 0.8536103394815359, "grad_norm": 0.287109375, "learning_rate": 0.0017089901296899084, "loss": 5.1893, "step": 7519 }, { "epoch": 0.8537238665914549, "grad_norm": 0.287109375, "learning_rate": 0.0017077995273089219, "loss": 5.2056, "step": 7520 }, { "epoch": 0.853837393701374, "grad_norm": 0.29296875, "learning_rate": 0.0017066069661559266, "loss": 5.2175, "step": 7521 }, { "epoch": 0.8539509208112931, "grad_norm": 0.28515625, "learning_rate": 0.0017054124500742204, "loss": 5.2128, "step": 7522 }, { "epoch": 0.8540644479212122, "grad_norm": 0.2734375, "learning_rate": 0.0017042159829134037, "loss": 5.2205, "step": 7523 }, { "epoch": 0.8541779750311312, "grad_norm": 0.26171875, "learning_rate": 0.001703017568529363, "loss": 5.2286, "step": 7524 }, { "epoch": 0.8542915021410503, "grad_norm": 0.25390625, "learning_rate": 0.0017018172107842616, "loss": 5.2186, "step": 7525 }, { "epoch": 0.8544050292509694, "grad_norm": 0.2470703125, "learning_rate": 0.0017006149135465238, "loss": 5.2357, "step": 7526 }, { "epoch": 0.8545185563608885, "grad_norm": 0.234375, "learning_rate": 0.0016994106806908266, "loss": 5.1987, "step": 7527 }, { "epoch": 0.8546320834708075, "grad_norm": 0.2451171875, "learning_rate": 0.0016982045160980831, "loss": 5.2158, "step": 7528 }, { "epoch": 0.8547456105807266, "grad_norm": 0.25, "learning_rate": 0.0016969964236554329, "loss": 5.2177, "step": 7529 }, { "epoch": 0.8548591376906457, "grad_norm": 0.2578125, "learning_rate": 0.0016957864072562286, "loss": 5.2184, "step": 7530 }, { "epoch": 0.8549726648005648, "grad_norm": 0.2392578125, "learning_rate": 0.0016945744708000221, "loss": 5.211, "step": 7531 }, { "epoch": 0.8550861919104839, "grad_norm": 0.26171875, "learning_rate": 0.0016933606181925547, "loss": 5.2166, "step": 7532 }, { "epoch": 0.8551997190204029, "grad_norm": 0.2412109375, "learning_rate": 0.0016921448533457415, "loss": 5.2279, "step": 7533 }, { "epoch": 0.855313246130322, "grad_norm": 0.26171875, "learning_rate": 0.0016909271801776608, "loss": 5.2373, "step": 7534 }, { "epoch": 0.8554267732402411, "grad_norm": 0.2451171875, "learning_rate": 0.0016897076026125414, "loss": 5.2019, "step": 7535 }, { "epoch": 0.8555403003501602, "grad_norm": 0.2470703125, "learning_rate": 0.0016884861245807486, "loss": 5.2134, "step": 7536 }, { "epoch": 0.8556538274600792, "grad_norm": 0.2265625, "learning_rate": 0.0016872627500187727, "loss": 5.2257, "step": 7537 }, { "epoch": 0.8557673545699983, "grad_norm": 0.216796875, "learning_rate": 0.0016860374828692162, "loss": 5.2173, "step": 7538 }, { "epoch": 0.8558808816799174, "grad_norm": 0.2109375, "learning_rate": 0.0016848103270807808, "loss": 5.2107, "step": 7539 }, { "epoch": 0.8559944087898365, "grad_norm": 0.2060546875, "learning_rate": 0.0016835812866082547, "loss": 5.2267, "step": 7540 }, { "epoch": 0.8561079358997555, "grad_norm": 0.2216796875, "learning_rate": 0.0016823503654125002, "loss": 5.2244, "step": 7541 }, { "epoch": 0.8562214630096746, "grad_norm": 0.2255859375, "learning_rate": 0.0016811175674604395, "loss": 5.2292, "step": 7542 }, { "epoch": 0.8563349901195937, "grad_norm": 0.26171875, "learning_rate": 0.001679882896725045, "loss": 5.2056, "step": 7543 }, { "epoch": 0.8564485172295128, "grad_norm": 0.279296875, "learning_rate": 0.001678646357185323, "loss": 5.2114, "step": 7544 }, { "epoch": 0.8565620443394318, "grad_norm": 0.318359375, "learning_rate": 0.001677407952826303, "loss": 5.2037, "step": 7545 }, { "epoch": 0.8566755714493509, "grad_norm": 0.30078125, "learning_rate": 0.0016761676876390246, "loss": 5.2066, "step": 7546 }, { "epoch": 0.85678909855927, "grad_norm": 0.279296875, "learning_rate": 0.0016749255656205238, "loss": 5.1757, "step": 7547 }, { "epoch": 0.8569026256691891, "grad_norm": 0.283203125, "learning_rate": 0.0016736815907738207, "loss": 5.2106, "step": 7548 }, { "epoch": 0.8570161527791081, "grad_norm": 0.283203125, "learning_rate": 0.001672435767107907, "loss": 5.2196, "step": 7549 }, { "epoch": 0.8571296798890272, "grad_norm": 0.265625, "learning_rate": 0.0016711880986377328, "loss": 5.2458, "step": 7550 }, { "epoch": 0.8572432069989463, "grad_norm": 0.265625, "learning_rate": 0.0016699385893841924, "loss": 5.214, "step": 7551 }, { "epoch": 0.8573567341088654, "grad_norm": 0.251953125, "learning_rate": 0.0016686872433741133, "loss": 5.2182, "step": 7552 }, { "epoch": 0.8574702612187844, "grad_norm": 0.26953125, "learning_rate": 0.0016674340646402424, "loss": 5.241, "step": 7553 }, { "epoch": 0.8575837883287035, "grad_norm": 0.259765625, "learning_rate": 0.0016661790572212328, "loss": 5.1946, "step": 7554 }, { "epoch": 0.8576973154386226, "grad_norm": 0.263671875, "learning_rate": 0.0016649222251616305, "loss": 5.2025, "step": 7555 }, { "epoch": 0.8578108425485417, "grad_norm": 0.28125, "learning_rate": 0.0016636635725118627, "loss": 5.2113, "step": 7556 }, { "epoch": 0.8579243696584608, "grad_norm": 0.28515625, "learning_rate": 0.0016624031033282232, "loss": 5.2039, "step": 7557 }, { "epoch": 0.8580378967683798, "grad_norm": 0.2890625, "learning_rate": 0.0016611408216728604, "loss": 5.2319, "step": 7558 }, { "epoch": 0.8581514238782989, "grad_norm": 0.283203125, "learning_rate": 0.0016598767316137633, "loss": 5.2122, "step": 7559 }, { "epoch": 0.858264950988218, "grad_norm": 0.265625, "learning_rate": 0.0016586108372247492, "loss": 5.2349, "step": 7560 }, { "epoch": 0.858378478098137, "grad_norm": 0.2490234375, "learning_rate": 0.0016573431425854503, "loss": 5.2317, "step": 7561 }, { "epoch": 0.8584920052080561, "grad_norm": 0.255859375, "learning_rate": 0.0016560736517813011, "loss": 5.2297, "step": 7562 }, { "epoch": 0.8586055323179752, "grad_norm": 0.2294921875, "learning_rate": 0.0016548023689035229, "loss": 5.2215, "step": 7563 }, { "epoch": 0.8587190594278943, "grad_norm": 0.234375, "learning_rate": 0.0016535292980491146, "loss": 5.2162, "step": 7564 }, { "epoch": 0.8588325865378134, "grad_norm": 0.22265625, "learning_rate": 0.0016522544433208353, "loss": 5.2133, "step": 7565 }, { "epoch": 0.8589461136477324, "grad_norm": 0.212890625, "learning_rate": 0.0016509778088271941, "loss": 5.2195, "step": 7566 }, { "epoch": 0.8590596407576515, "grad_norm": 0.20703125, "learning_rate": 0.001649699398682436, "loss": 5.2002, "step": 7567 }, { "epoch": 0.8591731678675706, "grad_norm": 0.2119140625, "learning_rate": 0.0016484192170065275, "loss": 5.2216, "step": 7568 }, { "epoch": 0.8592866949774897, "grad_norm": 0.2041015625, "learning_rate": 0.001647137267925145, "loss": 5.2125, "step": 7569 }, { "epoch": 0.8594002220874087, "grad_norm": 0.2060546875, "learning_rate": 0.0016458535555696602, "loss": 5.2175, "step": 7570 }, { "epoch": 0.8595137491973278, "grad_norm": 0.2099609375, "learning_rate": 0.0016445680840771284, "loss": 5.2155, "step": 7571 }, { "epoch": 0.8596272763072469, "grad_norm": 0.208984375, "learning_rate": 0.001643280857590273, "loss": 5.2244, "step": 7572 }, { "epoch": 0.859740803417166, "grad_norm": 0.216796875, "learning_rate": 0.001641991880257474, "loss": 5.2184, "step": 7573 }, { "epoch": 0.859854330527085, "grad_norm": 0.232421875, "learning_rate": 0.001640701156232753, "loss": 5.2109, "step": 7574 }, { "epoch": 0.8599678576370041, "grad_norm": 0.2314453125, "learning_rate": 0.0016394086896757616, "loss": 5.2202, "step": 7575 }, { "epoch": 0.8600813847469232, "grad_norm": 0.244140625, "learning_rate": 0.0016381144847517671, "loss": 5.2075, "step": 7576 }, { "epoch": 0.8601949118568423, "grad_norm": 0.2431640625, "learning_rate": 0.0016368185456316381, "loss": 5.2054, "step": 7577 }, { "epoch": 0.8603084389667613, "grad_norm": 0.248046875, "learning_rate": 0.0016355208764918334, "loss": 5.1963, "step": 7578 }, { "epoch": 0.8604219660766804, "grad_norm": 0.244140625, "learning_rate": 0.001634221481514386, "loss": 5.2361, "step": 7579 }, { "epoch": 0.8605354931865995, "grad_norm": 0.255859375, "learning_rate": 0.001632920364886892, "loss": 5.224, "step": 7580 }, { "epoch": 0.8606490202965186, "grad_norm": 0.248046875, "learning_rate": 0.0016316175308024943, "loss": 5.2148, "step": 7581 }, { "epoch": 0.8607625474064377, "grad_norm": 0.255859375, "learning_rate": 0.0016303129834598724, "loss": 5.2131, "step": 7582 }, { "epoch": 0.8608760745163567, "grad_norm": 0.27734375, "learning_rate": 0.001629006727063226, "loss": 5.2143, "step": 7583 }, { "epoch": 0.8609896016262758, "grad_norm": 0.294921875, "learning_rate": 0.001627698765822263, "loss": 5.2074, "step": 7584 }, { "epoch": 0.8611031287361949, "grad_norm": 0.298828125, "learning_rate": 0.0016263891039521854, "loss": 5.1969, "step": 7585 }, { "epoch": 0.861216655846114, "grad_norm": 0.30078125, "learning_rate": 0.0016250777456736762, "loss": 5.207, "step": 7586 }, { "epoch": 0.861330182956033, "grad_norm": 0.294921875, "learning_rate": 0.0016237646952128855, "loss": 5.1918, "step": 7587 }, { "epoch": 0.8614437100659521, "grad_norm": 0.30078125, "learning_rate": 0.001622449956801416, "loss": 5.2103, "step": 7588 }, { "epoch": 0.8615572371758712, "grad_norm": 0.2578125, "learning_rate": 0.0016211335346763116, "loss": 5.2023, "step": 7589 }, { "epoch": 0.8616707642857903, "grad_norm": 0.26171875, "learning_rate": 0.0016198154330800407, "loss": 5.2133, "step": 7590 }, { "epoch": 0.8617842913957093, "grad_norm": 0.232421875, "learning_rate": 0.0016184956562604858, "loss": 5.1942, "step": 7591 }, { "epoch": 0.8618978185056284, "grad_norm": 0.232421875, "learning_rate": 0.0016171742084709268, "loss": 5.2057, "step": 7592 }, { "epoch": 0.8620113456155475, "grad_norm": 0.2294921875, "learning_rate": 0.00161585109397003, "loss": 5.2038, "step": 7593 }, { "epoch": 0.8621248727254666, "grad_norm": 0.236328125, "learning_rate": 0.0016145263170218318, "loss": 5.2123, "step": 7594 }, { "epoch": 0.8622383998353856, "grad_norm": 0.2197265625, "learning_rate": 0.0016131998818957269, "loss": 5.1869, "step": 7595 }, { "epoch": 0.8623519269453047, "grad_norm": 0.220703125, "learning_rate": 0.0016118717928664537, "loss": 5.1967, "step": 7596 }, { "epoch": 0.8624654540552238, "grad_norm": 0.220703125, "learning_rate": 0.0016105420542140809, "loss": 5.2073, "step": 7597 }, { "epoch": 0.8625789811651429, "grad_norm": 0.216796875, "learning_rate": 0.001609210670223993, "loss": 5.2152, "step": 7598 }, { "epoch": 0.8626925082750619, "grad_norm": 0.220703125, "learning_rate": 0.0016078776451868774, "loss": 5.2186, "step": 7599 }, { "epoch": 0.862806035384981, "grad_norm": 0.2197265625, "learning_rate": 0.0016065429833987104, "loss": 5.2069, "step": 7600 }, { "epoch": 0.8629195624949001, "grad_norm": 0.2216796875, "learning_rate": 0.001605206689160742, "loss": 5.1865, "step": 7601 }, { "epoch": 0.8630330896048193, "grad_norm": 0.2353515625, "learning_rate": 0.0016038687667794846, "loss": 5.2122, "step": 7602 }, { "epoch": 0.8631466167147384, "grad_norm": 0.2275390625, "learning_rate": 0.001602529220566696, "loss": 5.2025, "step": 7603 }, { "epoch": 0.8632601438246574, "grad_norm": 0.2255859375, "learning_rate": 0.0016011880548393693, "loss": 5.2108, "step": 7604 }, { "epoch": 0.8633736709345765, "grad_norm": 0.2275390625, "learning_rate": 0.0015998452739197145, "loss": 5.19, "step": 7605 }, { "epoch": 0.8634871980444956, "grad_norm": 0.2275390625, "learning_rate": 0.0015985008821351489, "loss": 5.2129, "step": 7606 }, { "epoch": 0.8636007251544147, "grad_norm": 0.23046875, "learning_rate": 0.0015971548838182793, "loss": 5.2129, "step": 7607 }, { "epoch": 0.8637142522643337, "grad_norm": 0.216796875, "learning_rate": 0.0015958072833068924, "loss": 5.2162, "step": 7608 }, { "epoch": 0.8638277793742528, "grad_norm": 0.208984375, "learning_rate": 0.0015944580849439358, "loss": 5.2314, "step": 7609 }, { "epoch": 0.8639413064841719, "grad_norm": 0.197265625, "learning_rate": 0.0015931072930775076, "loss": 5.21, "step": 7610 }, { "epoch": 0.864054833594091, "grad_norm": 0.1904296875, "learning_rate": 0.0015917549120608423, "loss": 5.1931, "step": 7611 }, { "epoch": 0.86416836070401, "grad_norm": 0.193359375, "learning_rate": 0.001590400946252294, "loss": 5.2007, "step": 7612 }, { "epoch": 0.8642818878139291, "grad_norm": 0.1962890625, "learning_rate": 0.0015890454000153254, "loss": 5.203, "step": 7613 }, { "epoch": 0.8643954149238482, "grad_norm": 0.22265625, "learning_rate": 0.0015876882777184915, "loss": 5.2026, "step": 7614 }, { "epoch": 0.8645089420337673, "grad_norm": 0.2158203125, "learning_rate": 0.0015863295837354279, "loss": 5.2039, "step": 7615 }, { "epoch": 0.8646224691436863, "grad_norm": 0.2412109375, "learning_rate": 0.001584969322444834, "loss": 5.2212, "step": 7616 }, { "epoch": 0.8647359962536054, "grad_norm": 0.236328125, "learning_rate": 0.0015836074982304613, "loss": 5.2077, "step": 7617 }, { "epoch": 0.8648495233635245, "grad_norm": 0.25, "learning_rate": 0.001582244115481097, "loss": 5.2058, "step": 7618 }, { "epoch": 0.8649630504734436, "grad_norm": 0.23828125, "learning_rate": 0.001580879178590552, "loss": 5.1931, "step": 7619 }, { "epoch": 0.8650765775833626, "grad_norm": 0.244140625, "learning_rate": 0.001579512691957645, "loss": 5.218, "step": 7620 }, { "epoch": 0.8651901046932817, "grad_norm": 0.2265625, "learning_rate": 0.00157814465998619, "loss": 5.2069, "step": 7621 }, { "epoch": 0.8653036318032008, "grad_norm": 0.2275390625, "learning_rate": 0.0015767750870849804, "loss": 5.2055, "step": 7622 }, { "epoch": 0.8654171589131199, "grad_norm": 0.216796875, "learning_rate": 0.0015754039776677761, "loss": 5.1989, "step": 7623 }, { "epoch": 0.865530686023039, "grad_norm": 0.216796875, "learning_rate": 0.0015740313361532882, "loss": 5.2105, "step": 7624 }, { "epoch": 0.865644213132958, "grad_norm": 0.21484375, "learning_rate": 0.0015726571669651664, "loss": 5.2033, "step": 7625 }, { "epoch": 0.8657577402428771, "grad_norm": 0.2197265625, "learning_rate": 0.0015712814745319822, "loss": 5.1956, "step": 7626 }, { "epoch": 0.8658712673527962, "grad_norm": 0.2236328125, "learning_rate": 0.0015699042632872173, "loss": 5.2051, "step": 7627 }, { "epoch": 0.8659847944627153, "grad_norm": 0.228515625, "learning_rate": 0.0015685255376692478, "loss": 5.2271, "step": 7628 }, { "epoch": 0.8660983215726343, "grad_norm": 0.23828125, "learning_rate": 0.0015671453021213297, "loss": 5.1899, "step": 7629 }, { "epoch": 0.8662118486825534, "grad_norm": 0.2431640625, "learning_rate": 0.0015657635610915861, "loss": 5.2224, "step": 7630 }, { "epoch": 0.8663253757924725, "grad_norm": 0.234375, "learning_rate": 0.001564380319032991, "loss": 5.1975, "step": 7631 }, { "epoch": 0.8664389029023916, "grad_norm": 0.2451171875, "learning_rate": 0.0015629955804033558, "loss": 5.1879, "step": 7632 }, { "epoch": 0.8665524300123106, "grad_norm": 0.2353515625, "learning_rate": 0.0015616093496653156, "loss": 5.223, "step": 7633 }, { "epoch": 0.8666659571222297, "grad_norm": 0.25390625, "learning_rate": 0.0015602216312863135, "loss": 5.2054, "step": 7634 }, { "epoch": 0.8667794842321488, "grad_norm": 0.25390625, "learning_rate": 0.0015588324297385872, "loss": 5.2198, "step": 7635 }, { "epoch": 0.8668930113420679, "grad_norm": 0.2734375, "learning_rate": 0.0015574417494991545, "loss": 5.2062, "step": 7636 }, { "epoch": 0.8670065384519869, "grad_norm": 0.265625, "learning_rate": 0.0015560495950497977, "loss": 5.1861, "step": 7637 }, { "epoch": 0.867120065561906, "grad_norm": 0.275390625, "learning_rate": 0.0015546559708770511, "loss": 5.1992, "step": 7638 }, { "epoch": 0.8672335926718251, "grad_norm": 0.27734375, "learning_rate": 0.0015532608814721848, "loss": 5.2023, "step": 7639 }, { "epoch": 0.8673471197817442, "grad_norm": 0.28515625, "learning_rate": 0.0015518643313311915, "loss": 5.1679, "step": 7640 }, { "epoch": 0.8674606468916632, "grad_norm": 0.267578125, "learning_rate": 0.0015504663249547713, "loss": 5.1957, "step": 7641 }, { "epoch": 0.8675741740015823, "grad_norm": 0.283203125, "learning_rate": 0.0015490668668483166, "loss": 5.2118, "step": 7642 }, { "epoch": 0.8676877011115014, "grad_norm": 0.26953125, "learning_rate": 0.0015476659615218998, "loss": 5.2051, "step": 7643 }, { "epoch": 0.8678012282214205, "grad_norm": 0.267578125, "learning_rate": 0.0015462636134902562, "loss": 5.185, "step": 7644 }, { "epoch": 0.8679147553313395, "grad_norm": 0.265625, "learning_rate": 0.0015448598272727702, "loss": 5.2109, "step": 7645 }, { "epoch": 0.8680282824412586, "grad_norm": 0.265625, "learning_rate": 0.0015434546073934622, "loss": 5.2174, "step": 7646 }, { "epoch": 0.8681418095511777, "grad_norm": 0.240234375, "learning_rate": 0.0015420479583809728, "loss": 5.1935, "step": 7647 }, { "epoch": 0.8682553366610968, "grad_norm": 0.2294921875, "learning_rate": 0.0015406398847685472, "loss": 5.2076, "step": 7648 }, { "epoch": 0.8683688637710159, "grad_norm": 0.205078125, "learning_rate": 0.0015392303910940228, "loss": 5.1892, "step": 7649 }, { "epoch": 0.8684823908809349, "grad_norm": 0.193359375, "learning_rate": 0.0015378194818998125, "loss": 5.2103, "step": 7650 }, { "epoch": 0.868595917990854, "grad_norm": 0.1845703125, "learning_rate": 0.0015364071617328923, "loss": 5.2292, "step": 7651 }, { "epoch": 0.8687094451007731, "grad_norm": 0.1806640625, "learning_rate": 0.0015349934351447845, "loss": 5.2191, "step": 7652 }, { "epoch": 0.8688229722106922, "grad_norm": 0.1865234375, "learning_rate": 0.0015335783066915435, "loss": 5.2028, "step": 7653 }, { "epoch": 0.8689364993206112, "grad_norm": 0.193359375, "learning_rate": 0.0015321617809337432, "loss": 5.1941, "step": 7654 }, { "epoch": 0.8690500264305303, "grad_norm": 0.1982421875, "learning_rate": 0.0015307438624364588, "loss": 5.222, "step": 7655 }, { "epoch": 0.8691635535404494, "grad_norm": 0.2177734375, "learning_rate": 0.0015293245557692547, "loss": 5.1929, "step": 7656 }, { "epoch": 0.8692770806503685, "grad_norm": 0.2236328125, "learning_rate": 0.0015279038655061688, "loss": 5.2246, "step": 7657 }, { "epoch": 0.8693906077602875, "grad_norm": 0.2138671875, "learning_rate": 0.0015264817962256988, "loss": 5.1833, "step": 7658 }, { "epoch": 0.8695041348702066, "grad_norm": 0.2138671875, "learning_rate": 0.0015250583525107855, "loss": 5.2075, "step": 7659 }, { "epoch": 0.8696176619801257, "grad_norm": 0.19140625, "learning_rate": 0.0015236335389487996, "loss": 5.1948, "step": 7660 }, { "epoch": 0.8697311890900448, "grad_norm": 0.2041015625, "learning_rate": 0.001522207360131526, "loss": 5.2117, "step": 7661 }, { "epoch": 0.8698447161999638, "grad_norm": 0.19140625, "learning_rate": 0.0015207798206551503, "loss": 5.2014, "step": 7662 }, { "epoch": 0.8699582433098829, "grad_norm": 0.19140625, "learning_rate": 0.0015193509251202422, "loss": 5.2132, "step": 7663 }, { "epoch": 0.870071770419802, "grad_norm": 0.2021484375, "learning_rate": 0.001517920678131743, "loss": 5.2317, "step": 7664 }, { "epoch": 0.8701852975297211, "grad_norm": 0.1962890625, "learning_rate": 0.0015164890842989474, "loss": 5.2171, "step": 7665 }, { "epoch": 0.8702988246396401, "grad_norm": 0.2060546875, "learning_rate": 0.0015150561482354921, "loss": 5.1957, "step": 7666 }, { "epoch": 0.8704123517495592, "grad_norm": 0.208984375, "learning_rate": 0.0015136218745593394, "loss": 5.2112, "step": 7667 }, { "epoch": 0.8705258788594783, "grad_norm": 0.2041015625, "learning_rate": 0.001512186267892761, "loss": 5.188, "step": 7668 }, { "epoch": 0.8706394059693974, "grad_norm": 0.2001953125, "learning_rate": 0.0015107493328623258, "loss": 5.2025, "step": 7669 }, { "epoch": 0.8707529330793164, "grad_norm": 0.2060546875, "learning_rate": 0.0015093110740988837, "loss": 5.2158, "step": 7670 }, { "epoch": 0.8708664601892355, "grad_norm": 0.1923828125, "learning_rate": 0.0015078714962375498, "loss": 5.1936, "step": 7671 }, { "epoch": 0.8709799872991546, "grad_norm": 0.2060546875, "learning_rate": 0.0015064306039176906, "loss": 5.2162, "step": 7672 }, { "epoch": 0.8710935144090737, "grad_norm": 0.197265625, "learning_rate": 0.0015049884017829089, "loss": 5.2106, "step": 7673 }, { "epoch": 0.8712070415189928, "grad_norm": 0.220703125, "learning_rate": 0.0015035448944810293, "loss": 5.187, "step": 7674 }, { "epoch": 0.8713205686289118, "grad_norm": 0.2255859375, "learning_rate": 0.0015021000866640806, "loss": 5.2236, "step": 7675 }, { "epoch": 0.8714340957388309, "grad_norm": 0.2490234375, "learning_rate": 0.001500653982988285, "loss": 5.2045, "step": 7676 }, { "epoch": 0.87154762284875, "grad_norm": 0.240234375, "learning_rate": 0.00149920658811404, "loss": 5.2114, "step": 7677 }, { "epoch": 0.8716611499586691, "grad_norm": 0.265625, "learning_rate": 0.001497757906705904, "loss": 5.186, "step": 7678 }, { "epoch": 0.8717746770685881, "grad_norm": 0.255859375, "learning_rate": 0.0014963079434325822, "loss": 5.1795, "step": 7679 }, { "epoch": 0.8718882041785072, "grad_norm": 0.251953125, "learning_rate": 0.00149485670296691, "loss": 5.2045, "step": 7680 }, { "epoch": 0.8720017312884263, "grad_norm": 0.2470703125, "learning_rate": 0.0014934041899858398, "loss": 5.2013, "step": 7681 }, { "epoch": 0.8721152583983454, "grad_norm": 0.26171875, "learning_rate": 0.001491950409170424, "loss": 5.2121, "step": 7682 }, { "epoch": 0.8722287855082644, "grad_norm": 0.2490234375, "learning_rate": 0.0014904953652058021, "loss": 5.2104, "step": 7683 }, { "epoch": 0.8723423126181835, "grad_norm": 0.2451171875, "learning_rate": 0.0014890390627811838, "loss": 5.1955, "step": 7684 }, { "epoch": 0.8724558397281026, "grad_norm": 0.234375, "learning_rate": 0.0014875815065898339, "loss": 5.2033, "step": 7685 }, { "epoch": 0.8725693668380217, "grad_norm": 0.2470703125, "learning_rate": 0.001486122701329058, "loss": 5.1966, "step": 7686 }, { "epoch": 0.8726828939479407, "grad_norm": 0.2392578125, "learning_rate": 0.0014846626517001883, "loss": 5.2291, "step": 7687 }, { "epoch": 0.8727964210578598, "grad_norm": 0.2353515625, "learning_rate": 0.0014832013624085654, "loss": 5.1995, "step": 7688 }, { "epoch": 0.8729099481677789, "grad_norm": 0.216796875, "learning_rate": 0.0014817388381635262, "loss": 5.2014, "step": 7689 }, { "epoch": 0.873023475277698, "grad_norm": 0.2216796875, "learning_rate": 0.0014802750836783877, "loss": 5.2039, "step": 7690 }, { "epoch": 0.873137002387617, "grad_norm": 0.2021484375, "learning_rate": 0.0014788101036704304, "loss": 5.1871, "step": 7691 }, { "epoch": 0.8732505294975361, "grad_norm": 0.201171875, "learning_rate": 0.0014773439028608858, "loss": 5.1953, "step": 7692 }, { "epoch": 0.8733640566074552, "grad_norm": 0.1865234375, "learning_rate": 0.001475876485974918, "loss": 5.1917, "step": 7693 }, { "epoch": 0.8734775837173743, "grad_norm": 0.1826171875, "learning_rate": 0.0014744078577416122, "loss": 5.2164, "step": 7694 }, { "epoch": 0.8735911108272933, "grad_norm": 0.1845703125, "learning_rate": 0.0014729380228939559, "loss": 5.1981, "step": 7695 }, { "epoch": 0.8737046379372124, "grad_norm": 0.173828125, "learning_rate": 0.0014714669861688257, "loss": 5.1998, "step": 7696 }, { "epoch": 0.8738181650471315, "grad_norm": 0.173828125, "learning_rate": 0.0014699947523069716, "loss": 5.2217, "step": 7697 }, { "epoch": 0.8739316921570506, "grad_norm": 0.1806640625, "learning_rate": 0.0014685213260530016, "loss": 5.1956, "step": 7698 }, { "epoch": 0.8740452192669697, "grad_norm": 0.18359375, "learning_rate": 0.0014670467121553662, "loss": 5.2216, "step": 7699 }, { "epoch": 0.8741587463768887, "grad_norm": 0.17578125, "learning_rate": 0.001465570915366344, "loss": 5.2032, "step": 7700 }, { "epoch": 0.8742722734868078, "grad_norm": 0.1845703125, "learning_rate": 0.0014640939404420251, "loss": 5.2171, "step": 7701 }, { "epoch": 0.8743858005967269, "grad_norm": 0.1845703125, "learning_rate": 0.0014626157921422964, "loss": 5.2001, "step": 7702 }, { "epoch": 0.874499327706646, "grad_norm": 0.1865234375, "learning_rate": 0.001461136475230827, "loss": 5.2108, "step": 7703 }, { "epoch": 0.874612854816565, "grad_norm": 0.1884765625, "learning_rate": 0.0014596559944750507, "loss": 5.2096, "step": 7704 }, { "epoch": 0.8747263819264841, "grad_norm": 0.205078125, "learning_rate": 0.001458174354646154, "loss": 5.1888, "step": 7705 }, { "epoch": 0.8748399090364032, "grad_norm": 0.2119140625, "learning_rate": 0.0014566915605190571, "loss": 5.1753, "step": 7706 }, { "epoch": 0.8749534361463223, "grad_norm": 0.212890625, "learning_rate": 0.0014552076168724013, "loss": 5.2088, "step": 7707 }, { "epoch": 0.8750669632562413, "grad_norm": 0.2412109375, "learning_rate": 0.0014537225284885313, "loss": 5.2031, "step": 7708 }, { "epoch": 0.8751804903661604, "grad_norm": 0.2470703125, "learning_rate": 0.0014522363001534823, "loss": 5.1987, "step": 7709 }, { "epoch": 0.8752940174760795, "grad_norm": 0.255859375, "learning_rate": 0.0014507489366569625, "loss": 5.1918, "step": 7710 }, { "epoch": 0.8754075445859986, "grad_norm": 0.2294921875, "learning_rate": 0.0014492604427923381, "loss": 5.2174, "step": 7711 }, { "epoch": 0.8755210716959176, "grad_norm": 0.2353515625, "learning_rate": 0.0014477708233566191, "loss": 5.2064, "step": 7712 }, { "epoch": 0.8756345988058367, "grad_norm": 0.21875, "learning_rate": 0.0014462800831504426, "loss": 5.2025, "step": 7713 }, { "epoch": 0.8757481259157558, "grad_norm": 0.2216796875, "learning_rate": 0.001444788226978057, "loss": 5.2152, "step": 7714 }, { "epoch": 0.8758616530256749, "grad_norm": 0.2099609375, "learning_rate": 0.0014432952596473074, "loss": 5.2251, "step": 7715 }, { "epoch": 0.875975180135594, "grad_norm": 0.21484375, "learning_rate": 0.0014418011859696211, "loss": 5.1927, "step": 7716 }, { "epoch": 0.876088707245513, "grad_norm": 0.2138671875, "learning_rate": 0.001440306010759989, "loss": 5.2099, "step": 7717 }, { "epoch": 0.8762022343554321, "grad_norm": 0.212890625, "learning_rate": 0.0014388097388369529, "loss": 5.1982, "step": 7718 }, { "epoch": 0.8763157614653512, "grad_norm": 0.2255859375, "learning_rate": 0.001437312375022589, "loss": 5.1945, "step": 7719 }, { "epoch": 0.8764292885752702, "grad_norm": 0.2255859375, "learning_rate": 0.0014358139241424923, "loss": 5.2065, "step": 7720 }, { "epoch": 0.8765428156851893, "grad_norm": 0.20703125, "learning_rate": 0.001434314391025761, "loss": 5.2015, "step": 7721 }, { "epoch": 0.8766563427951084, "grad_norm": 0.2060546875, "learning_rate": 0.0014328137805049808, "loss": 5.2127, "step": 7722 }, { "epoch": 0.8767698699050275, "grad_norm": 0.2001953125, "learning_rate": 0.0014313120974162104, "loss": 5.1895, "step": 7723 }, { "epoch": 0.8768833970149466, "grad_norm": 0.2041015625, "learning_rate": 0.0014298093465989641, "loss": 5.2186, "step": 7724 }, { "epoch": 0.8769969241248656, "grad_norm": 0.2021484375, "learning_rate": 0.001428305532896198, "loss": 5.2036, "step": 7725 }, { "epoch": 0.8771104512347847, "grad_norm": 0.2177734375, "learning_rate": 0.0014268006611542936, "loss": 5.1912, "step": 7726 }, { "epoch": 0.8772239783447038, "grad_norm": 0.2099609375, "learning_rate": 0.0014252947362230412, "loss": 5.1829, "step": 7727 }, { "epoch": 0.8773375054546229, "grad_norm": 0.22265625, "learning_rate": 0.0014237877629556263, "loss": 5.2052, "step": 7728 }, { "epoch": 0.8774510325645419, "grad_norm": 0.2119140625, "learning_rate": 0.0014222797462086123, "loss": 5.2, "step": 7729 }, { "epoch": 0.877564559674461, "grad_norm": 0.21875, "learning_rate": 0.0014207706908419256, "loss": 5.194, "step": 7730 }, { "epoch": 0.8776780867843801, "grad_norm": 0.224609375, "learning_rate": 0.00141926060171884, "loss": 5.204, "step": 7731 }, { "epoch": 0.8777916138942992, "grad_norm": 0.2353515625, "learning_rate": 0.0014177494837059608, "loss": 5.2019, "step": 7732 }, { "epoch": 0.8779051410042182, "grad_norm": 0.2294921875, "learning_rate": 0.0014162373416732087, "loss": 5.2007, "step": 7733 }, { "epoch": 0.8780186681141373, "grad_norm": 0.21875, "learning_rate": 0.0014147241804938046, "loss": 5.2203, "step": 7734 }, { "epoch": 0.8781321952240564, "grad_norm": 0.2060546875, "learning_rate": 0.0014132100050442543, "loss": 5.2002, "step": 7735 }, { "epoch": 0.8782457223339755, "grad_norm": 0.19140625, "learning_rate": 0.0014116948202043322, "loss": 5.2191, "step": 7736 }, { "epoch": 0.8783592494438945, "grad_norm": 0.1796875, "learning_rate": 0.0014101786308570652, "loss": 5.2027, "step": 7737 }, { "epoch": 0.8784727765538136, "grad_norm": 0.185546875, "learning_rate": 0.0014086614418887182, "loss": 5.1952, "step": 7738 }, { "epoch": 0.8785863036637327, "grad_norm": 0.1708984375, "learning_rate": 0.0014071432581887772, "loss": 5.2017, "step": 7739 }, { "epoch": 0.8786998307736518, "grad_norm": 0.1875, "learning_rate": 0.0014056240846499336, "loss": 5.2191, "step": 7740 }, { "epoch": 0.8788133578835708, "grad_norm": 0.1962890625, "learning_rate": 0.0014041039261680692, "loss": 5.1863, "step": 7741 }, { "epoch": 0.8789268849934899, "grad_norm": 0.20703125, "learning_rate": 0.0014025827876422404, "loss": 5.1991, "step": 7742 }, { "epoch": 0.879040412103409, "grad_norm": 0.234375, "learning_rate": 0.001401060673974661, "loss": 5.1886, "step": 7743 }, { "epoch": 0.8791539392133281, "grad_norm": 0.244140625, "learning_rate": 0.001399537590070688, "loss": 5.1917, "step": 7744 }, { "epoch": 0.8792674663232471, "grad_norm": 0.2421875, "learning_rate": 0.0013980135408388056, "loss": 5.1942, "step": 7745 }, { "epoch": 0.8793809934331662, "grad_norm": 0.251953125, "learning_rate": 0.0013964885311906082, "loss": 5.235, "step": 7746 }, { "epoch": 0.8794945205430853, "grad_norm": 0.255859375, "learning_rate": 0.0013949625660407859, "loss": 5.1829, "step": 7747 }, { "epoch": 0.8796080476530044, "grad_norm": 0.25, "learning_rate": 0.0013934356503071078, "loss": 5.2025, "step": 7748 }, { "epoch": 0.8797215747629235, "grad_norm": 0.26171875, "learning_rate": 0.0013919077889104066, "loss": 5.2172, "step": 7749 }, { "epoch": 0.8798351018728425, "grad_norm": 0.263671875, "learning_rate": 0.001390378986774563, "loss": 5.1914, "step": 7750 }, { "epoch": 0.8799486289827616, "grad_norm": 0.25390625, "learning_rate": 0.0013888492488264887, "loss": 5.1833, "step": 7751 }, { "epoch": 0.8800621560926807, "grad_norm": 0.259765625, "learning_rate": 0.0013873185799961115, "loss": 5.2095, "step": 7752 }, { "epoch": 0.8801756832025998, "grad_norm": 0.248046875, "learning_rate": 0.00138578698521636, "loss": 5.2073, "step": 7753 }, { "epoch": 0.8802892103125188, "grad_norm": 0.2431640625, "learning_rate": 0.0013842544694231457, "loss": 5.192, "step": 7754 }, { "epoch": 0.8804027374224379, "grad_norm": 0.2236328125, "learning_rate": 0.0013827210375553485, "loss": 5.1857, "step": 7755 }, { "epoch": 0.880516264532357, "grad_norm": 0.2490234375, "learning_rate": 0.0013811866945548017, "loss": 5.1908, "step": 7756 }, { "epoch": 0.8806297916422761, "grad_norm": 0.228515625, "learning_rate": 0.0013796514453662733, "loss": 5.2199, "step": 7757 }, { "epoch": 0.8807433187521951, "grad_norm": 0.23046875, "learning_rate": 0.0013781152949374528, "loss": 5.2044, "step": 7758 }, { "epoch": 0.8808568458621142, "grad_norm": 0.2177734375, "learning_rate": 0.0013765782482189337, "loss": 5.2011, "step": 7759 }, { "epoch": 0.8809703729720333, "grad_norm": 0.22265625, "learning_rate": 0.0013750403101641983, "loss": 5.1926, "step": 7760 }, { "epoch": 0.8810839000819524, "grad_norm": 0.2236328125, "learning_rate": 0.001373501485729601, "loss": 5.2098, "step": 7761 }, { "epoch": 0.8811974271918714, "grad_norm": 0.2275390625, "learning_rate": 0.0013719617798743531, "loss": 5.1877, "step": 7762 }, { "epoch": 0.8813109543017905, "grad_norm": 0.2197265625, "learning_rate": 0.0013704211975605067, "loss": 5.1889, "step": 7763 }, { "epoch": 0.8814244814117096, "grad_norm": 0.234375, "learning_rate": 0.0013688797437529376, "loss": 5.202, "step": 7764 }, { "epoch": 0.8815380085216287, "grad_norm": 0.2236328125, "learning_rate": 0.0013673374234193314, "loss": 5.1935, "step": 7765 }, { "epoch": 0.8816515356315477, "grad_norm": 0.23046875, "learning_rate": 0.0013657942415301653, "loss": 5.2271, "step": 7766 }, { "epoch": 0.8817650627414668, "grad_norm": 0.2099609375, "learning_rate": 0.0013642502030586931, "loss": 5.1846, "step": 7767 }, { "epoch": 0.8818785898513859, "grad_norm": 0.2041015625, "learning_rate": 0.00136270531298093, "loss": 5.2102, "step": 7768 }, { "epoch": 0.881992116961305, "grad_norm": 0.1884765625, "learning_rate": 0.0013611595762756349, "loss": 5.185, "step": 7769 }, { "epoch": 0.882105644071224, "grad_norm": 0.1865234375, "learning_rate": 0.0013596129979242951, "loss": 5.1883, "step": 7770 }, { "epoch": 0.8822191711811431, "grad_norm": 0.1806640625, "learning_rate": 0.001358065582911111, "loss": 5.202, "step": 7771 }, { "epoch": 0.8823326982910622, "grad_norm": 0.1787109375, "learning_rate": 0.0013565173362229785, "loss": 5.1774, "step": 7772 }, { "epoch": 0.8824462254009813, "grad_norm": 0.1728515625, "learning_rate": 0.0013549682628494744, "loss": 5.1813, "step": 7773 }, { "epoch": 0.8825597525109004, "grad_norm": 0.16796875, "learning_rate": 0.0013534183677828397, "loss": 5.1968, "step": 7774 }, { "epoch": 0.8826732796208194, "grad_norm": 0.16015625, "learning_rate": 0.0013518676560179627, "loss": 5.2038, "step": 7775 }, { "epoch": 0.8827868067307385, "grad_norm": 0.162109375, "learning_rate": 0.0013503161325523645, "loss": 5.1878, "step": 7776 }, { "epoch": 0.8829003338406576, "grad_norm": 0.1552734375, "learning_rate": 0.0013487638023861818, "loss": 5.1836, "step": 7777 }, { "epoch": 0.8830138609505767, "grad_norm": 0.16015625, "learning_rate": 0.0013472106705221512, "loss": 5.1969, "step": 7778 }, { "epoch": 0.8831273880604957, "grad_norm": 0.16015625, "learning_rate": 0.0013456567419655927, "loss": 5.1836, "step": 7779 }, { "epoch": 0.8832409151704148, "grad_norm": 0.16015625, "learning_rate": 0.0013441020217243942, "loss": 5.2085, "step": 7780 }, { "epoch": 0.8833544422803339, "grad_norm": 0.1669921875, "learning_rate": 0.0013425465148089945, "loss": 5.2036, "step": 7781 }, { "epoch": 0.883467969390253, "grad_norm": 0.169921875, "learning_rate": 0.001340990226232368, "loss": 5.1866, "step": 7782 }, { "epoch": 0.883581496500172, "grad_norm": 0.169921875, "learning_rate": 0.0013394331610100077, "loss": 5.1936, "step": 7783 }, { "epoch": 0.8836950236100911, "grad_norm": 0.171875, "learning_rate": 0.0013378753241599104, "loss": 5.1605, "step": 7784 }, { "epoch": 0.8838085507200102, "grad_norm": 0.1787109375, "learning_rate": 0.001336316720702559, "loss": 5.1877, "step": 7785 }, { "epoch": 0.8839220778299293, "grad_norm": 0.1796875, "learning_rate": 0.0013347573556609074, "loss": 5.1972, "step": 7786 }, { "epoch": 0.8840356049398483, "grad_norm": 0.17578125, "learning_rate": 0.0013331972340603628, "loss": 5.1899, "step": 7787 }, { "epoch": 0.8841491320497674, "grad_norm": 0.1669921875, "learning_rate": 0.0013316363609287723, "loss": 5.2045, "step": 7788 }, { "epoch": 0.8842626591596865, "grad_norm": 0.169921875, "learning_rate": 0.0013300747412964035, "loss": 5.1771, "step": 7789 }, { "epoch": 0.8843761862696056, "grad_norm": 0.1611328125, "learning_rate": 0.0013285123801959303, "loss": 5.1944, "step": 7790 }, { "epoch": 0.8844897133795246, "grad_norm": 0.162109375, "learning_rate": 0.0013269492826624158, "loss": 5.2104, "step": 7791 }, { "epoch": 0.8846032404894437, "grad_norm": 0.15625, "learning_rate": 0.0013253854537332978, "loss": 5.2055, "step": 7792 }, { "epoch": 0.8847167675993628, "grad_norm": 0.1650390625, "learning_rate": 0.0013238208984483696, "loss": 5.1728, "step": 7793 }, { "epoch": 0.8848302947092819, "grad_norm": 0.162109375, "learning_rate": 0.0013222556218497655, "loss": 5.203, "step": 7794 }, { "epoch": 0.884943821819201, "grad_norm": 0.171875, "learning_rate": 0.0013206896289819456, "loss": 5.201, "step": 7795 }, { "epoch": 0.88505734892912, "grad_norm": 0.16796875, "learning_rate": 0.0013191229248916772, "loss": 5.2059, "step": 7796 }, { "epoch": 0.8851708760390391, "grad_norm": 0.16796875, "learning_rate": 0.0013175555146280199, "loss": 5.1975, "step": 7797 }, { "epoch": 0.8852844031489582, "grad_norm": 0.1748046875, "learning_rate": 0.0013159874032423094, "loss": 5.197, "step": 7798 }, { "epoch": 0.8853979302588773, "grad_norm": 0.1708984375, "learning_rate": 0.0013144185957881408, "loss": 5.1856, "step": 7799 }, { "epoch": 0.8855114573687963, "grad_norm": 0.1826171875, "learning_rate": 0.0013128490973213522, "loss": 5.2092, "step": 7800 }, { "epoch": 0.8856249844787154, "grad_norm": 0.193359375, "learning_rate": 0.0013112789129000094, "loss": 5.1998, "step": 7801 }, { "epoch": 0.8857385115886345, "grad_norm": 0.2158203125, "learning_rate": 0.0013097080475843875, "loss": 5.187, "step": 7802 }, { "epoch": 0.8858520386985536, "grad_norm": 0.2109375, "learning_rate": 0.001308136506436957, "loss": 5.1945, "step": 7803 }, { "epoch": 0.8859655658084726, "grad_norm": 0.2294921875, "learning_rate": 0.0013065642945223664, "loss": 5.1942, "step": 7804 }, { "epoch": 0.8860790929183917, "grad_norm": 0.240234375, "learning_rate": 0.0013049914169074256, "loss": 5.1937, "step": 7805 }, { "epoch": 0.8861926200283108, "grad_norm": 0.2412109375, "learning_rate": 0.0013034178786610895, "loss": 5.195, "step": 7806 }, { "epoch": 0.8863061471382299, "grad_norm": 0.2314453125, "learning_rate": 0.0013018436848544431, "loss": 5.2196, "step": 7807 }, { "epoch": 0.8864196742481489, "grad_norm": 0.2197265625, "learning_rate": 0.001300268840560683, "loss": 5.2071, "step": 7808 }, { "epoch": 0.886533201358068, "grad_norm": 0.197265625, "learning_rate": 0.0012986933508551024, "loss": 5.1843, "step": 7809 }, { "epoch": 0.8866467284679871, "grad_norm": 0.19921875, "learning_rate": 0.0012971172208150754, "loss": 5.188, "step": 7810 }, { "epoch": 0.8867602555779062, "grad_norm": 0.1787109375, "learning_rate": 0.0012955404555200385, "loss": 5.1877, "step": 7811 }, { "epoch": 0.8868737826878252, "grad_norm": 0.189453125, "learning_rate": 0.001293963060051476, "loss": 5.1989, "step": 7812 }, { "epoch": 0.8869873097977443, "grad_norm": 0.1767578125, "learning_rate": 0.0012923850394929031, "loss": 5.187, "step": 7813 }, { "epoch": 0.8871008369076634, "grad_norm": 0.1787109375, "learning_rate": 0.0012908063989298493, "loss": 5.1738, "step": 7814 }, { "epoch": 0.8872143640175825, "grad_norm": 0.1796875, "learning_rate": 0.0012892271434498423, "loss": 5.1949, "step": 7815 }, { "epoch": 0.8873278911275015, "grad_norm": 0.1748046875, "learning_rate": 0.0012876472781423916, "loss": 5.1852, "step": 7816 }, { "epoch": 0.8874414182374206, "grad_norm": 0.16796875, "learning_rate": 0.0012860668080989723, "loss": 5.1872, "step": 7817 }, { "epoch": 0.8875549453473397, "grad_norm": 0.181640625, "learning_rate": 0.0012844857384130075, "loss": 5.1924, "step": 7818 }, { "epoch": 0.8876684724572588, "grad_norm": 0.169921875, "learning_rate": 0.0012829040741798535, "loss": 5.1844, "step": 7819 }, { "epoch": 0.8877819995671778, "grad_norm": 0.181640625, "learning_rate": 0.0012813218204967823, "loss": 5.1962, "step": 7820 }, { "epoch": 0.8878955266770969, "grad_norm": 0.1884765625, "learning_rate": 0.0012797389824629661, "loss": 5.1862, "step": 7821 }, { "epoch": 0.888009053787016, "grad_norm": 0.185546875, "learning_rate": 0.00127815556517946, "loss": 5.1679, "step": 7822 }, { "epoch": 0.8881225808969351, "grad_norm": 0.1826171875, "learning_rate": 0.0012765715737491857, "loss": 5.1899, "step": 7823 }, { "epoch": 0.8882361080068542, "grad_norm": 0.1865234375, "learning_rate": 0.0012749870132769147, "loss": 5.1907, "step": 7824 }, { "epoch": 0.8883496351167732, "grad_norm": 0.1845703125, "learning_rate": 0.001273401888869254, "loss": 5.1936, "step": 7825 }, { "epoch": 0.8884631622266923, "grad_norm": 0.1962890625, "learning_rate": 0.0012718162056346266, "loss": 5.1973, "step": 7826 }, { "epoch": 0.8885766893366114, "grad_norm": 0.189453125, "learning_rate": 0.0012702299686832561, "loss": 5.1864, "step": 7827 }, { "epoch": 0.8886902164465305, "grad_norm": 0.1962890625, "learning_rate": 0.0012686431831271524, "loss": 5.1934, "step": 7828 }, { "epoch": 0.8888037435564495, "grad_norm": 0.193359375, "learning_rate": 0.0012670558540800916, "loss": 5.1848, "step": 7829 }, { "epoch": 0.8889172706663686, "grad_norm": 0.1943359375, "learning_rate": 0.0012654679866576021, "loss": 5.1932, "step": 7830 }, { "epoch": 0.8890307977762877, "grad_norm": 0.1962890625, "learning_rate": 0.0012638795859769476, "loss": 5.1923, "step": 7831 }, { "epoch": 0.8891443248862068, "grad_norm": 0.2001953125, "learning_rate": 0.00126229065715711, "loss": 5.2058, "step": 7832 }, { "epoch": 0.8892578519961258, "grad_norm": 0.1865234375, "learning_rate": 0.001260701205318773, "loss": 5.191, "step": 7833 }, { "epoch": 0.8893713791060449, "grad_norm": 0.1943359375, "learning_rate": 0.0012591112355843062, "loss": 5.1946, "step": 7834 }, { "epoch": 0.889484906215964, "grad_norm": 0.193359375, "learning_rate": 0.0012575207530777486, "loss": 5.2004, "step": 7835 }, { "epoch": 0.8895984333258831, "grad_norm": 0.203125, "learning_rate": 0.0012559297629247906, "loss": 5.1966, "step": 7836 }, { "epoch": 0.8897119604358021, "grad_norm": 0.173828125, "learning_rate": 0.0012543382702527596, "loss": 5.1583, "step": 7837 }, { "epoch": 0.8898254875457212, "grad_norm": 0.1669921875, "learning_rate": 0.001252746280190602, "loss": 5.1799, "step": 7838 }, { "epoch": 0.8899390146556403, "grad_norm": 0.16015625, "learning_rate": 0.0012511537978688678, "loss": 5.203, "step": 7839 }, { "epoch": 0.8900525417655594, "grad_norm": 0.158203125, "learning_rate": 0.0012495608284196925, "loss": 5.1752, "step": 7840 }, { "epoch": 0.8901660688754784, "grad_norm": 0.1484375, "learning_rate": 0.0012479673769767818, "loss": 5.1947, "step": 7841 }, { "epoch": 0.8902795959853975, "grad_norm": 0.1396484375, "learning_rate": 0.0012463734486753953, "loss": 5.1873, "step": 7842 }, { "epoch": 0.8903931230953166, "grad_norm": 0.138671875, "learning_rate": 0.0012447790486523288, "loss": 5.1958, "step": 7843 }, { "epoch": 0.8905066502052358, "grad_norm": 0.1396484375, "learning_rate": 0.0012431841820458981, "loss": 5.1801, "step": 7844 }, { "epoch": 0.8906201773151549, "grad_norm": 0.1396484375, "learning_rate": 0.0012415888539959233, "loss": 5.1912, "step": 7845 }, { "epoch": 0.8907337044250739, "grad_norm": 0.14453125, "learning_rate": 0.0012399930696437114, "loss": 5.1867, "step": 7846 }, { "epoch": 0.890847231534993, "grad_norm": 0.1494140625, "learning_rate": 0.0012383968341320402, "loss": 5.228, "step": 7847 }, { "epoch": 0.8909607586449121, "grad_norm": 0.1435546875, "learning_rate": 0.0012368001526051407, "loss": 5.1707, "step": 7848 }, { "epoch": 0.8910742857548312, "grad_norm": 0.171875, "learning_rate": 0.0012352030302086815, "loss": 5.1916, "step": 7849 }, { "epoch": 0.8911878128647502, "grad_norm": 0.1748046875, "learning_rate": 0.0012336054720897527, "loss": 5.1911, "step": 7850 }, { "epoch": 0.8913013399746693, "grad_norm": 0.1826171875, "learning_rate": 0.001232007483396848, "loss": 5.1767, "step": 7851 }, { "epoch": 0.8914148670845884, "grad_norm": 0.1767578125, "learning_rate": 0.0012304090692798486, "loss": 5.2039, "step": 7852 }, { "epoch": 0.8915283941945075, "grad_norm": 0.1748046875, "learning_rate": 0.0012288102348900077, "loss": 5.2016, "step": 7853 }, { "epoch": 0.8916419213044265, "grad_norm": 0.1572265625, "learning_rate": 0.0012272109853799316, "loss": 5.207, "step": 7854 }, { "epoch": 0.8917554484143456, "grad_norm": 0.171875, "learning_rate": 0.0012256113259035651, "loss": 5.2131, "step": 7855 }, { "epoch": 0.8918689755242647, "grad_norm": 0.1591796875, "learning_rate": 0.0012240112616161744, "loss": 5.1855, "step": 7856 }, { "epoch": 0.8919825026341838, "grad_norm": 0.158203125, "learning_rate": 0.0012224107976743297, "loss": 5.1954, "step": 7857 }, { "epoch": 0.8920960297441028, "grad_norm": 0.1591796875, "learning_rate": 0.00122080993923589, "loss": 5.1868, "step": 7858 }, { "epoch": 0.8922095568540219, "grad_norm": 0.16015625, "learning_rate": 0.0012192086914599846, "loss": 5.1843, "step": 7859 }, { "epoch": 0.892323083963941, "grad_norm": 0.1611328125, "learning_rate": 0.0012176070595069988, "loss": 5.1921, "step": 7860 }, { "epoch": 0.8924366110738601, "grad_norm": 0.158203125, "learning_rate": 0.0012160050485385547, "loss": 5.1675, "step": 7861 }, { "epoch": 0.8925501381837792, "grad_norm": 0.1630859375, "learning_rate": 0.0012144026637174968, "loss": 5.1671, "step": 7862 }, { "epoch": 0.8926636652936982, "grad_norm": 0.1669921875, "learning_rate": 0.001212799910207874, "loss": 5.1872, "step": 7863 }, { "epoch": 0.8927771924036173, "grad_norm": 0.1611328125, "learning_rate": 0.0012111967931749232, "loss": 5.1853, "step": 7864 }, { "epoch": 0.8928907195135364, "grad_norm": 0.1669921875, "learning_rate": 0.0012095933177850536, "loss": 5.1713, "step": 7865 }, { "epoch": 0.8930042466234555, "grad_norm": 0.1591796875, "learning_rate": 0.0012079894892058282, "loss": 5.1619, "step": 7866 }, { "epoch": 0.8931177737333745, "grad_norm": 0.1572265625, "learning_rate": 0.0012063853126059497, "loss": 5.2004, "step": 7867 }, { "epoch": 0.8932313008432936, "grad_norm": 0.1572265625, "learning_rate": 0.0012047807931552406, "loss": 5.2044, "step": 7868 }, { "epoch": 0.8933448279532127, "grad_norm": 0.154296875, "learning_rate": 0.0012031759360246297, "loss": 5.1908, "step": 7869 }, { "epoch": 0.8934583550631318, "grad_norm": 0.150390625, "learning_rate": 0.0012015707463861333, "loss": 5.1902, "step": 7870 }, { "epoch": 0.8935718821730508, "grad_norm": 0.150390625, "learning_rate": 0.0011999652294128402, "loss": 5.2204, "step": 7871 }, { "epoch": 0.8936854092829699, "grad_norm": 0.140625, "learning_rate": 0.0011983593902788927, "loss": 5.1862, "step": 7872 }, { "epoch": 0.893798936392889, "grad_norm": 0.15234375, "learning_rate": 0.0011967532341594727, "loss": 5.1808, "step": 7873 }, { "epoch": 0.8939124635028081, "grad_norm": 0.1474609375, "learning_rate": 0.0011951467662307822, "loss": 5.1677, "step": 7874 }, { "epoch": 0.8940259906127271, "grad_norm": 0.14453125, "learning_rate": 0.0011935399916700297, "loss": 5.1634, "step": 7875 }, { "epoch": 0.8941395177226462, "grad_norm": 0.1474609375, "learning_rate": 0.0011919329156554112, "loss": 5.1936, "step": 7876 }, { "epoch": 0.8942530448325653, "grad_norm": 0.1416015625, "learning_rate": 0.0011903255433660933, "loss": 5.1836, "step": 7877 }, { "epoch": 0.8943665719424844, "grad_norm": 0.13671875, "learning_rate": 0.0011887178799821992, "loss": 5.1945, "step": 7878 }, { "epoch": 0.8944800990524034, "grad_norm": 0.1416015625, "learning_rate": 0.0011871099306847886, "loss": 5.205, "step": 7879 }, { "epoch": 0.8945936261623225, "grad_norm": 0.1396484375, "learning_rate": 0.0011855017006558437, "loss": 5.1798, "step": 7880 }, { "epoch": 0.8947071532722416, "grad_norm": 0.146484375, "learning_rate": 0.0011838931950782505, "loss": 5.1999, "step": 7881 }, { "epoch": 0.8948206803821607, "grad_norm": 0.1455078125, "learning_rate": 0.0011822844191357843, "loss": 5.1847, "step": 7882 }, { "epoch": 0.8949342074920797, "grad_norm": 0.1435546875, "learning_rate": 0.0011806753780130904, "loss": 5.1886, "step": 7883 }, { "epoch": 0.8950477346019988, "grad_norm": 0.14453125, "learning_rate": 0.0011790660768956692, "loss": 5.2067, "step": 7884 }, { "epoch": 0.8951612617119179, "grad_norm": 0.150390625, "learning_rate": 0.0011774565209698587, "loss": 5.1954, "step": 7885 }, { "epoch": 0.895274788821837, "grad_norm": 0.1484375, "learning_rate": 0.0011758467154228194, "loss": 5.2096, "step": 7886 }, { "epoch": 0.895388315931756, "grad_norm": 0.138671875, "learning_rate": 0.0011742366654425144, "loss": 5.1883, "step": 7887 }, { "epoch": 0.8955018430416751, "grad_norm": 0.1494140625, "learning_rate": 0.0011726263762176956, "loss": 5.1892, "step": 7888 }, { "epoch": 0.8956153701515942, "grad_norm": 0.154296875, "learning_rate": 0.001171015852937886, "loss": 5.1557, "step": 7889 }, { "epoch": 0.8957288972615133, "grad_norm": 0.1611328125, "learning_rate": 0.0011694051007933627, "loss": 5.1942, "step": 7890 }, { "epoch": 0.8958424243714324, "grad_norm": 0.1630859375, "learning_rate": 0.0011677941249751396, "loss": 5.1717, "step": 7891 }, { "epoch": 0.8959559514813514, "grad_norm": 0.1708984375, "learning_rate": 0.0011661829306749525, "loss": 5.179, "step": 7892 }, { "epoch": 0.8960694785912705, "grad_norm": 0.1904296875, "learning_rate": 0.0011645715230852415, "loss": 5.1974, "step": 7893 }, { "epoch": 0.8961830057011896, "grad_norm": 0.20703125, "learning_rate": 0.0011629599073991327, "loss": 5.1684, "step": 7894 }, { "epoch": 0.8962965328111087, "grad_norm": 0.205078125, "learning_rate": 0.0011613480888104243, "loss": 5.1968, "step": 7895 }, { "epoch": 0.8964100599210277, "grad_norm": 0.234375, "learning_rate": 0.0011597360725135667, "loss": 5.2139, "step": 7896 }, { "epoch": 0.8965235870309468, "grad_norm": 0.2109375, "learning_rate": 0.0011581238637036499, "loss": 5.188, "step": 7897 }, { "epoch": 0.8966371141408659, "grad_norm": 0.220703125, "learning_rate": 0.0011565114675763822, "loss": 5.1818, "step": 7898 }, { "epoch": 0.896750641250785, "grad_norm": 0.1904296875, "learning_rate": 0.0011548988893280761, "loss": 5.1698, "step": 7899 }, { "epoch": 0.896864168360704, "grad_norm": 0.1865234375, "learning_rate": 0.0011532861341556316, "loss": 5.1732, "step": 7900 }, { "epoch": 0.8969776954706231, "grad_norm": 0.181640625, "learning_rate": 0.0011516732072565186, "loss": 5.1746, "step": 7901 }, { "epoch": 0.8970912225805422, "grad_norm": 0.173828125, "learning_rate": 0.0011500601138287596, "loss": 5.1823, "step": 7902 }, { "epoch": 0.8972047496904613, "grad_norm": 0.1669921875, "learning_rate": 0.0011484468590709153, "loss": 5.1689, "step": 7903 }, { "epoch": 0.8973182768003803, "grad_norm": 0.1806640625, "learning_rate": 0.0011468334481820656, "loss": 5.1704, "step": 7904 }, { "epoch": 0.8974318039102994, "grad_norm": 0.177734375, "learning_rate": 0.0011452198863617926, "loss": 5.1858, "step": 7905 }, { "epoch": 0.8975453310202185, "grad_norm": 0.181640625, "learning_rate": 0.0011436061788101665, "loss": 5.172, "step": 7906 }, { "epoch": 0.8976588581301376, "grad_norm": 0.1640625, "learning_rate": 0.0011419923307277263, "loss": 5.1728, "step": 7907 }, { "epoch": 0.8977723852400566, "grad_norm": 0.173828125, "learning_rate": 0.0011403783473154635, "loss": 5.1778, "step": 7908 }, { "epoch": 0.8978859123499757, "grad_norm": 0.15625, "learning_rate": 0.0011387642337748068, "loss": 5.1722, "step": 7909 }, { "epoch": 0.8979994394598948, "grad_norm": 0.158203125, "learning_rate": 0.0011371499953076027, "loss": 5.1951, "step": 7910 }, { "epoch": 0.8981129665698139, "grad_norm": 0.1640625, "learning_rate": 0.0011355356371161025, "loss": 5.2079, "step": 7911 }, { "epoch": 0.898226493679733, "grad_norm": 0.1630859375, "learning_rate": 0.0011339211644029413, "loss": 5.1825, "step": 7912 }, { "epoch": 0.898340020789652, "grad_norm": 0.1591796875, "learning_rate": 0.0011323065823711242, "loss": 5.2076, "step": 7913 }, { "epoch": 0.8984535478995711, "grad_norm": 0.158203125, "learning_rate": 0.0011306918962240083, "loss": 5.1994, "step": 7914 }, { "epoch": 0.8985670750094902, "grad_norm": 0.1513671875, "learning_rate": 0.0011290771111652866, "loss": 5.1968, "step": 7915 }, { "epoch": 0.8986806021194093, "grad_norm": 0.1494140625, "learning_rate": 0.0011274622323989712, "loss": 5.1793, "step": 7916 }, { "epoch": 0.8987941292293283, "grad_norm": 0.1494140625, "learning_rate": 0.0011258472651293748, "loss": 5.1857, "step": 7917 }, { "epoch": 0.8989076563392474, "grad_norm": 0.1533203125, "learning_rate": 0.0011242322145610965, "loss": 5.1803, "step": 7918 }, { "epoch": 0.8990211834491665, "grad_norm": 0.146484375, "learning_rate": 0.0011226170858990039, "loss": 5.2064, "step": 7919 }, { "epoch": 0.8991347105590856, "grad_norm": 0.1474609375, "learning_rate": 0.0011210018843482155, "loss": 5.1755, "step": 7920 }, { "epoch": 0.8992482376690046, "grad_norm": 0.1357421875, "learning_rate": 0.0011193866151140849, "loss": 5.1773, "step": 7921 }, { "epoch": 0.8993617647789237, "grad_norm": 0.1328125, "learning_rate": 0.0011177712834021848, "loss": 5.1708, "step": 7922 }, { "epoch": 0.8994752918888428, "grad_norm": 0.1259765625, "learning_rate": 0.0011161558944182877, "loss": 5.1751, "step": 7923 }, { "epoch": 0.8995888189987619, "grad_norm": 0.1240234375, "learning_rate": 0.0011145404533683518, "loss": 5.176, "step": 7924 }, { "epoch": 0.8997023461086809, "grad_norm": 0.126953125, "learning_rate": 0.001112924965458503, "loss": 5.2079, "step": 7925 }, { "epoch": 0.8998158732186, "grad_norm": 0.1298828125, "learning_rate": 0.0011113094358950176, "loss": 5.1855, "step": 7926 }, { "epoch": 0.8999294003285191, "grad_norm": 0.12451171875, "learning_rate": 0.0011096938698843064, "loss": 5.1644, "step": 7927 }, { "epoch": 0.9000429274384382, "grad_norm": 0.1318359375, "learning_rate": 0.0011080782726328982, "loss": 5.1846, "step": 7928 }, { "epoch": 0.9001564545483572, "grad_norm": 0.1318359375, "learning_rate": 0.0011064626493474219, "loss": 5.1596, "step": 7929 }, { "epoch": 0.9002699816582763, "grad_norm": 0.1328125, "learning_rate": 0.0011048470052345905, "loss": 5.1702, "step": 7930 }, { "epoch": 0.9003835087681954, "grad_norm": 0.1337890625, "learning_rate": 0.0011032313455011837, "loss": 5.1814, "step": 7931 }, { "epoch": 0.9004970358781145, "grad_norm": 0.1357421875, "learning_rate": 0.001101615675354032, "loss": 5.1957, "step": 7932 }, { "epoch": 0.9006105629880335, "grad_norm": 0.13671875, "learning_rate": 0.0011, "loss": 5.16, "step": 7933 }, { "epoch": 0.9007240900979526, "grad_norm": 0.1318359375, "learning_rate": 0.001098384324645968, "loss": 5.1868, "step": 7934 }, { "epoch": 0.9008376172078717, "grad_norm": 0.1279296875, "learning_rate": 0.0010967686544988166, "loss": 5.1864, "step": 7935 }, { "epoch": 0.9009511443177908, "grad_norm": 0.1337890625, "learning_rate": 0.0010951529947654103, "loss": 5.1908, "step": 7936 }, { "epoch": 0.9010646714277099, "grad_norm": 0.1298828125, "learning_rate": 0.001093537350652578, "loss": 5.1767, "step": 7937 }, { "epoch": 0.9011781985376289, "grad_norm": 0.1298828125, "learning_rate": 0.001091921727367102, "loss": 5.166, "step": 7938 }, { "epoch": 0.901291725647548, "grad_norm": 0.12890625, "learning_rate": 0.0010903061301156937, "loss": 5.1636, "step": 7939 }, { "epoch": 0.9014052527574671, "grad_norm": 0.1298828125, "learning_rate": 0.0010886905641049828, "loss": 5.175, "step": 7940 }, { "epoch": 0.9015187798673862, "grad_norm": 0.125, "learning_rate": 0.0010870750345414973, "loss": 5.1638, "step": 7941 }, { "epoch": 0.9016323069773052, "grad_norm": 0.134765625, "learning_rate": 0.0010854595466316484, "loss": 5.1865, "step": 7942 }, { "epoch": 0.9017458340872243, "grad_norm": 0.13671875, "learning_rate": 0.0010838441055817127, "loss": 5.1731, "step": 7943 }, { "epoch": 0.9018593611971434, "grad_norm": 0.1533203125, "learning_rate": 0.0010822287165978156, "loss": 5.1617, "step": 7944 }, { "epoch": 0.9019728883070625, "grad_norm": 0.1689453125, "learning_rate": 0.001080613384885915, "loss": 5.1774, "step": 7945 }, { "epoch": 0.9020864154169815, "grad_norm": 0.1845703125, "learning_rate": 0.0010789981156517847, "loss": 5.179, "step": 7946 }, { "epoch": 0.9021999425269006, "grad_norm": 0.173828125, "learning_rate": 0.0010773829141009963, "loss": 5.1648, "step": 7947 }, { "epoch": 0.9023134696368197, "grad_norm": 0.185546875, "learning_rate": 0.0010757677854389036, "loss": 5.1832, "step": 7948 }, { "epoch": 0.9024269967467388, "grad_norm": 0.1787109375, "learning_rate": 0.0010741527348706254, "loss": 5.1857, "step": 7949 }, { "epoch": 0.9025405238566578, "grad_norm": 0.17578125, "learning_rate": 0.001072537767601029, "loss": 5.177, "step": 7950 }, { "epoch": 0.9026540509665769, "grad_norm": 0.1796875, "learning_rate": 0.0010709228888347133, "loss": 5.1861, "step": 7951 }, { "epoch": 0.902767578076496, "grad_norm": 0.171875, "learning_rate": 0.0010693081037759916, "loss": 5.1883, "step": 7952 }, { "epoch": 0.9028811051864151, "grad_norm": 0.1689453125, "learning_rate": 0.001067693417628876, "loss": 5.1767, "step": 7953 }, { "epoch": 0.9029946322963341, "grad_norm": 0.16796875, "learning_rate": 0.001066078835597059, "loss": 5.1763, "step": 7954 }, { "epoch": 0.9031081594062532, "grad_norm": 0.15625, "learning_rate": 0.0010644643628838976, "loss": 5.1664, "step": 7955 }, { "epoch": 0.9032216865161723, "grad_norm": 0.1669921875, "learning_rate": 0.0010628500046923975, "loss": 5.1894, "step": 7956 }, { "epoch": 0.9033352136260914, "grad_norm": 0.1630859375, "learning_rate": 0.0010612357662251938, "loss": 5.2043, "step": 7957 }, { "epoch": 0.9034487407360104, "grad_norm": 0.1689453125, "learning_rate": 0.001059621652684537, "loss": 5.1603, "step": 7958 }, { "epoch": 0.9035622678459295, "grad_norm": 0.150390625, "learning_rate": 0.0010580076692722738, "loss": 5.1801, "step": 7959 }, { "epoch": 0.9036757949558486, "grad_norm": 0.15234375, "learning_rate": 0.0010563938211898337, "loss": 5.1819, "step": 7960 }, { "epoch": 0.9037893220657677, "grad_norm": 0.138671875, "learning_rate": 0.0010547801136382075, "loss": 5.1908, "step": 7961 }, { "epoch": 0.9039028491756868, "grad_norm": 0.1416015625, "learning_rate": 0.0010531665518179348, "loss": 5.1787, "step": 7962 }, { "epoch": 0.9040163762856058, "grad_norm": 0.1416015625, "learning_rate": 0.0010515531409290846, "loss": 5.1688, "step": 7963 }, { "epoch": 0.9041299033955249, "grad_norm": 0.1435546875, "learning_rate": 0.0010499398861712405, "loss": 5.1948, "step": 7964 }, { "epoch": 0.904243430505444, "grad_norm": 0.1435546875, "learning_rate": 0.0010483267927434818, "loss": 5.1724, "step": 7965 }, { "epoch": 0.904356957615363, "grad_norm": 0.14453125, "learning_rate": 0.0010467138658443683, "loss": 5.1701, "step": 7966 }, { "epoch": 0.9044704847252821, "grad_norm": 0.1435546875, "learning_rate": 0.001045101110671924, "loss": 5.1979, "step": 7967 }, { "epoch": 0.9045840118352012, "grad_norm": 0.1474609375, "learning_rate": 0.0010434885324236182, "loss": 5.175, "step": 7968 }, { "epoch": 0.9046975389451203, "grad_norm": 0.1435546875, "learning_rate": 0.0010418761362963502, "loss": 5.1852, "step": 7969 }, { "epoch": 0.9048110660550394, "grad_norm": 0.142578125, "learning_rate": 0.0010402639274864334, "loss": 5.1887, "step": 7970 }, { "epoch": 0.9049245931649584, "grad_norm": 0.1357421875, "learning_rate": 0.0010386519111895763, "loss": 5.1572, "step": 7971 }, { "epoch": 0.9050381202748775, "grad_norm": 0.1435546875, "learning_rate": 0.0010370400926008677, "loss": 5.1821, "step": 7972 }, { "epoch": 0.9051516473847966, "grad_norm": 0.130859375, "learning_rate": 0.0010354284769147587, "loss": 5.1717, "step": 7973 }, { "epoch": 0.9052651744947157, "grad_norm": 0.1396484375, "learning_rate": 0.0010338170693250475, "loss": 5.1817, "step": 7974 }, { "epoch": 0.9053787016046347, "grad_norm": 0.1318359375, "learning_rate": 0.0010322058750248605, "loss": 5.1866, "step": 7975 }, { "epoch": 0.9054922287145538, "grad_norm": 0.1357421875, "learning_rate": 0.0010305948992066377, "loss": 5.1933, "step": 7976 }, { "epoch": 0.9056057558244729, "grad_norm": 0.13671875, "learning_rate": 0.0010289841470621142, "loss": 5.1607, "step": 7977 }, { "epoch": 0.905719282934392, "grad_norm": 0.15234375, "learning_rate": 0.0010273736237823045, "loss": 5.1882, "step": 7978 }, { "epoch": 0.905832810044311, "grad_norm": 0.1533203125, "learning_rate": 0.001025763334557486, "loss": 5.1713, "step": 7979 }, { "epoch": 0.9059463371542301, "grad_norm": 0.150390625, "learning_rate": 0.0010241532845771814, "loss": 5.17, "step": 7980 }, { "epoch": 0.9060598642641492, "grad_norm": 0.15234375, "learning_rate": 0.0010225434790301414, "loss": 5.1675, "step": 7981 }, { "epoch": 0.9061733913740683, "grad_norm": 0.162109375, "learning_rate": 0.0010209339231043314, "loss": 5.1693, "step": 7982 }, { "epoch": 0.9062869184839873, "grad_norm": 0.1455078125, "learning_rate": 0.00101932462198691, "loss": 5.1671, "step": 7983 }, { "epoch": 0.9064004455939064, "grad_norm": 0.1494140625, "learning_rate": 0.001017715580864216, "loss": 5.1734, "step": 7984 }, { "epoch": 0.9065139727038255, "grad_norm": 0.154296875, "learning_rate": 0.0010161068049217494, "loss": 5.1768, "step": 7985 }, { "epoch": 0.9066274998137446, "grad_norm": 0.158203125, "learning_rate": 0.0010144982993441564, "loss": 5.1915, "step": 7986 }, { "epoch": 0.9067410269236637, "grad_norm": 0.1533203125, "learning_rate": 0.0010128900693152114, "loss": 5.1684, "step": 7987 }, { "epoch": 0.9068545540335827, "grad_norm": 0.1611328125, "learning_rate": 0.0010112821200178009, "loss": 5.1797, "step": 7988 }, { "epoch": 0.9069680811435018, "grad_norm": 0.150390625, "learning_rate": 0.0010096744566339064, "loss": 5.1773, "step": 7989 }, { "epoch": 0.9070816082534209, "grad_norm": 0.1533203125, "learning_rate": 0.001008067084344589, "loss": 5.1864, "step": 7990 }, { "epoch": 0.90719513536334, "grad_norm": 0.14453125, "learning_rate": 0.0010064600083299704, "loss": 5.1773, "step": 7991 }, { "epoch": 0.907308662473259, "grad_norm": 0.1474609375, "learning_rate": 0.001004853233769218, "loss": 5.1919, "step": 7992 }, { "epoch": 0.9074221895831781, "grad_norm": 0.1484375, "learning_rate": 0.0010032467658405277, "loss": 5.1791, "step": 7993 }, { "epoch": 0.9075357166930972, "grad_norm": 0.1474609375, "learning_rate": 0.0010016406097211074, "loss": 5.162, "step": 7994 }, { "epoch": 0.9076492438030163, "grad_norm": 0.134765625, "learning_rate": 0.0010000347705871597, "loss": 5.1776, "step": 7995 }, { "epoch": 0.9077627709129353, "grad_norm": 0.142578125, "learning_rate": 0.0009984292536138666, "loss": 5.1733, "step": 7996 }, { "epoch": 0.9078762980228544, "grad_norm": 0.1337890625, "learning_rate": 0.0009968240639753702, "loss": 5.1716, "step": 7997 }, { "epoch": 0.9079898251327735, "grad_norm": 0.130859375, "learning_rate": 0.0009952192068447597, "loss": 5.172, "step": 7998 }, { "epoch": 0.9081033522426926, "grad_norm": 0.134765625, "learning_rate": 0.0009936146873940506, "loss": 5.182, "step": 7999 }, { "epoch": 0.9082168793526116, "grad_norm": 0.1416015625, "learning_rate": 0.0009920105107941717, "loss": 5.1707, "step": 8000 }, { "epoch": 0.9083304064625307, "grad_norm": 0.134765625, "learning_rate": 0.0009904066822149467, "loss": 5.175, "step": 8001 }, { "epoch": 0.9084439335724498, "grad_norm": 0.142578125, "learning_rate": 0.0009888032068250767, "loss": 5.1907, "step": 8002 }, { "epoch": 0.9085574606823689, "grad_norm": 0.1435546875, "learning_rate": 0.0009872000897921261, "loss": 5.1779, "step": 8003 }, { "epoch": 0.9086709877922879, "grad_norm": 0.15625, "learning_rate": 0.0009855973362825033, "loss": 5.1776, "step": 8004 }, { "epoch": 0.908784514902207, "grad_norm": 0.146484375, "learning_rate": 0.0009839949514614454, "loss": 5.1975, "step": 8005 }, { "epoch": 0.9088980420121261, "grad_norm": 0.1494140625, "learning_rate": 0.0009823929404930015, "loss": 5.1914, "step": 8006 }, { "epoch": 0.9090115691220452, "grad_norm": 0.1318359375, "learning_rate": 0.0009807913085400155, "loss": 5.1892, "step": 8007 }, { "epoch": 0.9091250962319642, "grad_norm": 0.140625, "learning_rate": 0.0009791900607641103, "loss": 5.1815, "step": 8008 }, { "epoch": 0.9092386233418833, "grad_norm": 0.1416015625, "learning_rate": 0.0009775892023256704, "loss": 5.1674, "step": 8009 }, { "epoch": 0.9093521504518024, "grad_norm": 0.15234375, "learning_rate": 0.000975988738383826, "loss": 5.1825, "step": 8010 }, { "epoch": 0.9094656775617215, "grad_norm": 0.14453125, "learning_rate": 0.0009743886740964352, "loss": 5.1813, "step": 8011 }, { "epoch": 0.9095792046716406, "grad_norm": 0.1455078125, "learning_rate": 0.0009727890146200687, "loss": 5.1862, "step": 8012 }, { "epoch": 0.9096927317815596, "grad_norm": 0.13671875, "learning_rate": 0.0009711897651099925, "loss": 5.189, "step": 8013 }, { "epoch": 0.9098062588914787, "grad_norm": 0.1455078125, "learning_rate": 0.0009695909307201513, "loss": 5.1727, "step": 8014 }, { "epoch": 0.9099197860013978, "grad_norm": 0.142578125, "learning_rate": 0.0009679925166031525, "loss": 5.1764, "step": 8015 }, { "epoch": 0.9100333131113169, "grad_norm": 0.15234375, "learning_rate": 0.0009663945279102477, "loss": 5.1844, "step": 8016 }, { "epoch": 0.9101468402212359, "grad_norm": 0.1455078125, "learning_rate": 0.0009647969697913186, "loss": 5.1706, "step": 8017 }, { "epoch": 0.910260367331155, "grad_norm": 0.140625, "learning_rate": 0.0009631998473948596, "loss": 5.1549, "step": 8018 }, { "epoch": 0.9103738944410741, "grad_norm": 0.126953125, "learning_rate": 0.00096160316586796, "loss": 5.192, "step": 8019 }, { "epoch": 0.9104874215509932, "grad_norm": 0.13671875, "learning_rate": 0.0009600069303562887, "loss": 5.1895, "step": 8020 }, { "epoch": 0.9106009486609122, "grad_norm": 0.130859375, "learning_rate": 0.0009584111460040767, "loss": 5.195, "step": 8021 }, { "epoch": 0.9107144757708313, "grad_norm": 0.1298828125, "learning_rate": 0.0009568158179541022, "loss": 5.1845, "step": 8022 }, { "epoch": 0.9108280028807504, "grad_norm": 0.1298828125, "learning_rate": 0.0009552209513476718, "loss": 5.1668, "step": 8023 }, { "epoch": 0.9109415299906695, "grad_norm": 0.1357421875, "learning_rate": 0.0009536265513246047, "loss": 5.1755, "step": 8024 }, { "epoch": 0.9110550571005885, "grad_norm": 0.1259765625, "learning_rate": 0.0009520326230232181, "loss": 5.1808, "step": 8025 }, { "epoch": 0.9111685842105076, "grad_norm": 0.125, "learning_rate": 0.0009504391715803076, "loss": 5.1793, "step": 8026 }, { "epoch": 0.9112821113204267, "grad_norm": 0.12353515625, "learning_rate": 0.0009488462021311324, "loss": 5.1889, "step": 8027 }, { "epoch": 0.9113956384303458, "grad_norm": 0.12158203125, "learning_rate": 0.000947253719809398, "loss": 5.1645, "step": 8028 }, { "epoch": 0.9115091655402648, "grad_norm": 0.11572265625, "learning_rate": 0.0009456617297472407, "loss": 5.1742, "step": 8029 }, { "epoch": 0.9116226926501839, "grad_norm": 0.1201171875, "learning_rate": 0.0009440702370752097, "loss": 5.167, "step": 8030 }, { "epoch": 0.911736219760103, "grad_norm": 0.126953125, "learning_rate": 0.0009424792469222516, "loss": 5.158, "step": 8031 }, { "epoch": 0.9118497468700221, "grad_norm": 0.1259765625, "learning_rate": 0.0009408887644156938, "loss": 5.1752, "step": 8032 }, { "epoch": 0.9119632739799411, "grad_norm": 0.12158203125, "learning_rate": 0.000939298794681227, "loss": 5.1599, "step": 8033 }, { "epoch": 0.9120768010898602, "grad_norm": 0.123046875, "learning_rate": 0.0009377093428428903, "loss": 5.1586, "step": 8034 }, { "epoch": 0.9121903281997793, "grad_norm": 0.11572265625, "learning_rate": 0.0009361204140230524, "loss": 5.1629, "step": 8035 }, { "epoch": 0.9123038553096984, "grad_norm": 0.123046875, "learning_rate": 0.000934532013342398, "loss": 5.161, "step": 8036 }, { "epoch": 0.9124173824196175, "grad_norm": 0.126953125, "learning_rate": 0.0009329441459199089, "loss": 5.172, "step": 8037 }, { "epoch": 0.9125309095295365, "grad_norm": 0.134765625, "learning_rate": 0.0009313568168728477, "loss": 5.17, "step": 8038 }, { "epoch": 0.9126444366394556, "grad_norm": 0.12255859375, "learning_rate": 0.0009297700313167439, "loss": 5.1936, "step": 8039 }, { "epoch": 0.9127579637493747, "grad_norm": 0.1328125, "learning_rate": 0.0009281837943653737, "loss": 5.1874, "step": 8040 }, { "epoch": 0.9128714908592938, "grad_norm": 0.130859375, "learning_rate": 0.000926598111130746, "loss": 5.1661, "step": 8041 }, { "epoch": 0.9129850179692128, "grad_norm": 0.1318359375, "learning_rate": 0.0009250129867230852, "loss": 5.161, "step": 8042 }, { "epoch": 0.9130985450791319, "grad_norm": 0.119140625, "learning_rate": 0.0009234284262508146, "loss": 5.1861, "step": 8043 }, { "epoch": 0.913212072189051, "grad_norm": 0.12255859375, "learning_rate": 0.0009218444348205401, "loss": 5.176, "step": 8044 }, { "epoch": 0.9133255992989701, "grad_norm": 0.1220703125, "learning_rate": 0.0009202610175370338, "loss": 5.1729, "step": 8045 }, { "epoch": 0.9134391264088891, "grad_norm": 0.12890625, "learning_rate": 0.0009186781795032178, "loss": 5.1907, "step": 8046 }, { "epoch": 0.9135526535188082, "grad_norm": 0.134765625, "learning_rate": 0.0009170959258201468, "loss": 5.1914, "step": 8047 }, { "epoch": 0.9136661806287273, "grad_norm": 0.1357421875, "learning_rate": 0.0009155142615869927, "loss": 5.1819, "step": 8048 }, { "epoch": 0.9137797077386464, "grad_norm": 0.1259765625, "learning_rate": 0.0009139331919010281, "loss": 5.1969, "step": 8049 }, { "epoch": 0.9138932348485654, "grad_norm": 0.1298828125, "learning_rate": 0.0009123527218576085, "loss": 5.1699, "step": 8050 }, { "epoch": 0.9140067619584845, "grad_norm": 0.11962890625, "learning_rate": 0.0009107728565501581, "loss": 5.1461, "step": 8051 }, { "epoch": 0.9141202890684036, "grad_norm": 0.1279296875, "learning_rate": 0.0009091936010701511, "loss": 5.1691, "step": 8052 }, { "epoch": 0.9142338161783227, "grad_norm": 0.126953125, "learning_rate": 0.0009076149605070969, "loss": 5.1581, "step": 8053 }, { "epoch": 0.9143473432882417, "grad_norm": 0.1337890625, "learning_rate": 0.000906036939948524, "loss": 5.1846, "step": 8054 }, { "epoch": 0.9144608703981608, "grad_norm": 0.11279296875, "learning_rate": 0.0009044595444799615, "loss": 5.1796, "step": 8055 }, { "epoch": 0.9145743975080799, "grad_norm": 0.1162109375, "learning_rate": 0.0009028827791849245, "loss": 5.1764, "step": 8056 }, { "epoch": 0.914687924617999, "grad_norm": 0.11572265625, "learning_rate": 0.0009013066491448976, "loss": 5.1737, "step": 8057 }, { "epoch": 0.914801451727918, "grad_norm": 0.11767578125, "learning_rate": 0.0008997311594393172, "loss": 5.1796, "step": 8058 }, { "epoch": 0.9149149788378371, "grad_norm": 0.11376953125, "learning_rate": 0.0008981563151455572, "loss": 5.1749, "step": 8059 }, { "epoch": 0.9150285059477562, "grad_norm": 0.12255859375, "learning_rate": 0.0008965821213389105, "loss": 5.1714, "step": 8060 }, { "epoch": 0.9151420330576753, "grad_norm": 0.1162109375, "learning_rate": 0.0008950085830925745, "loss": 5.1859, "step": 8061 }, { "epoch": 0.9152555601675944, "grad_norm": 0.1142578125, "learning_rate": 0.0008934357054776336, "loss": 5.1832, "step": 8062 }, { "epoch": 0.9153690872775134, "grad_norm": 0.10986328125, "learning_rate": 0.0008918634935630432, "loss": 5.1824, "step": 8063 }, { "epoch": 0.9154826143874325, "grad_norm": 0.11669921875, "learning_rate": 0.0008902919524156128, "loss": 5.1731, "step": 8064 }, { "epoch": 0.9155961414973516, "grad_norm": 0.12158203125, "learning_rate": 0.0008887210870999912, "loss": 5.1876, "step": 8065 }, { "epoch": 0.9157096686072707, "grad_norm": 0.111328125, "learning_rate": 0.0008871509026786479, "loss": 5.1597, "step": 8066 }, { "epoch": 0.9158231957171897, "grad_norm": 0.1103515625, "learning_rate": 0.0008855814042118593, "loss": 5.1837, "step": 8067 }, { "epoch": 0.9159367228271088, "grad_norm": 0.1181640625, "learning_rate": 0.0008840125967576907, "loss": 5.1709, "step": 8068 }, { "epoch": 0.9160502499370279, "grad_norm": 0.1083984375, "learning_rate": 0.0008824444853719802, "loss": 5.1555, "step": 8069 }, { "epoch": 0.916163777046947, "grad_norm": 0.11328125, "learning_rate": 0.0008808770751083231, "loss": 5.15, "step": 8070 }, { "epoch": 0.916277304156866, "grad_norm": 0.11328125, "learning_rate": 0.0008793103710180545, "loss": 5.1665, "step": 8071 }, { "epoch": 0.9163908312667851, "grad_norm": 0.11962890625, "learning_rate": 0.0008777443781502345, "loss": 5.1557, "step": 8072 }, { "epoch": 0.9165043583767042, "grad_norm": 0.11328125, "learning_rate": 0.000876179101551631, "loss": 5.1617, "step": 8073 }, { "epoch": 0.9166178854866233, "grad_norm": 0.11767578125, "learning_rate": 0.0008746145462667026, "loss": 5.1653, "step": 8074 }, { "epoch": 0.9167314125965423, "grad_norm": 0.1162109375, "learning_rate": 0.0008730507173375841, "loss": 5.1468, "step": 8075 }, { "epoch": 0.9168449397064614, "grad_norm": 0.10986328125, "learning_rate": 0.0008714876198040699, "loss": 5.1362, "step": 8076 }, { "epoch": 0.9169584668163805, "grad_norm": 0.109375, "learning_rate": 0.0008699252587035969, "loss": 5.1758, "step": 8077 }, { "epoch": 0.9170719939262996, "grad_norm": 0.1064453125, "learning_rate": 0.0008683636390712282, "loss": 5.1589, "step": 8078 }, { "epoch": 0.9171855210362186, "grad_norm": 0.10595703125, "learning_rate": 0.0008668027659396374, "loss": 5.1656, "step": 8079 }, { "epoch": 0.9172990481461377, "grad_norm": 0.111328125, "learning_rate": 0.000865242644339093, "loss": 5.1928, "step": 8080 }, { "epoch": 0.9174125752560568, "grad_norm": 0.10205078125, "learning_rate": 0.0008636832792974409, "loss": 5.1834, "step": 8081 }, { "epoch": 0.9175261023659759, "grad_norm": 0.1123046875, "learning_rate": 0.0008621246758400898, "loss": 5.1582, "step": 8082 }, { "epoch": 0.917639629475895, "grad_norm": 0.1171875, "learning_rate": 0.0008605668389899925, "loss": 5.1827, "step": 8083 }, { "epoch": 0.917753156585814, "grad_norm": 0.1171875, "learning_rate": 0.0008590097737676323, "loss": 5.156, "step": 8084 }, { "epoch": 0.9178666836957332, "grad_norm": 0.11181640625, "learning_rate": 0.0008574534851910059, "loss": 5.1729, "step": 8085 }, { "epoch": 0.9179802108056523, "grad_norm": 0.12255859375, "learning_rate": 0.0008558979782756062, "loss": 5.1755, "step": 8086 }, { "epoch": 0.9180937379155714, "grad_norm": 0.119140625, "learning_rate": 0.0008543432580344075, "loss": 5.1685, "step": 8087 }, { "epoch": 0.9182072650254904, "grad_norm": 0.1142578125, "learning_rate": 0.000852789329477849, "loss": 5.1662, "step": 8088 }, { "epoch": 0.9183207921354095, "grad_norm": 0.1181640625, "learning_rate": 0.0008512361976138181, "loss": 5.1779, "step": 8089 }, { "epoch": 0.9184343192453286, "grad_norm": 0.11962890625, "learning_rate": 0.0008496838674476355, "loss": 5.1755, "step": 8090 }, { "epoch": 0.9185478463552477, "grad_norm": 0.12109375, "learning_rate": 0.0008481323439820374, "loss": 5.1647, "step": 8091 }, { "epoch": 0.9186613734651667, "grad_norm": 0.11572265625, "learning_rate": 0.0008465816322171605, "loss": 5.1856, "step": 8092 }, { "epoch": 0.9187749005750858, "grad_norm": 0.10595703125, "learning_rate": 0.0008450317371505258, "loss": 5.183, "step": 8093 }, { "epoch": 0.9188884276850049, "grad_norm": 0.119140625, "learning_rate": 0.0008434826637770216, "loss": 5.162, "step": 8094 }, { "epoch": 0.919001954794924, "grad_norm": 0.11669921875, "learning_rate": 0.0008419344170888892, "loss": 5.1651, "step": 8095 }, { "epoch": 0.919115481904843, "grad_norm": 0.11962890625, "learning_rate": 0.000840387002075705, "loss": 5.1786, "step": 8096 }, { "epoch": 0.9192290090147621, "grad_norm": 0.1162109375, "learning_rate": 0.0008388404237243652, "loss": 5.1682, "step": 8097 }, { "epoch": 0.9193425361246812, "grad_norm": 0.1279296875, "learning_rate": 0.00083729468701907, "loss": 5.1688, "step": 8098 }, { "epoch": 0.9194560632346003, "grad_norm": 0.12890625, "learning_rate": 0.0008357497969413068, "loss": 5.1825, "step": 8099 }, { "epoch": 0.9195695903445193, "grad_norm": 0.12451171875, "learning_rate": 0.0008342057584698349, "loss": 5.1891, "step": 8100 }, { "epoch": 0.9196831174544384, "grad_norm": 0.11181640625, "learning_rate": 0.0008326625765806688, "loss": 5.1981, "step": 8101 }, { "epoch": 0.9197966445643575, "grad_norm": 0.11376953125, "learning_rate": 0.0008311202562470626, "loss": 5.1757, "step": 8102 }, { "epoch": 0.9199101716742766, "grad_norm": 0.10595703125, "learning_rate": 0.0008295788024394932, "loss": 5.1916, "step": 8103 }, { "epoch": 0.9200236987841957, "grad_norm": 0.107421875, "learning_rate": 0.0008280382201256469, "loss": 5.1824, "step": 8104 }, { "epoch": 0.9201372258941147, "grad_norm": 0.10205078125, "learning_rate": 0.0008264985142703991, "loss": 5.1651, "step": 8105 }, { "epoch": 0.9202507530040338, "grad_norm": 0.10546875, "learning_rate": 0.0008249596898358019, "loss": 5.1896, "step": 8106 }, { "epoch": 0.9203642801139529, "grad_norm": 0.1142578125, "learning_rate": 0.0008234217517810663, "loss": 5.1711, "step": 8107 }, { "epoch": 0.920477807223872, "grad_norm": 0.11767578125, "learning_rate": 0.0008218847050625476, "loss": 5.1697, "step": 8108 }, { "epoch": 0.920591334333791, "grad_norm": 0.11474609375, "learning_rate": 0.000820348554633727, "loss": 5.1701, "step": 8109 }, { "epoch": 0.9207048614437101, "grad_norm": 0.1083984375, "learning_rate": 0.0008188133054451987, "loss": 5.1778, "step": 8110 }, { "epoch": 0.9208183885536292, "grad_norm": 0.10400390625, "learning_rate": 0.0008172789624446513, "loss": 5.1575, "step": 8111 }, { "epoch": 0.9209319156635483, "grad_norm": 0.10888671875, "learning_rate": 0.0008157455305768544, "loss": 5.1888, "step": 8112 }, { "epoch": 0.9210454427734673, "grad_norm": 0.10498046875, "learning_rate": 0.0008142130147836402, "loss": 5.1734, "step": 8113 }, { "epoch": 0.9211589698833864, "grad_norm": 0.10791015625, "learning_rate": 0.0008126814200038885, "loss": 5.1834, "step": 8114 }, { "epoch": 0.9212724969933055, "grad_norm": 0.1005859375, "learning_rate": 0.0008111507511735117, "loss": 5.1632, "step": 8115 }, { "epoch": 0.9213860241032246, "grad_norm": 0.10498046875, "learning_rate": 0.0008096210132254373, "loss": 5.1709, "step": 8116 }, { "epoch": 0.9214995512131436, "grad_norm": 0.09716796875, "learning_rate": 0.0008080922110895937, "loss": 5.1507, "step": 8117 }, { "epoch": 0.9216130783230627, "grad_norm": 0.10205078125, "learning_rate": 0.0008065643496928925, "loss": 5.1566, "step": 8118 }, { "epoch": 0.9217266054329818, "grad_norm": 0.10205078125, "learning_rate": 0.0008050374339592144, "loss": 5.1587, "step": 8119 }, { "epoch": 0.9218401325429009, "grad_norm": 0.107421875, "learning_rate": 0.0008035114688093918, "loss": 5.1667, "step": 8120 }, { "epoch": 0.92195365965282, "grad_norm": 0.10498046875, "learning_rate": 0.0008019864591611945, "loss": 5.1718, "step": 8121 }, { "epoch": 0.922067186762739, "grad_norm": 0.10498046875, "learning_rate": 0.0008004624099293121, "loss": 5.1607, "step": 8122 }, { "epoch": 0.9221807138726581, "grad_norm": 0.10107421875, "learning_rate": 0.0007989393260253392, "loss": 5.1886, "step": 8123 }, { "epoch": 0.9222942409825772, "grad_norm": 0.10400390625, "learning_rate": 0.0007974172123577599, "loss": 5.1664, "step": 8124 }, { "epoch": 0.9224077680924962, "grad_norm": 0.0986328125, "learning_rate": 0.0007958960738319305, "loss": 5.1883, "step": 8125 }, { "epoch": 0.9225212952024153, "grad_norm": 0.1083984375, "learning_rate": 0.0007943759153500665, "loss": 5.156, "step": 8126 }, { "epoch": 0.9226348223123344, "grad_norm": 0.11083984375, "learning_rate": 0.0007928567418112229, "loss": 5.1808, "step": 8127 }, { "epoch": 0.9227483494222535, "grad_norm": 0.10009765625, "learning_rate": 0.0007913385581112817, "loss": 5.1537, "step": 8128 }, { "epoch": 0.9228618765321726, "grad_norm": 0.0947265625, "learning_rate": 0.0007898213691429348, "loss": 5.1715, "step": 8129 }, { "epoch": 0.9229754036420916, "grad_norm": 0.09765625, "learning_rate": 0.0007883051797956679, "loss": 5.1693, "step": 8130 }, { "epoch": 0.9230889307520107, "grad_norm": 0.0986328125, "learning_rate": 0.000786789994955746, "loss": 5.196, "step": 8131 }, { "epoch": 0.9232024578619298, "grad_norm": 0.10595703125, "learning_rate": 0.0007852758195061955, "loss": 5.1696, "step": 8132 }, { "epoch": 0.9233159849718489, "grad_norm": 0.10546875, "learning_rate": 0.0007837626583267916, "loss": 5.1719, "step": 8133 }, { "epoch": 0.9234295120817679, "grad_norm": 0.1044921875, "learning_rate": 0.0007822505162940392, "loss": 5.1936, "step": 8134 }, { "epoch": 0.923543039191687, "grad_norm": 0.099609375, "learning_rate": 0.0007807393982811599, "loss": 5.1906, "step": 8135 }, { "epoch": 0.9236565663016061, "grad_norm": 0.10546875, "learning_rate": 0.0007792293091580745, "loss": 5.1816, "step": 8136 }, { "epoch": 0.9237700934115252, "grad_norm": 0.0966796875, "learning_rate": 0.0007777202537913878, "loss": 5.1559, "step": 8137 }, { "epoch": 0.9238836205214442, "grad_norm": 0.09912109375, "learning_rate": 0.000776212237044374, "loss": 5.1688, "step": 8138 }, { "epoch": 0.9239971476313633, "grad_norm": 0.10107421875, "learning_rate": 0.0007747052637769591, "loss": 5.185, "step": 8139 }, { "epoch": 0.9241106747412824, "grad_norm": 0.10302734375, "learning_rate": 0.0007731993388457066, "loss": 5.1774, "step": 8140 }, { "epoch": 0.9242242018512015, "grad_norm": 0.09375, "learning_rate": 0.000771694467103802, "loss": 5.1767, "step": 8141 }, { "epoch": 0.9243377289611205, "grad_norm": 0.09228515625, "learning_rate": 0.0007701906534010361, "loss": 5.169, "step": 8142 }, { "epoch": 0.9244512560710396, "grad_norm": 0.09326171875, "learning_rate": 0.0007686879025837898, "loss": 5.1879, "step": 8143 }, { "epoch": 0.9245647831809587, "grad_norm": 0.09765625, "learning_rate": 0.0007671862194950194, "loss": 5.1811, "step": 8144 }, { "epoch": 0.9246783102908778, "grad_norm": 0.09033203125, "learning_rate": 0.0007656856089742396, "loss": 5.1636, "step": 8145 }, { "epoch": 0.9247918374007968, "grad_norm": 0.0927734375, "learning_rate": 0.0007641860758575081, "loss": 5.1663, "step": 8146 }, { "epoch": 0.9249053645107159, "grad_norm": 0.09033203125, "learning_rate": 0.000762687624977411, "loss": 5.1732, "step": 8147 }, { "epoch": 0.925018891620635, "grad_norm": 0.0966796875, "learning_rate": 0.0007611902611630473, "loss": 5.142, "step": 8148 }, { "epoch": 0.9251324187305541, "grad_norm": 0.09716796875, "learning_rate": 0.0007596939892400112, "loss": 5.1859, "step": 8149 }, { "epoch": 0.9252459458404731, "grad_norm": 0.10009765625, "learning_rate": 0.0007581988140303791, "loss": 5.17, "step": 8150 }, { "epoch": 0.9253594729503922, "grad_norm": 0.09814453125, "learning_rate": 0.0007567047403526925, "loss": 5.1652, "step": 8151 }, { "epoch": 0.9254730000603113, "grad_norm": 0.0986328125, "learning_rate": 0.0007552117730219434, "loss": 5.1747, "step": 8152 }, { "epoch": 0.9255865271702304, "grad_norm": 0.09521484375, "learning_rate": 0.0007537199168495577, "loss": 5.1422, "step": 8153 }, { "epoch": 0.9257000542801495, "grad_norm": 0.095703125, "learning_rate": 0.0007522291766433809, "loss": 5.1636, "step": 8154 }, { "epoch": 0.9258135813900685, "grad_norm": 0.0947265625, "learning_rate": 0.0007507395572076621, "loss": 5.1438, "step": 8155 }, { "epoch": 0.9259271084999876, "grad_norm": 0.09814453125, "learning_rate": 0.0007492510633430378, "loss": 5.164, "step": 8156 }, { "epoch": 0.9260406356099067, "grad_norm": 0.09228515625, "learning_rate": 0.0007477636998465178, "loss": 5.1579, "step": 8157 }, { "epoch": 0.9261541627198258, "grad_norm": 0.09375, "learning_rate": 0.0007462774715114688, "loss": 5.1659, "step": 8158 }, { "epoch": 0.9262676898297448, "grad_norm": 0.09228515625, "learning_rate": 0.0007447923831275989, "loss": 5.1565, "step": 8159 }, { "epoch": 0.9263812169396639, "grad_norm": 0.09423828125, "learning_rate": 0.0007433084394809431, "loss": 5.1695, "step": 8160 }, { "epoch": 0.926494744049583, "grad_norm": 0.1005859375, "learning_rate": 0.0007418256453538459, "loss": 5.1713, "step": 8161 }, { "epoch": 0.9266082711595021, "grad_norm": 0.1005859375, "learning_rate": 0.0007403440055249491, "loss": 5.1808, "step": 8162 }, { "epoch": 0.9267217982694211, "grad_norm": 0.09716796875, "learning_rate": 0.0007388635247691733, "loss": 5.175, "step": 8163 }, { "epoch": 0.9268353253793402, "grad_norm": 0.1015625, "learning_rate": 0.0007373842078577038, "loss": 5.1892, "step": 8164 }, { "epoch": 0.9269488524892593, "grad_norm": 0.09716796875, "learning_rate": 0.0007359060595579752, "loss": 5.175, "step": 8165 }, { "epoch": 0.9270623795991784, "grad_norm": 0.099609375, "learning_rate": 0.0007344290846336561, "loss": 5.1682, "step": 8166 }, { "epoch": 0.9271759067090974, "grad_norm": 0.09326171875, "learning_rate": 0.0007329532878446339, "loss": 5.173, "step": 8167 }, { "epoch": 0.9272894338190165, "grad_norm": 0.08935546875, "learning_rate": 0.0007314786739469987, "loss": 5.1673, "step": 8168 }, { "epoch": 0.9274029609289356, "grad_norm": 0.0927734375, "learning_rate": 0.0007300052476930286, "loss": 5.172, "step": 8169 }, { "epoch": 0.9275164880388547, "grad_norm": 0.0927734375, "learning_rate": 0.0007285330138311746, "loss": 5.1901, "step": 8170 }, { "epoch": 0.9276300151487737, "grad_norm": 0.08837890625, "learning_rate": 0.0007270619771060443, "loss": 5.1783, "step": 8171 }, { "epoch": 0.9277435422586928, "grad_norm": 0.09521484375, "learning_rate": 0.000725592142258388, "loss": 5.1678, "step": 8172 }, { "epoch": 0.9278570693686119, "grad_norm": 0.08837890625, "learning_rate": 0.0007241235140250822, "loss": 5.1475, "step": 8173 }, { "epoch": 0.927970596478531, "grad_norm": 0.09765625, "learning_rate": 0.0007226560971391145, "loss": 5.1827, "step": 8174 }, { "epoch": 0.92808412358845, "grad_norm": 0.09716796875, "learning_rate": 0.00072118989632957, "loss": 5.1874, "step": 8175 }, { "epoch": 0.9281976506983691, "grad_norm": 0.095703125, "learning_rate": 0.0007197249163216122, "loss": 5.1481, "step": 8176 }, { "epoch": 0.9283111778082882, "grad_norm": 0.09521484375, "learning_rate": 0.0007182611618364736, "loss": 5.1714, "step": 8177 }, { "epoch": 0.9284247049182073, "grad_norm": 0.08740234375, "learning_rate": 0.0007167986375914346, "loss": 5.1695, "step": 8178 }, { "epoch": 0.9285382320281264, "grad_norm": 0.0869140625, "learning_rate": 0.000715337348299812, "loss": 5.1633, "step": 8179 }, { "epoch": 0.9286517591380454, "grad_norm": 0.0869140625, "learning_rate": 0.0007138772986709421, "loss": 5.1806, "step": 8180 }, { "epoch": 0.9287652862479645, "grad_norm": 0.083984375, "learning_rate": 0.0007124184934101665, "loss": 5.1536, "step": 8181 }, { "epoch": 0.9288788133578836, "grad_norm": 0.0859375, "learning_rate": 0.0007109609372188164, "loss": 5.1775, "step": 8182 }, { "epoch": 0.9289923404678027, "grad_norm": 0.0859375, "learning_rate": 0.0007095046347941978, "loss": 5.1505, "step": 8183 }, { "epoch": 0.9291058675777217, "grad_norm": 0.0908203125, "learning_rate": 0.0007080495908295759, "loss": 5.1821, "step": 8184 }, { "epoch": 0.9292193946876408, "grad_norm": 0.09033203125, "learning_rate": 0.0007065958100141607, "loss": 5.1717, "step": 8185 }, { "epoch": 0.9293329217975599, "grad_norm": 0.09423828125, "learning_rate": 0.0007051432970330902, "loss": 5.1634, "step": 8186 }, { "epoch": 0.929446448907479, "grad_norm": 0.09375, "learning_rate": 0.0007036920565674181, "loss": 5.1572, "step": 8187 }, { "epoch": 0.929559976017398, "grad_norm": 0.09228515625, "learning_rate": 0.0007022420932940962, "loss": 5.1538, "step": 8188 }, { "epoch": 0.9296735031273171, "grad_norm": 0.09326171875, "learning_rate": 0.00070079341188596, "loss": 5.1414, "step": 8189 }, { "epoch": 0.9297870302372362, "grad_norm": 0.08984375, "learning_rate": 0.000699346017011715, "loss": 5.1603, "step": 8190 }, { "epoch": 0.9299005573471553, "grad_norm": 0.0888671875, "learning_rate": 0.0006978999133359192, "loss": 5.1521, "step": 8191 }, { "epoch": 0.9300140844570743, "grad_norm": 0.09375, "learning_rate": 0.0006964551055189712, "loss": 5.1734, "step": 8192 }, { "epoch": 0.9301276115669934, "grad_norm": 0.09033203125, "learning_rate": 0.000695011598217091, "loss": 5.1705, "step": 8193 }, { "epoch": 0.9302411386769125, "grad_norm": 0.0947265625, "learning_rate": 0.0006935693960823097, "loss": 5.1803, "step": 8194 }, { "epoch": 0.9303546657868316, "grad_norm": 0.0849609375, "learning_rate": 0.0006921285037624507, "loss": 5.176, "step": 8195 }, { "epoch": 0.9304681928967506, "grad_norm": 0.087890625, "learning_rate": 0.0006906889259011165, "loss": 5.1605, "step": 8196 }, { "epoch": 0.9305817200066697, "grad_norm": 0.0849609375, "learning_rate": 0.0006892506671376742, "loss": 5.1814, "step": 8197 }, { "epoch": 0.9306952471165888, "grad_norm": 0.0849609375, "learning_rate": 0.0006878137321072394, "loss": 5.1576, "step": 8198 }, { "epoch": 0.9308087742265079, "grad_norm": 0.0859375, "learning_rate": 0.0006863781254406611, "loss": 5.184, "step": 8199 }, { "epoch": 0.930922301336427, "grad_norm": 0.08984375, "learning_rate": 0.0006849438517645077, "loss": 5.1449, "step": 8200 }, { "epoch": 0.931035828446346, "grad_norm": 0.0888671875, "learning_rate": 0.0006835109157010528, "loss": 5.1922, "step": 8201 }, { "epoch": 0.9311493555562651, "grad_norm": 0.09228515625, "learning_rate": 0.0006820793218682572, "loss": 5.1593, "step": 8202 }, { "epoch": 0.9312628826661842, "grad_norm": 0.0908203125, "learning_rate": 0.0006806490748797581, "loss": 5.1539, "step": 8203 }, { "epoch": 0.9313764097761033, "grad_norm": 0.09423828125, "learning_rate": 0.0006792201793448501, "loss": 5.1673, "step": 8204 }, { "epoch": 0.9314899368860223, "grad_norm": 0.08544921875, "learning_rate": 0.0006777926398684743, "loss": 5.1838, "step": 8205 }, { "epoch": 0.9316034639959414, "grad_norm": 0.08740234375, "learning_rate": 0.0006763664610512007, "loss": 5.1741, "step": 8206 }, { "epoch": 0.9317169911058605, "grad_norm": 0.08984375, "learning_rate": 0.0006749416474892148, "loss": 5.161, "step": 8207 }, { "epoch": 0.9318305182157796, "grad_norm": 0.08544921875, "learning_rate": 0.0006735182037743013, "loss": 5.1653, "step": 8208 }, { "epoch": 0.9319440453256986, "grad_norm": 0.08837890625, "learning_rate": 0.000672096134493831, "loss": 5.1721, "step": 8209 }, { "epoch": 0.9320575724356177, "grad_norm": 0.08740234375, "learning_rate": 0.0006706754442307456, "loss": 5.1812, "step": 8210 }, { "epoch": 0.9321710995455368, "grad_norm": 0.0791015625, "learning_rate": 0.0006692561375635414, "loss": 5.1738, "step": 8211 }, { "epoch": 0.9322846266554559, "grad_norm": 0.0849609375, "learning_rate": 0.0006678382190662568, "loss": 5.1649, "step": 8212 }, { "epoch": 0.9323981537653749, "grad_norm": 0.0859375, "learning_rate": 0.0006664216933084562, "loss": 5.1495, "step": 8213 }, { "epoch": 0.932511680875294, "grad_norm": 0.0927734375, "learning_rate": 0.0006650065648552158, "loss": 5.16, "step": 8214 }, { "epoch": 0.9326252079852131, "grad_norm": 0.08349609375, "learning_rate": 0.0006635928382671077, "loss": 5.1602, "step": 8215 }, { "epoch": 0.9327387350951322, "grad_norm": 0.083984375, "learning_rate": 0.0006621805181001876, "loss": 5.1514, "step": 8216 }, { "epoch": 0.9328522622050512, "grad_norm": 0.08740234375, "learning_rate": 0.0006607696089059775, "loss": 5.1794, "step": 8217 }, { "epoch": 0.9329657893149703, "grad_norm": 0.0810546875, "learning_rate": 0.0006593601152314532, "loss": 5.1523, "step": 8218 }, { "epoch": 0.9330793164248894, "grad_norm": 0.08154296875, "learning_rate": 0.0006579520416190272, "loss": 5.1533, "step": 8219 }, { "epoch": 0.9331928435348085, "grad_norm": 0.08740234375, "learning_rate": 0.0006565453926065377, "loss": 5.155, "step": 8220 }, { "epoch": 0.9333063706447275, "grad_norm": 0.0888671875, "learning_rate": 0.0006551401727272299, "loss": 5.1594, "step": 8221 }, { "epoch": 0.9334198977546466, "grad_norm": 0.0849609375, "learning_rate": 0.0006537363865097438, "loss": 5.1779, "step": 8222 }, { "epoch": 0.9335334248645657, "grad_norm": 0.08251953125, "learning_rate": 0.0006523340384781003, "loss": 5.1652, "step": 8223 }, { "epoch": 0.9336469519744848, "grad_norm": 0.080078125, "learning_rate": 0.0006509331331516834, "loss": 5.1586, "step": 8224 }, { "epoch": 0.9337604790844038, "grad_norm": 0.08203125, "learning_rate": 0.0006495336750452292, "loss": 5.1692, "step": 8225 }, { "epoch": 0.9338740061943229, "grad_norm": 0.08203125, "learning_rate": 0.0006481356686688084, "loss": 5.1772, "step": 8226 }, { "epoch": 0.933987533304242, "grad_norm": 0.08056640625, "learning_rate": 0.0006467391185278153, "loss": 5.1742, "step": 8227 }, { "epoch": 0.9341010604141611, "grad_norm": 0.0830078125, "learning_rate": 0.0006453440291229491, "loss": 5.1521, "step": 8228 }, { "epoch": 0.9342145875240802, "grad_norm": 0.08154296875, "learning_rate": 0.0006439504049502026, "loss": 5.1452, "step": 8229 }, { "epoch": 0.9343281146339992, "grad_norm": 0.0810546875, "learning_rate": 0.0006425582505008459, "loss": 5.1677, "step": 8230 }, { "epoch": 0.9344416417439183, "grad_norm": 0.08251953125, "learning_rate": 0.0006411675702614131, "loss": 5.1749, "step": 8231 }, { "epoch": 0.9345551688538374, "grad_norm": 0.08642578125, "learning_rate": 0.0006397783687136868, "loss": 5.1502, "step": 8232 }, { "epoch": 0.9346686959637565, "grad_norm": 0.08544921875, "learning_rate": 0.0006383906503346846, "loss": 5.1791, "step": 8233 }, { "epoch": 0.9347822230736755, "grad_norm": 0.0859375, "learning_rate": 0.0006370044195966443, "loss": 5.152, "step": 8234 }, { "epoch": 0.9348957501835946, "grad_norm": 0.0849609375, "learning_rate": 0.0006356196809670089, "loss": 5.144, "step": 8235 }, { "epoch": 0.9350092772935137, "grad_norm": 0.0888671875, "learning_rate": 0.000634236438908414, "loss": 5.1706, "step": 8236 }, { "epoch": 0.9351228044034328, "grad_norm": 0.0830078125, "learning_rate": 0.0006328546978786702, "loss": 5.1641, "step": 8237 }, { "epoch": 0.9352363315133518, "grad_norm": 0.08203125, "learning_rate": 0.0006314744623307526, "loss": 5.1651, "step": 8238 }, { "epoch": 0.9353498586232709, "grad_norm": 0.0849609375, "learning_rate": 0.0006300957367127829, "loss": 5.1847, "step": 8239 }, { "epoch": 0.93546338573319, "grad_norm": 0.07958984375, "learning_rate": 0.0006287185254680183, "loss": 5.1746, "step": 8240 }, { "epoch": 0.9355769128431091, "grad_norm": 0.0830078125, "learning_rate": 0.0006273428330348338, "loss": 5.16, "step": 8241 }, { "epoch": 0.9356904399530281, "grad_norm": 0.0869140625, "learning_rate": 0.0006259686638467119, "loss": 5.1647, "step": 8242 }, { "epoch": 0.9358039670629472, "grad_norm": 0.08154296875, "learning_rate": 0.0006245960223322241, "loss": 5.1594, "step": 8243 }, { "epoch": 0.9359174941728663, "grad_norm": 0.083984375, "learning_rate": 0.0006232249129150201, "loss": 5.1901, "step": 8244 }, { "epoch": 0.9360310212827854, "grad_norm": 0.08154296875, "learning_rate": 0.0006218553400138102, "loss": 5.1631, "step": 8245 }, { "epoch": 0.9361445483927044, "grad_norm": 0.0810546875, "learning_rate": 0.0006204873080423549, "loss": 5.1782, "step": 8246 }, { "epoch": 0.9362580755026235, "grad_norm": 0.080078125, "learning_rate": 0.0006191208214094484, "loss": 5.1767, "step": 8247 }, { "epoch": 0.9363716026125426, "grad_norm": 0.08154296875, "learning_rate": 0.0006177558845189029, "loss": 5.1477, "step": 8248 }, { "epoch": 0.9364851297224617, "grad_norm": 0.07763671875, "learning_rate": 0.0006163925017695389, "loss": 5.177, "step": 8249 }, { "epoch": 0.9365986568323807, "grad_norm": 0.078125, "learning_rate": 0.0006150306775551659, "loss": 5.1655, "step": 8250 }, { "epoch": 0.9367121839422998, "grad_norm": 0.07861328125, "learning_rate": 0.0006136704162645724, "loss": 5.17, "step": 8251 }, { "epoch": 0.9368257110522189, "grad_norm": 0.08349609375, "learning_rate": 0.0006123117222815085, "loss": 5.1784, "step": 8252 }, { "epoch": 0.936939238162138, "grad_norm": 0.07763671875, "learning_rate": 0.0006109545999846751, "loss": 5.1483, "step": 8253 }, { "epoch": 0.937052765272057, "grad_norm": 0.08056640625, "learning_rate": 0.0006095990537477063, "loss": 5.1718, "step": 8254 }, { "epoch": 0.9371662923819761, "grad_norm": 0.0810546875, "learning_rate": 0.0006082450879391579, "loss": 5.1824, "step": 8255 }, { "epoch": 0.9372798194918952, "grad_norm": 0.08349609375, "learning_rate": 0.0006068927069224924, "loss": 5.1844, "step": 8256 }, { "epoch": 0.9373933466018143, "grad_norm": 0.0791015625, "learning_rate": 0.0006055419150560646, "loss": 5.1544, "step": 8257 }, { "epoch": 0.9375068737117334, "grad_norm": 0.0791015625, "learning_rate": 0.0006041927166931078, "loss": 5.1615, "step": 8258 }, { "epoch": 0.9376204008216524, "grad_norm": 0.0810546875, "learning_rate": 0.0006028451161817206, "loss": 5.1449, "step": 8259 }, { "epoch": 0.9377339279315715, "grad_norm": 0.08447265625, "learning_rate": 0.0006014991178648515, "loss": 5.1657, "step": 8260 }, { "epoch": 0.9378474550414906, "grad_norm": 0.08203125, "learning_rate": 0.0006001547260802855, "loss": 5.1387, "step": 8261 }, { "epoch": 0.9379609821514097, "grad_norm": 0.08203125, "learning_rate": 0.0005988119451606312, "loss": 5.1431, "step": 8262 }, { "epoch": 0.9380745092613287, "grad_norm": 0.08203125, "learning_rate": 0.0005974707794333036, "loss": 5.1639, "step": 8263 }, { "epoch": 0.9381880363712478, "grad_norm": 0.07958984375, "learning_rate": 0.0005961312332205157, "loss": 5.1696, "step": 8264 }, { "epoch": 0.9383015634811669, "grad_norm": 0.07763671875, "learning_rate": 0.0005947933108392579, "loss": 5.1578, "step": 8265 }, { "epoch": 0.938415090591086, "grad_norm": 0.080078125, "learning_rate": 0.0005934570166012898, "loss": 5.1651, "step": 8266 }, { "epoch": 0.938528617701005, "grad_norm": 0.078125, "learning_rate": 0.0005921223548131225, "loss": 5.1502, "step": 8267 }, { "epoch": 0.9386421448109241, "grad_norm": 0.0751953125, "learning_rate": 0.0005907893297760069, "loss": 5.1696, "step": 8268 }, { "epoch": 0.9387556719208432, "grad_norm": 0.076171875, "learning_rate": 0.0005894579457859194, "loss": 5.1743, "step": 8269 }, { "epoch": 0.9388691990307623, "grad_norm": 0.0771484375, "learning_rate": 0.0005881282071335464, "loss": 5.1559, "step": 8270 }, { "epoch": 0.9389827261406813, "grad_norm": 0.07666015625, "learning_rate": 0.0005868001181042733, "loss": 5.1433, "step": 8271 }, { "epoch": 0.9390962532506004, "grad_norm": 0.08251953125, "learning_rate": 0.0005854736829781681, "loss": 5.1427, "step": 8272 }, { "epoch": 0.9392097803605195, "grad_norm": 0.07666015625, "learning_rate": 0.0005841489060299701, "loss": 5.1766, "step": 8273 }, { "epoch": 0.9393233074704386, "grad_norm": 0.078125, "learning_rate": 0.000582825791529073, "loss": 5.1475, "step": 8274 }, { "epoch": 0.9394368345803576, "grad_norm": 0.0791015625, "learning_rate": 0.0005815043437395144, "loss": 5.1636, "step": 8275 }, { "epoch": 0.9395503616902767, "grad_norm": 0.07958984375, "learning_rate": 0.0005801845669199594, "loss": 5.1669, "step": 8276 }, { "epoch": 0.9396638888001958, "grad_norm": 0.07763671875, "learning_rate": 0.0005788664653236886, "loss": 5.1647, "step": 8277 }, { "epoch": 0.9397774159101149, "grad_norm": 0.08154296875, "learning_rate": 0.0005775500431985838, "loss": 5.1584, "step": 8278 }, { "epoch": 0.939890943020034, "grad_norm": 0.07421875, "learning_rate": 0.0005762353047871148, "loss": 5.1642, "step": 8279 }, { "epoch": 0.940004470129953, "grad_norm": 0.07763671875, "learning_rate": 0.000574922254326324, "loss": 5.1611, "step": 8280 }, { "epoch": 0.9401179972398721, "grad_norm": 0.07763671875, "learning_rate": 0.0005736108960478148, "loss": 5.1681, "step": 8281 }, { "epoch": 0.9402315243497912, "grad_norm": 0.07666015625, "learning_rate": 0.0005723012341777374, "loss": 5.1757, "step": 8282 }, { "epoch": 0.9403450514597103, "grad_norm": 0.0771484375, "learning_rate": 0.0005709932729367744, "loss": 5.1648, "step": 8283 }, { "epoch": 0.9404585785696293, "grad_norm": 0.076171875, "learning_rate": 0.0005696870165401276, "loss": 5.1755, "step": 8284 }, { "epoch": 0.9405721056795484, "grad_norm": 0.07568359375, "learning_rate": 0.0005683824691975056, "loss": 5.166, "step": 8285 }, { "epoch": 0.9406856327894675, "grad_norm": 0.07470703125, "learning_rate": 0.0005670796351131082, "loss": 5.1804, "step": 8286 }, { "epoch": 0.9407991598993866, "grad_norm": 0.07470703125, "learning_rate": 0.0005657785184856138, "loss": 5.1485, "step": 8287 }, { "epoch": 0.9409126870093056, "grad_norm": 0.07568359375, "learning_rate": 0.0005644791235081668, "loss": 5.1624, "step": 8288 }, { "epoch": 0.9410262141192247, "grad_norm": 0.07861328125, "learning_rate": 0.000563181454368362, "loss": 5.1624, "step": 8289 }, { "epoch": 0.9411397412291438, "grad_norm": 0.08056640625, "learning_rate": 0.0005618855152482334, "loss": 5.166, "step": 8290 }, { "epoch": 0.9412532683390629, "grad_norm": 0.08154296875, "learning_rate": 0.0005605913103242381, "loss": 5.1648, "step": 8291 }, { "epoch": 0.9413667954489819, "grad_norm": 0.07763671875, "learning_rate": 0.000559298843767247, "loss": 5.1579, "step": 8292 }, { "epoch": 0.941480322558901, "grad_norm": 0.07470703125, "learning_rate": 0.000558008119742526, "loss": 5.166, "step": 8293 }, { "epoch": 0.9415938496688201, "grad_norm": 0.07568359375, "learning_rate": 0.0005567191424097268, "loss": 5.1716, "step": 8294 }, { "epoch": 0.9417073767787392, "grad_norm": 0.078125, "learning_rate": 0.0005554319159228716, "loss": 5.1862, "step": 8295 }, { "epoch": 0.9418209038886582, "grad_norm": 0.07666015625, "learning_rate": 0.0005541464444303397, "loss": 5.1987, "step": 8296 }, { "epoch": 0.9419344309985773, "grad_norm": 0.078125, "learning_rate": 0.0005528627320748554, "loss": 5.161, "step": 8297 }, { "epoch": 0.9420479581084964, "grad_norm": 0.078125, "learning_rate": 0.0005515807829934728, "loss": 5.1589, "step": 8298 }, { "epoch": 0.9421614852184155, "grad_norm": 0.07763671875, "learning_rate": 0.0005503006013175642, "loss": 5.1627, "step": 8299 }, { "epoch": 0.9422750123283345, "grad_norm": 0.07666015625, "learning_rate": 0.0005490221911728059, "loss": 5.1718, "step": 8300 }, { "epoch": 0.9423885394382536, "grad_norm": 0.076171875, "learning_rate": 0.0005477455566791649, "loss": 5.1723, "step": 8301 }, { "epoch": 0.9425020665481727, "grad_norm": 0.0771484375, "learning_rate": 0.0005464707019508856, "loss": 5.1596, "step": 8302 }, { "epoch": 0.9426155936580918, "grad_norm": 0.07421875, "learning_rate": 0.0005451976310964772, "loss": 5.162, "step": 8303 }, { "epoch": 0.9427291207680109, "grad_norm": 0.07763671875, "learning_rate": 0.0005439263482186992, "loss": 5.1533, "step": 8304 }, { "epoch": 0.9428426478779299, "grad_norm": 0.076171875, "learning_rate": 0.0005426568574145496, "loss": 5.1798, "step": 8305 }, { "epoch": 0.942956174987849, "grad_norm": 0.07958984375, "learning_rate": 0.0005413891627752509, "loss": 5.1338, "step": 8306 }, { "epoch": 0.9430697020977681, "grad_norm": 0.0771484375, "learning_rate": 0.0005401232683862367, "loss": 5.1738, "step": 8307 }, { "epoch": 0.9431832292076872, "grad_norm": 0.078125, "learning_rate": 0.0005388591783271399, "loss": 5.1531, "step": 8308 }, { "epoch": 0.9432967563176062, "grad_norm": 0.07666015625, "learning_rate": 0.0005375968966717769, "loss": 5.157, "step": 8309 }, { "epoch": 0.9434102834275253, "grad_norm": 0.07421875, "learning_rate": 0.0005363364274881375, "loss": 5.1576, "step": 8310 }, { "epoch": 0.9435238105374444, "grad_norm": 0.07275390625, "learning_rate": 0.0005350777748383697, "loss": 5.1734, "step": 8311 }, { "epoch": 0.9436373376473635, "grad_norm": 0.07470703125, "learning_rate": 0.0005338209427787677, "loss": 5.1639, "step": 8312 }, { "epoch": 0.9437508647572825, "grad_norm": 0.076171875, "learning_rate": 0.0005325659353597575, "loss": 5.1678, "step": 8313 }, { "epoch": 0.9438643918672016, "grad_norm": 0.07666015625, "learning_rate": 0.0005313127566258869, "loss": 5.1637, "step": 8314 }, { "epoch": 0.9439779189771207, "grad_norm": 0.0771484375, "learning_rate": 0.0005300614106158077, "loss": 5.1608, "step": 8315 }, { "epoch": 0.9440914460870398, "grad_norm": 0.07470703125, "learning_rate": 0.0005288119013622674, "loss": 5.1805, "step": 8316 }, { "epoch": 0.9442049731969588, "grad_norm": 0.07373046875, "learning_rate": 0.0005275642328920929, "loss": 5.1689, "step": 8317 }, { "epoch": 0.9443185003068779, "grad_norm": 0.07763671875, "learning_rate": 0.0005263184092261792, "loss": 5.1627, "step": 8318 }, { "epoch": 0.944432027416797, "grad_norm": 0.07470703125, "learning_rate": 0.0005250744343794766, "loss": 5.1598, "step": 8319 }, { "epoch": 0.9445455545267161, "grad_norm": 0.07470703125, "learning_rate": 0.0005238323123609753, "loss": 5.1602, "step": 8320 }, { "epoch": 0.9446590816366351, "grad_norm": 0.07275390625, "learning_rate": 0.000522592047173697, "loss": 5.1608, "step": 8321 }, { "epoch": 0.9447726087465542, "grad_norm": 0.07373046875, "learning_rate": 0.000521353642814677, "loss": 5.1616, "step": 8322 }, { "epoch": 0.9448861358564733, "grad_norm": 0.0732421875, "learning_rate": 0.0005201171032749552, "loss": 5.1671, "step": 8323 }, { "epoch": 0.9449996629663924, "grad_norm": 0.0751953125, "learning_rate": 0.0005188824325395605, "loss": 5.1789, "step": 8324 }, { "epoch": 0.9451131900763114, "grad_norm": 0.0732421875, "learning_rate": 0.0005176496345875004, "loss": 5.1621, "step": 8325 }, { "epoch": 0.9452267171862306, "grad_norm": 0.07177734375, "learning_rate": 0.0005164187133917455, "loss": 5.1653, "step": 8326 }, { "epoch": 0.9453402442961497, "grad_norm": 0.072265625, "learning_rate": 0.0005151896729192194, "loss": 5.164, "step": 8327 }, { "epoch": 0.9454537714060688, "grad_norm": 0.07177734375, "learning_rate": 0.0005139625171307838, "loss": 5.1466, "step": 8328 }, { "epoch": 0.9455672985159879, "grad_norm": 0.07421875, "learning_rate": 0.0005127372499812276, "loss": 5.1636, "step": 8329 }, { "epoch": 0.9456808256259069, "grad_norm": 0.0771484375, "learning_rate": 0.0005115138754192517, "loss": 5.1528, "step": 8330 }, { "epoch": 0.945794352735826, "grad_norm": 0.072265625, "learning_rate": 0.0005102923973874588, "loss": 5.1666, "step": 8331 }, { "epoch": 0.9459078798457451, "grad_norm": 0.0732421875, "learning_rate": 0.0005090728198223393, "loss": 5.1435, "step": 8332 }, { "epoch": 0.9460214069556642, "grad_norm": 0.07470703125, "learning_rate": 0.0005078551466542587, "loss": 5.1327, "step": 8333 }, { "epoch": 0.9461349340655832, "grad_norm": 0.07421875, "learning_rate": 0.0005066393818074457, "loss": 5.1594, "step": 8334 }, { "epoch": 0.9462484611755023, "grad_norm": 0.07177734375, "learning_rate": 0.0005054255291999777, "loss": 5.1938, "step": 8335 }, { "epoch": 0.9463619882854214, "grad_norm": 0.07373046875, "learning_rate": 0.0005042135927437716, "loss": 5.1565, "step": 8336 }, { "epoch": 0.9464755153953405, "grad_norm": 0.07275390625, "learning_rate": 0.000503003576344567, "loss": 5.1678, "step": 8337 }, { "epoch": 0.9465890425052595, "grad_norm": 0.07275390625, "learning_rate": 0.0005017954839019172, "loss": 5.1839, "step": 8338 }, { "epoch": 0.9467025696151786, "grad_norm": 0.07373046875, "learning_rate": 0.0005005893193091736, "loss": 5.168, "step": 8339 }, { "epoch": 0.9468160967250977, "grad_norm": 0.072265625, "learning_rate": 0.0004993850864534762, "loss": 5.1828, "step": 8340 }, { "epoch": 0.9469296238350168, "grad_norm": 0.07421875, "learning_rate": 0.0004981827892157389, "loss": 5.1729, "step": 8341 }, { "epoch": 0.9470431509449359, "grad_norm": 0.07373046875, "learning_rate": 0.0004969824314706371, "loss": 5.1595, "step": 8342 }, { "epoch": 0.9471566780548549, "grad_norm": 0.0732421875, "learning_rate": 0.0004957840170865964, "loss": 5.1748, "step": 8343 }, { "epoch": 0.947270205164774, "grad_norm": 0.07568359375, "learning_rate": 0.0004945875499257796, "loss": 5.1526, "step": 8344 }, { "epoch": 0.9473837322746931, "grad_norm": 0.072265625, "learning_rate": 0.0004933930338440739, "loss": 5.1646, "step": 8345 }, { "epoch": 0.9474972593846122, "grad_norm": 0.0732421875, "learning_rate": 0.0004922004726910779, "loss": 5.1554, "step": 8346 }, { "epoch": 0.9476107864945312, "grad_norm": 0.07275390625, "learning_rate": 0.0004910098703100919, "loss": 5.1661, "step": 8347 }, { "epoch": 0.9477243136044503, "grad_norm": 0.0751953125, "learning_rate": 0.0004898212305381015, "loss": 5.1527, "step": 8348 }, { "epoch": 0.9478378407143694, "grad_norm": 0.0732421875, "learning_rate": 0.0004886345572057683, "loss": 5.1607, "step": 8349 }, { "epoch": 0.9479513678242885, "grad_norm": 0.0771484375, "learning_rate": 0.00048744985413741725, "loss": 5.1639, "step": 8350 }, { "epoch": 0.9480648949342075, "grad_norm": 0.07373046875, "learning_rate": 0.0004862671251510229, "loss": 5.1745, "step": 8351 }, { "epoch": 0.9481784220441266, "grad_norm": 0.07275390625, "learning_rate": 0.00048508637405819763, "loss": 5.159, "step": 8352 }, { "epoch": 0.9482919491540457, "grad_norm": 0.076171875, "learning_rate": 0.00048390760466418016, "loss": 5.1807, "step": 8353 }, { "epoch": 0.9484054762639648, "grad_norm": 0.07177734375, "learning_rate": 0.0004827308207678229, "loss": 5.1392, "step": 8354 }, { "epoch": 0.9485190033738838, "grad_norm": 0.072265625, "learning_rate": 0.0004815560261615786, "loss": 5.1411, "step": 8355 }, { "epoch": 0.9486325304838029, "grad_norm": 0.0732421875, "learning_rate": 0.00048038322463149025, "loss": 5.1527, "step": 8356 }, { "epoch": 0.948746057593722, "grad_norm": 0.072265625, "learning_rate": 0.0004792124199571763, "loss": 5.1696, "step": 8357 }, { "epoch": 0.9488595847036411, "grad_norm": 0.06982421875, "learning_rate": 0.0004780436159118218, "loss": 5.1748, "step": 8358 }, { "epoch": 0.9489731118135601, "grad_norm": 0.07080078125, "learning_rate": 0.0004768768162621625, "loss": 5.1834, "step": 8359 }, { "epoch": 0.9490866389234792, "grad_norm": 0.07177734375, "learning_rate": 0.00047571202476847567, "loss": 5.167, "step": 8360 }, { "epoch": 0.9492001660333983, "grad_norm": 0.0712890625, "learning_rate": 0.00047454924518456595, "loss": 5.158, "step": 8361 }, { "epoch": 0.9493136931433174, "grad_norm": 0.0703125, "learning_rate": 0.00047338848125775515, "loss": 5.1563, "step": 8362 }, { "epoch": 0.9494272202532364, "grad_norm": 0.07177734375, "learning_rate": 0.0004722297367288681, "loss": 5.1888, "step": 8363 }, { "epoch": 0.9495407473631555, "grad_norm": 0.072265625, "learning_rate": 0.0004710730153322224, "loss": 5.175, "step": 8364 }, { "epoch": 0.9496542744730746, "grad_norm": 0.0712890625, "learning_rate": 0.00046991832079561554, "loss": 5.1681, "step": 8365 }, { "epoch": 0.9497678015829937, "grad_norm": 0.0712890625, "learning_rate": 0.0004687656568403127, "loss": 5.1737, "step": 8366 }, { "epoch": 0.9498813286929128, "grad_norm": 0.0693359375, "learning_rate": 0.00046761502718103585, "loss": 5.1348, "step": 8367 }, { "epoch": 0.9499948558028318, "grad_norm": 0.0712890625, "learning_rate": 0.0004664664355259496, "loss": 5.141, "step": 8368 }, { "epoch": 0.9501083829127509, "grad_norm": 0.0712890625, "learning_rate": 0.000465319885576652, "loss": 5.1691, "step": 8369 }, { "epoch": 0.95022191002267, "grad_norm": 0.0712890625, "learning_rate": 0.0004641753810281601, "loss": 5.154, "step": 8370 }, { "epoch": 0.9503354371325891, "grad_norm": 0.072265625, "learning_rate": 0.00046303292556890005, "loss": 5.1613, "step": 8371 }, { "epoch": 0.9504489642425081, "grad_norm": 0.0712890625, "learning_rate": 0.0004618925228806939, "loss": 5.1707, "step": 8372 }, { "epoch": 0.9505624913524272, "grad_norm": 0.07275390625, "learning_rate": 0.0004607541766387488, "loss": 5.1596, "step": 8373 }, { "epoch": 0.9506760184623463, "grad_norm": 0.06982421875, "learning_rate": 0.0004596178905116432, "loss": 5.1814, "step": 8374 }, { "epoch": 0.9507895455722654, "grad_norm": 0.07080078125, "learning_rate": 0.00045848366816131803, "loss": 5.1379, "step": 8375 }, { "epoch": 0.9509030726821844, "grad_norm": 0.06884765625, "learning_rate": 0.00045735151324306154, "loss": 5.1881, "step": 8376 }, { "epoch": 0.9510165997921035, "grad_norm": 0.072265625, "learning_rate": 0.0004562214294055007, "loss": 5.1655, "step": 8377 }, { "epoch": 0.9511301269020226, "grad_norm": 0.072265625, "learning_rate": 0.000455093420290587, "loss": 5.1595, "step": 8378 }, { "epoch": 0.9512436540119417, "grad_norm": 0.0693359375, "learning_rate": 0.00045396748953358614, "loss": 5.1536, "step": 8379 }, { "epoch": 0.9513571811218607, "grad_norm": 0.07080078125, "learning_rate": 0.000452843640763066, "loss": 5.1454, "step": 8380 }, { "epoch": 0.9514707082317798, "grad_norm": 0.06982421875, "learning_rate": 0.0004517218776008838, "loss": 5.173, "step": 8381 }, { "epoch": 0.9515842353416989, "grad_norm": 0.07275390625, "learning_rate": 0.0004506022036621767, "loss": 5.1873, "step": 8382 }, { "epoch": 0.951697762451618, "grad_norm": 0.06982421875, "learning_rate": 0.0004494846225553477, "loss": 5.165, "step": 8383 }, { "epoch": 0.951811289561537, "grad_norm": 0.07177734375, "learning_rate": 0.00044836913788205636, "loss": 5.1473, "step": 8384 }, { "epoch": 0.9519248166714561, "grad_norm": 0.07080078125, "learning_rate": 0.00044725575323720424, "loss": 5.1566, "step": 8385 }, { "epoch": 0.9520383437813752, "grad_norm": 0.07080078125, "learning_rate": 0.0004461444722089277, "loss": 5.1857, "step": 8386 }, { "epoch": 0.9521518708912943, "grad_norm": 0.06982421875, "learning_rate": 0.00044503529837858116, "loss": 5.1842, "step": 8387 }, { "epoch": 0.9522653980012133, "grad_norm": 0.072265625, "learning_rate": 0.00044392823532072976, "loss": 5.1414, "step": 8388 }, { "epoch": 0.9523789251111324, "grad_norm": 0.0712890625, "learning_rate": 0.0004428232866031352, "loss": 5.1723, "step": 8389 }, { "epoch": 0.9524924522210515, "grad_norm": 0.0703125, "learning_rate": 0.0004417204557867458, "loss": 5.1782, "step": 8390 }, { "epoch": 0.9526059793309706, "grad_norm": 0.068359375, "learning_rate": 0.0004406197464256847, "loss": 5.1488, "step": 8391 }, { "epoch": 0.9527195064408897, "grad_norm": 0.0703125, "learning_rate": 0.000439521162067237, "loss": 5.1583, "step": 8392 }, { "epoch": 0.9528330335508087, "grad_norm": 0.072265625, "learning_rate": 0.00043842470625184095, "loss": 5.1665, "step": 8393 }, { "epoch": 0.9529465606607278, "grad_norm": 0.07080078125, "learning_rate": 0.0004373303825130741, "loss": 5.1728, "step": 8394 }, { "epoch": 0.9530600877706469, "grad_norm": 0.06982421875, "learning_rate": 0.00043623819437764357, "loss": 5.1462, "step": 8395 }, { "epoch": 0.953173614880566, "grad_norm": 0.06982421875, "learning_rate": 0.0004351481453653733, "loss": 5.1407, "step": 8396 }, { "epoch": 0.953287141990485, "grad_norm": 0.0693359375, "learning_rate": 0.00043406023898919405, "loss": 5.1738, "step": 8397 }, { "epoch": 0.9534006691004041, "grad_norm": 0.07177734375, "learning_rate": 0.00043297447875513084, "loss": 5.1405, "step": 8398 }, { "epoch": 0.9535141962103232, "grad_norm": 0.06884765625, "learning_rate": 0.00043189086816229275, "loss": 5.1716, "step": 8399 }, { "epoch": 0.9536277233202423, "grad_norm": 0.06982421875, "learning_rate": 0.0004308094107028612, "loss": 5.1796, "step": 8400 }, { "epoch": 0.9537412504301613, "grad_norm": 0.0693359375, "learning_rate": 0.0004297301098620784, "loss": 5.1668, "step": 8401 }, { "epoch": 0.9538547775400804, "grad_norm": 0.0703125, "learning_rate": 0.00042865296911823614, "loss": 5.1635, "step": 8402 }, { "epoch": 0.9539683046499995, "grad_norm": 0.06982421875, "learning_rate": 0.0004275779919426653, "loss": 5.1601, "step": 8403 }, { "epoch": 0.9540818317599186, "grad_norm": 0.0703125, "learning_rate": 0.0004265051817997242, "loss": 5.1738, "step": 8404 }, { "epoch": 0.9541953588698376, "grad_norm": 0.0693359375, "learning_rate": 0.0004254345421467867, "loss": 5.1473, "step": 8405 }, { "epoch": 0.9543088859797567, "grad_norm": 0.072265625, "learning_rate": 0.00042436607643423266, "loss": 5.1403, "step": 8406 }, { "epoch": 0.9544224130896758, "grad_norm": 0.0703125, "learning_rate": 0.00042329978810543467, "loss": 5.1473, "step": 8407 }, { "epoch": 0.9545359401995949, "grad_norm": 0.0703125, "learning_rate": 0.0004222356805967502, "loss": 5.173, "step": 8408 }, { "epoch": 0.9546494673095139, "grad_norm": 0.07080078125, "learning_rate": 0.0004211737573375065, "loss": 5.1485, "step": 8409 }, { "epoch": 0.954762994419433, "grad_norm": 0.06884765625, "learning_rate": 0.00042011402174999316, "loss": 5.1655, "step": 8410 }, { "epoch": 0.9548765215293521, "grad_norm": 0.0693359375, "learning_rate": 0.00041905647724944787, "loss": 5.175, "step": 8411 }, { "epoch": 0.9549900486392712, "grad_norm": 0.0712890625, "learning_rate": 0.00041800112724404845, "loss": 5.1521, "step": 8412 }, { "epoch": 0.9551035757491902, "grad_norm": 0.0693359375, "learning_rate": 0.0004169479751349001, "loss": 5.1531, "step": 8413 }, { "epoch": 0.9552171028591093, "grad_norm": 0.0654296875, "learning_rate": 0.00041589702431602443, "loss": 5.1519, "step": 8414 }, { "epoch": 0.9553306299690284, "grad_norm": 0.06787109375, "learning_rate": 0.0004148482781743491, "loss": 5.1755, "step": 8415 }, { "epoch": 0.9554441570789475, "grad_norm": 0.0693359375, "learning_rate": 0.0004138017400896968, "loss": 5.1596, "step": 8416 }, { "epoch": 0.9555576841888666, "grad_norm": 0.072265625, "learning_rate": 0.00041275741343477457, "loss": 5.1418, "step": 8417 }, { "epoch": 0.9556712112987856, "grad_norm": 0.06982421875, "learning_rate": 0.00041171530157516143, "loss": 5.1324, "step": 8418 }, { "epoch": 0.9557847384087047, "grad_norm": 0.06787109375, "learning_rate": 0.00041067540786930034, "loss": 5.1526, "step": 8419 }, { "epoch": 0.9558982655186238, "grad_norm": 0.06982421875, "learning_rate": 0.0004096377356684842, "loss": 5.1527, "step": 8420 }, { "epoch": 0.9560117926285429, "grad_norm": 0.06787109375, "learning_rate": 0.00040860228831684787, "loss": 5.1771, "step": 8421 }, { "epoch": 0.9561253197384619, "grad_norm": 0.068359375, "learning_rate": 0.00040756906915135493, "loss": 5.1582, "step": 8422 }, { "epoch": 0.956238846848381, "grad_norm": 0.06884765625, "learning_rate": 0.0004065380815017898, "loss": 5.1622, "step": 8423 }, { "epoch": 0.9563523739583001, "grad_norm": 0.068359375, "learning_rate": 0.00040550932869074343, "loss": 5.175, "step": 8424 }, { "epoch": 0.9564659010682192, "grad_norm": 0.06689453125, "learning_rate": 0.00040448281403360583, "loss": 5.1768, "step": 8425 }, { "epoch": 0.9565794281781382, "grad_norm": 0.06884765625, "learning_rate": 0.0004034585408385536, "loss": 5.1851, "step": 8426 }, { "epoch": 0.9566929552880573, "grad_norm": 0.07080078125, "learning_rate": 0.00040243651240653945, "loss": 5.1442, "step": 8427 }, { "epoch": 0.9568064823979764, "grad_norm": 0.0712890625, "learning_rate": 0.00040141673203128247, "loss": 5.1728, "step": 8428 }, { "epoch": 0.9569200095078955, "grad_norm": 0.06640625, "learning_rate": 0.00040039920299925593, "loss": 5.1554, "step": 8429 }, { "epoch": 0.9570335366178145, "grad_norm": 0.068359375, "learning_rate": 0.00039938392858967895, "loss": 5.1575, "step": 8430 }, { "epoch": 0.9571470637277336, "grad_norm": 0.07080078125, "learning_rate": 0.0003983709120745035, "loss": 5.1694, "step": 8431 }, { "epoch": 0.9572605908376527, "grad_norm": 0.068359375, "learning_rate": 0.00039736015671840543, "loss": 5.1523, "step": 8432 }, { "epoch": 0.9573741179475718, "grad_norm": 0.0673828125, "learning_rate": 0.00039635166577877323, "loss": 5.1434, "step": 8433 }, { "epoch": 0.9574876450574908, "grad_norm": 0.0703125, "learning_rate": 0.0003953454425056984, "loss": 5.1476, "step": 8434 }, { "epoch": 0.9576011721674099, "grad_norm": 0.068359375, "learning_rate": 0.0003943414901419635, "loss": 5.1356, "step": 8435 }, { "epoch": 0.957714699277329, "grad_norm": 0.06787109375, "learning_rate": 0.00039333981192303313, "loss": 5.1555, "step": 8436 }, { "epoch": 0.9578282263872481, "grad_norm": 0.06884765625, "learning_rate": 0.000392340411077043, "loss": 5.1687, "step": 8437 }, { "epoch": 0.9579417534971671, "grad_norm": 0.06982421875, "learning_rate": 0.0003913432908247892, "loss": 5.1538, "step": 8438 }, { "epoch": 0.9580552806070862, "grad_norm": 0.0673828125, "learning_rate": 0.0003903484543797184, "loss": 5.1586, "step": 8439 }, { "epoch": 0.9581688077170053, "grad_norm": 0.06884765625, "learning_rate": 0.0003893559049479167, "loss": 5.1481, "step": 8440 }, { "epoch": 0.9582823348269244, "grad_norm": 0.06787109375, "learning_rate": 0.0003883656457281003, "loss": 5.1598, "step": 8441 }, { "epoch": 0.9583958619368435, "grad_norm": 0.06591796875, "learning_rate": 0.000387377679911604, "loss": 5.1739, "step": 8442 }, { "epoch": 0.9585093890467625, "grad_norm": 0.06689453125, "learning_rate": 0.0003863920106823723, "loss": 5.1644, "step": 8443 }, { "epoch": 0.9586229161566816, "grad_norm": 0.06640625, "learning_rate": 0.0003854086412169482, "loss": 5.1607, "step": 8444 }, { "epoch": 0.9587364432666007, "grad_norm": 0.06640625, "learning_rate": 0.0003844275746844632, "loss": 5.1585, "step": 8445 }, { "epoch": 0.9588499703765198, "grad_norm": 0.0673828125, "learning_rate": 0.0003834488142466266, "loss": 5.1447, "step": 8446 }, { "epoch": 0.9589634974864388, "grad_norm": 0.0673828125, "learning_rate": 0.00038247236305771683, "loss": 5.1594, "step": 8447 }, { "epoch": 0.9590770245963579, "grad_norm": 0.06787109375, "learning_rate": 0.0003814982242645688, "loss": 5.1618, "step": 8448 }, { "epoch": 0.959190551706277, "grad_norm": 0.0673828125, "learning_rate": 0.0003805264010065665, "loss": 5.1641, "step": 8449 }, { "epoch": 0.9593040788161961, "grad_norm": 0.0673828125, "learning_rate": 0.0003795568964156312, "loss": 5.1479, "step": 8450 }, { "epoch": 0.9594176059261151, "grad_norm": 0.06591796875, "learning_rate": 0.00037858971361621097, "loss": 5.1503, "step": 8451 }, { "epoch": 0.9595311330360342, "grad_norm": 0.06787109375, "learning_rate": 0.00037762485572527294, "loss": 5.1409, "step": 8452 }, { "epoch": 0.9596446601459533, "grad_norm": 0.06640625, "learning_rate": 0.00037666232585229013, "loss": 5.1789, "step": 8453 }, { "epoch": 0.9597581872558724, "grad_norm": 0.06640625, "learning_rate": 0.000375702127099234, "loss": 5.1513, "step": 8454 }, { "epoch": 0.9598717143657914, "grad_norm": 0.0654296875, "learning_rate": 0.0003747442625605628, "loss": 5.155, "step": 8455 }, { "epoch": 0.9599852414757105, "grad_norm": 0.06689453125, "learning_rate": 0.0003737887353232129, "loss": 5.1621, "step": 8456 }, { "epoch": 0.9600987685856296, "grad_norm": 0.06689453125, "learning_rate": 0.00037283554846658727, "loss": 5.1607, "step": 8457 }, { "epoch": 0.9602122956955487, "grad_norm": 0.06640625, "learning_rate": 0.00037188470506254747, "loss": 5.1743, "step": 8458 }, { "epoch": 0.9603258228054677, "grad_norm": 0.06591796875, "learning_rate": 0.0003709362081754021, "loss": 5.178, "step": 8459 }, { "epoch": 0.9604393499153868, "grad_norm": 0.06787109375, "learning_rate": 0.0003699900608618981, "loss": 5.1516, "step": 8460 }, { "epoch": 0.9605528770253059, "grad_norm": 0.06591796875, "learning_rate": 0.00036904626617120963, "loss": 5.18, "step": 8461 }, { "epoch": 0.960666404135225, "grad_norm": 0.06689453125, "learning_rate": 0.00036810482714492965, "loss": 5.1616, "step": 8462 }, { "epoch": 0.960779931245144, "grad_norm": 0.06689453125, "learning_rate": 0.0003671657468170594, "loss": 5.1806, "step": 8463 }, { "epoch": 0.9608934583550631, "grad_norm": 0.06689453125, "learning_rate": 0.0003662290282139982, "loss": 5.1625, "step": 8464 }, { "epoch": 0.9610069854649822, "grad_norm": 0.0673828125, "learning_rate": 0.00036529467435453446, "loss": 5.1943, "step": 8465 }, { "epoch": 0.9611205125749013, "grad_norm": 0.0673828125, "learning_rate": 0.0003643626882498359, "loss": 5.1392, "step": 8466 }, { "epoch": 0.9612340396848204, "grad_norm": 0.068359375, "learning_rate": 0.0003634330729034394, "loss": 5.1505, "step": 8467 }, { "epoch": 0.9613475667947394, "grad_norm": 0.0673828125, "learning_rate": 0.0003625058313112413, "loss": 5.1745, "step": 8468 }, { "epoch": 0.9614610939046585, "grad_norm": 0.06787109375, "learning_rate": 0.00036158096646148866, "loss": 5.1437, "step": 8469 }, { "epoch": 0.9615746210145776, "grad_norm": 0.0654296875, "learning_rate": 0.0003606584813347678, "loss": 5.1422, "step": 8470 }, { "epoch": 0.9616881481244967, "grad_norm": 0.06640625, "learning_rate": 0.000359738378903997, "loss": 5.1627, "step": 8471 }, { "epoch": 0.9618016752344157, "grad_norm": 0.0673828125, "learning_rate": 0.0003588206621344153, "loss": 5.17, "step": 8472 }, { "epoch": 0.9619152023443348, "grad_norm": 0.06787109375, "learning_rate": 0.0003579053339835735, "loss": 5.1463, "step": 8473 }, { "epoch": 0.9620287294542539, "grad_norm": 0.068359375, "learning_rate": 0.00035699239740132397, "loss": 5.1765, "step": 8474 }, { "epoch": 0.962142256564173, "grad_norm": 0.06494140625, "learning_rate": 0.00035608185532981263, "loss": 5.1532, "step": 8475 }, { "epoch": 0.962255783674092, "grad_norm": 0.06591796875, "learning_rate": 0.00035517371070346805, "loss": 5.164, "step": 8476 }, { "epoch": 0.9623693107840111, "grad_norm": 0.06640625, "learning_rate": 0.0003542679664489922, "loss": 5.1611, "step": 8477 }, { "epoch": 0.9624828378939302, "grad_norm": 0.0673828125, "learning_rate": 0.0003533646254853522, "loss": 5.1444, "step": 8478 }, { "epoch": 0.9625963650038493, "grad_norm": 0.068359375, "learning_rate": 0.0003524636907237685, "loss": 5.1598, "step": 8479 }, { "epoch": 0.9627098921137683, "grad_norm": 0.068359375, "learning_rate": 0.0003515651650677092, "loss": 5.1917, "step": 8480 }, { "epoch": 0.9628234192236874, "grad_norm": 0.0654296875, "learning_rate": 0.0003506690514128763, "loss": 5.1478, "step": 8481 }, { "epoch": 0.9629369463336065, "grad_norm": 0.064453125, "learning_rate": 0.00034977535264720026, "loss": 5.1298, "step": 8482 }, { "epoch": 0.9630504734435256, "grad_norm": 0.0654296875, "learning_rate": 0.0003488840716508276, "loss": 5.1423, "step": 8483 }, { "epoch": 0.9631640005534446, "grad_norm": 0.06640625, "learning_rate": 0.0003479952112961143, "loss": 5.1577, "step": 8484 }, { "epoch": 0.9632775276633637, "grad_norm": 0.06640625, "learning_rate": 0.0003471087744476148, "loss": 5.1698, "step": 8485 }, { "epoch": 0.9633910547732828, "grad_norm": 0.0654296875, "learning_rate": 0.00034622476396207254, "loss": 5.1411, "step": 8486 }, { "epoch": 0.9635045818832019, "grad_norm": 0.0654296875, "learning_rate": 0.0003453431826884125, "loss": 5.1627, "step": 8487 }, { "epoch": 0.963618108993121, "grad_norm": 0.064453125, "learning_rate": 0.0003444640334677305, "loss": 5.1594, "step": 8488 }, { "epoch": 0.96373163610304, "grad_norm": 0.0654296875, "learning_rate": 0.00034358731913328465, "loss": 5.1723, "step": 8489 }, { "epoch": 0.9638451632129591, "grad_norm": 0.06640625, "learning_rate": 0.0003427130425104857, "loss": 5.1604, "step": 8490 }, { "epoch": 0.9639586903228782, "grad_norm": 0.06494140625, "learning_rate": 0.00034184120641688907, "loss": 5.1615, "step": 8491 }, { "epoch": 0.9640722174327973, "grad_norm": 0.0654296875, "learning_rate": 0.000340971813662184, "loss": 5.1714, "step": 8492 }, { "epoch": 0.9641857445427163, "grad_norm": 0.0654296875, "learning_rate": 0.00034010486704818665, "loss": 5.1526, "step": 8493 }, { "epoch": 0.9642992716526354, "grad_norm": 0.06591796875, "learning_rate": 0.00033924036936882864, "loss": 5.1567, "step": 8494 }, { "epoch": 0.9644127987625545, "grad_norm": 0.06640625, "learning_rate": 0.00033837832341015105, "loss": 5.1515, "step": 8495 }, { "epoch": 0.9645263258724736, "grad_norm": 0.06591796875, "learning_rate": 0.0003375187319502919, "loss": 5.1505, "step": 8496 }, { "epoch": 0.9646398529823926, "grad_norm": 0.06640625, "learning_rate": 0.00033666159775948024, "loss": 5.1558, "step": 8497 }, { "epoch": 0.9647533800923117, "grad_norm": 0.06494140625, "learning_rate": 0.00033580692360002585, "loss": 5.1429, "step": 8498 }, { "epoch": 0.9648669072022308, "grad_norm": 0.06787109375, "learning_rate": 0.00033495471222631, "loss": 5.1636, "step": 8499 }, { "epoch": 0.9649804343121499, "grad_norm": 0.068359375, "learning_rate": 0.0003341049663847775, "loss": 5.1804, "step": 8500 }, { "epoch": 0.9650939614220689, "grad_norm": 0.06640625, "learning_rate": 0.00033325768881392696, "loss": 5.1479, "step": 8501 }, { "epoch": 0.965207488531988, "grad_norm": 0.06591796875, "learning_rate": 0.0003324128822443035, "loss": 5.1508, "step": 8502 }, { "epoch": 0.9653210156419071, "grad_norm": 0.0654296875, "learning_rate": 0.0003315705493984875, "loss": 5.1564, "step": 8503 }, { "epoch": 0.9654345427518262, "grad_norm": 0.06591796875, "learning_rate": 0.0003307306929910883, "loss": 5.1587, "step": 8504 }, { "epoch": 0.9655480698617452, "grad_norm": 0.064453125, "learning_rate": 0.00032989331572873335, "loss": 5.1652, "step": 8505 }, { "epoch": 0.9656615969716643, "grad_norm": 0.06689453125, "learning_rate": 0.00032905842031006184, "loss": 5.138, "step": 8506 }, { "epoch": 0.9657751240815834, "grad_norm": 0.064453125, "learning_rate": 0.0003282260094257131, "loss": 5.146, "step": 8507 }, { "epoch": 0.9658886511915025, "grad_norm": 0.06640625, "learning_rate": 0.0003273960857583205, "loss": 5.1465, "step": 8508 }, { "epoch": 0.9660021783014215, "grad_norm": 0.06640625, "learning_rate": 0.00032656865198250174, "loss": 5.1534, "step": 8509 }, { "epoch": 0.9661157054113406, "grad_norm": 0.06494140625, "learning_rate": 0.00032574371076484984, "loss": 5.1687, "step": 8510 }, { "epoch": 0.9662292325212597, "grad_norm": 0.064453125, "learning_rate": 0.0003249212647639255, "loss": 5.1535, "step": 8511 }, { "epoch": 0.9663427596311788, "grad_norm": 0.06494140625, "learning_rate": 0.0003241013166302474, "loss": 5.1669, "step": 8512 }, { "epoch": 0.9664562867410978, "grad_norm": 0.064453125, "learning_rate": 0.00032328386900628485, "loss": 5.1484, "step": 8513 }, { "epoch": 0.9665698138510169, "grad_norm": 0.064453125, "learning_rate": 0.00032246892452644825, "loss": 5.1524, "step": 8514 }, { "epoch": 0.966683340960936, "grad_norm": 0.0654296875, "learning_rate": 0.00032165648581708163, "loss": 5.131, "step": 8515 }, { "epoch": 0.9667968680708551, "grad_norm": 0.06591796875, "learning_rate": 0.00032084655549645256, "loss": 5.141, "step": 8516 }, { "epoch": 0.9669103951807742, "grad_norm": 0.064453125, "learning_rate": 0.0003200391361747462, "loss": 5.1481, "step": 8517 }, { "epoch": 0.9670239222906932, "grad_norm": 0.06494140625, "learning_rate": 0.0003192342304540542, "loss": 5.1569, "step": 8518 }, { "epoch": 0.9671374494006123, "grad_norm": 0.0654296875, "learning_rate": 0.0003184318409283685, "loss": 5.1414, "step": 8519 }, { "epoch": 0.9672509765105314, "grad_norm": 0.06494140625, "learning_rate": 0.00031763197018357093, "loss": 5.1676, "step": 8520 }, { "epoch": 0.9673645036204505, "grad_norm": 0.064453125, "learning_rate": 0.0003168346207974269, "loss": 5.1565, "step": 8521 }, { "epoch": 0.9674780307303695, "grad_norm": 0.06494140625, "learning_rate": 0.0003160397953395762, "loss": 5.1599, "step": 8522 }, { "epoch": 0.9675915578402886, "grad_norm": 0.0673828125, "learning_rate": 0.00031524749637152374, "loss": 5.1333, "step": 8523 }, { "epoch": 0.9677050849502077, "grad_norm": 0.0654296875, "learning_rate": 0.0003144577264466336, "loss": 5.1722, "step": 8524 }, { "epoch": 0.9678186120601268, "grad_norm": 0.06689453125, "learning_rate": 0.00031367048811011824, "loss": 5.1816, "step": 8525 }, { "epoch": 0.9679321391700458, "grad_norm": 0.06591796875, "learning_rate": 0.0003128857838990322, "loss": 5.1324, "step": 8526 }, { "epoch": 0.9680456662799649, "grad_norm": 0.06494140625, "learning_rate": 0.00031210361634226286, "loss": 5.1443, "step": 8527 }, { "epoch": 0.968159193389884, "grad_norm": 0.064453125, "learning_rate": 0.000311323987960523, "loss": 5.1518, "step": 8528 }, { "epoch": 0.9682727204998031, "grad_norm": 0.06689453125, "learning_rate": 0.00031054690126634175, "loss": 5.1591, "step": 8529 }, { "epoch": 0.9683862476097221, "grad_norm": 0.064453125, "learning_rate": 0.0003097723587640577, "loss": 5.1504, "step": 8530 }, { "epoch": 0.9684997747196412, "grad_norm": 0.06640625, "learning_rate": 0.0003090003629498101, "loss": 5.1482, "step": 8531 }, { "epoch": 0.9686133018295603, "grad_norm": 0.06640625, "learning_rate": 0.00030823091631153066, "loss": 5.1625, "step": 8532 }, { "epoch": 0.9687268289394794, "grad_norm": 0.0654296875, "learning_rate": 0.0003074640213289358, "loss": 5.1462, "step": 8533 }, { "epoch": 0.9688403560493984, "grad_norm": 0.06640625, "learning_rate": 0.0003066996804735189, "loss": 5.1582, "step": 8534 }, { "epoch": 0.9689538831593175, "grad_norm": 0.0654296875, "learning_rate": 0.00030593789620854216, "loss": 5.1627, "step": 8535 }, { "epoch": 0.9690674102692366, "grad_norm": 0.06494140625, "learning_rate": 0.000305178670989028, "loss": 5.1492, "step": 8536 }, { "epoch": 0.9691809373791557, "grad_norm": 0.06591796875, "learning_rate": 0.00030442200726175207, "loss": 5.134, "step": 8537 }, { "epoch": 0.9692944644890747, "grad_norm": 0.064453125, "learning_rate": 0.00030366790746523537, "loss": 5.1613, "step": 8538 }, { "epoch": 0.9694079915989938, "grad_norm": 0.0654296875, "learning_rate": 0.00030291637402973576, "loss": 5.1403, "step": 8539 }, { "epoch": 0.9695215187089129, "grad_norm": 0.064453125, "learning_rate": 0.00030216740937724016, "loss": 5.1566, "step": 8540 }, { "epoch": 0.969635045818832, "grad_norm": 0.064453125, "learning_rate": 0.0003014210159214575, "loss": 5.1305, "step": 8541 }, { "epoch": 0.969748572928751, "grad_norm": 0.06591796875, "learning_rate": 0.00030067719606780995, "loss": 5.1384, "step": 8542 }, { "epoch": 0.9698621000386701, "grad_norm": 0.0654296875, "learning_rate": 0.0002999359522134262, "loss": 5.1418, "step": 8543 }, { "epoch": 0.9699756271485892, "grad_norm": 0.06640625, "learning_rate": 0.00029919728674713267, "loss": 5.1481, "step": 8544 }, { "epoch": 0.9700891542585083, "grad_norm": 0.064453125, "learning_rate": 0.00029846120204944654, "loss": 5.1791, "step": 8545 }, { "epoch": 0.9702026813684274, "grad_norm": 0.06494140625, "learning_rate": 0.00029772770049256816, "loss": 5.1543, "step": 8546 }, { "epoch": 0.9703162084783464, "grad_norm": 0.06396484375, "learning_rate": 0.00029699678444037263, "loss": 5.1671, "step": 8547 }, { "epoch": 0.9704297355882655, "grad_norm": 0.06396484375, "learning_rate": 0.00029626845624840324, "loss": 5.1639, "step": 8548 }, { "epoch": 0.9705432626981846, "grad_norm": 0.06494140625, "learning_rate": 0.00029554271826386265, "loss": 5.1705, "step": 8549 }, { "epoch": 0.9706567898081037, "grad_norm": 0.06494140625, "learning_rate": 0.00029481957282560665, "loss": 5.1375, "step": 8550 }, { "epoch": 0.9707703169180227, "grad_norm": 0.064453125, "learning_rate": 0.00029409902226413523, "loss": 5.1314, "step": 8551 }, { "epoch": 0.9708838440279418, "grad_norm": 0.064453125, "learning_rate": 0.0002933810689015865, "loss": 5.1703, "step": 8552 }, { "epoch": 0.9709973711378609, "grad_norm": 0.06591796875, "learning_rate": 0.00029266571505172823, "loss": 5.1458, "step": 8553 }, { "epoch": 0.97111089824778, "grad_norm": 0.064453125, "learning_rate": 0.0002919529630199507, "loss": 5.1653, "step": 8554 }, { "epoch": 0.971224425357699, "grad_norm": 0.0654296875, "learning_rate": 0.00029124281510325896, "loss": 5.1452, "step": 8555 }, { "epoch": 0.9713379524676181, "grad_norm": 0.06640625, "learning_rate": 0.000290535273590266, "loss": 5.1518, "step": 8556 }, { "epoch": 0.9714514795775372, "grad_norm": 0.06396484375, "learning_rate": 0.00028983034076118547, "loss": 5.1601, "step": 8557 }, { "epoch": 0.9715650066874563, "grad_norm": 0.06396484375, "learning_rate": 0.00028912801888782293, "loss": 5.1352, "step": 8558 }, { "epoch": 0.9716785337973753, "grad_norm": 0.06494140625, "learning_rate": 0.00028842831023357025, "loss": 5.1599, "step": 8559 }, { "epoch": 0.9717920609072944, "grad_norm": 0.06396484375, "learning_rate": 0.00028773121705339755, "loss": 5.1502, "step": 8560 }, { "epoch": 0.9719055880172135, "grad_norm": 0.0654296875, "learning_rate": 0.0002870367415938462, "loss": 5.1485, "step": 8561 }, { "epoch": 0.9720191151271326, "grad_norm": 0.06396484375, "learning_rate": 0.0002863448860930206, "loss": 5.1572, "step": 8562 }, { "epoch": 0.9721326422370516, "grad_norm": 0.0654296875, "learning_rate": 0.0002856556527805826, "loss": 5.1339, "step": 8563 }, { "epoch": 0.9722461693469707, "grad_norm": 0.0654296875, "learning_rate": 0.00028496904387774263, "loss": 5.1607, "step": 8564 }, { "epoch": 0.9723596964568898, "grad_norm": 0.064453125, "learning_rate": 0.00028428506159725443, "loss": 5.1588, "step": 8565 }, { "epoch": 0.9724732235668089, "grad_norm": 0.06494140625, "learning_rate": 0.00028360370814340546, "loss": 5.1873, "step": 8566 }, { "epoch": 0.972586750676728, "grad_norm": 0.0634765625, "learning_rate": 0.0002829249857120128, "loss": 5.1185, "step": 8567 }, { "epoch": 0.9727002777866471, "grad_norm": 0.064453125, "learning_rate": 0.00028224889649041296, "loss": 5.1456, "step": 8568 }, { "epoch": 0.9728138048965662, "grad_norm": 0.06494140625, "learning_rate": 0.0002815754426574574, "loss": 5.1567, "step": 8569 }, { "epoch": 0.9729273320064853, "grad_norm": 0.06396484375, "learning_rate": 0.00028090462638350394, "loss": 5.1474, "step": 8570 }, { "epoch": 0.9730408591164044, "grad_norm": 0.06689453125, "learning_rate": 0.0002802364498304102, "loss": 5.166, "step": 8571 }, { "epoch": 0.9731543862263234, "grad_norm": 0.06494140625, "learning_rate": 0.0002795709151515272, "loss": 5.1605, "step": 8572 }, { "epoch": 0.9732679133362425, "grad_norm": 0.064453125, "learning_rate": 0.0002789080244916909, "loss": 5.1688, "step": 8573 }, { "epoch": 0.9733814404461616, "grad_norm": 0.06396484375, "learning_rate": 0.00027824777998721806, "loss": 5.1593, "step": 8574 }, { "epoch": 0.9734949675560807, "grad_norm": 0.0654296875, "learning_rate": 0.0002775901837658956, "loss": 5.1504, "step": 8575 }, { "epoch": 0.9736084946659997, "grad_norm": 0.06494140625, "learning_rate": 0.0002769352379469773, "loss": 5.1656, "step": 8576 }, { "epoch": 0.9737220217759188, "grad_norm": 0.06396484375, "learning_rate": 0.00027628294464117424, "loss": 5.1571, "step": 8577 }, { "epoch": 0.9738355488858379, "grad_norm": 0.0654296875, "learning_rate": 0.00027563330595065024, "loss": 5.1568, "step": 8578 }, { "epoch": 0.973949075995757, "grad_norm": 0.064453125, "learning_rate": 0.0002749863239690132, "loss": 5.1562, "step": 8579 }, { "epoch": 0.974062603105676, "grad_norm": 0.064453125, "learning_rate": 0.00027434200078130954, "loss": 5.1615, "step": 8580 }, { "epoch": 0.9741761302155951, "grad_norm": 0.06396484375, "learning_rate": 0.00027370033846401716, "loss": 5.1633, "step": 8581 }, { "epoch": 0.9742896573255142, "grad_norm": 0.064453125, "learning_rate": 0.0002730613390850386, "loss": 5.1638, "step": 8582 }, { "epoch": 0.9744031844354333, "grad_norm": 0.06591796875, "learning_rate": 0.00027242500470369454, "loss": 5.1587, "step": 8583 }, { "epoch": 0.9745167115453524, "grad_norm": 0.064453125, "learning_rate": 0.0002717913373707167, "loss": 5.1436, "step": 8584 }, { "epoch": 0.9746302386552714, "grad_norm": 0.06396484375, "learning_rate": 0.00027116033912824247, "loss": 5.1434, "step": 8585 }, { "epoch": 0.9747437657651905, "grad_norm": 0.0654296875, "learning_rate": 0.00027053201200980643, "loss": 5.1793, "step": 8586 }, { "epoch": 0.9748572928751096, "grad_norm": 0.064453125, "learning_rate": 0.0002699063580403359, "loss": 5.1637, "step": 8587 }, { "epoch": 0.9749708199850287, "grad_norm": 0.06396484375, "learning_rate": 0.0002692833792361424, "loss": 5.1565, "step": 8588 }, { "epoch": 0.9750843470949477, "grad_norm": 0.06396484375, "learning_rate": 0.0002686630776049172, "loss": 5.1466, "step": 8589 }, { "epoch": 0.9751978742048668, "grad_norm": 0.064453125, "learning_rate": 0.0002680454551457228, "loss": 5.1508, "step": 8590 }, { "epoch": 0.9753114013147859, "grad_norm": 0.06689453125, "learning_rate": 0.00026743051384898786, "loss": 5.1237, "step": 8591 }, { "epoch": 0.975424928424705, "grad_norm": 0.06494140625, "learning_rate": 0.00026681825569650037, "loss": 5.1547, "step": 8592 }, { "epoch": 0.975538455534624, "grad_norm": 0.068359375, "learning_rate": 0.0002662086826614013, "loss": 5.1528, "step": 8593 }, { "epoch": 0.9756519826445431, "grad_norm": 0.064453125, "learning_rate": 0.00026560179670817835, "loss": 5.1782, "step": 8594 }, { "epoch": 0.9757655097544622, "grad_norm": 0.06298828125, "learning_rate": 0.0002649975997926589, "loss": 5.172, "step": 8595 }, { "epoch": 0.9758790368643813, "grad_norm": 0.0634765625, "learning_rate": 0.0002643960938620053, "loss": 5.1388, "step": 8596 }, { "epoch": 0.9759925639743003, "grad_norm": 0.06494140625, "learning_rate": 0.0002637972808547065, "loss": 5.1525, "step": 8597 }, { "epoch": 0.9761060910842194, "grad_norm": 0.06396484375, "learning_rate": 0.00026320116270057384, "loss": 5.1637, "step": 8598 }, { "epoch": 0.9762196181941385, "grad_norm": 0.064453125, "learning_rate": 0.00026260774132073314, "loss": 5.1719, "step": 8599 }, { "epoch": 0.9763331453040576, "grad_norm": 0.06396484375, "learning_rate": 0.00026201701862761965, "loss": 5.1764, "step": 8600 }, { "epoch": 0.9764466724139766, "grad_norm": 0.06298828125, "learning_rate": 0.00026142899652497145, "loss": 5.1427, "step": 8601 }, { "epoch": 0.9765601995238957, "grad_norm": 0.06396484375, "learning_rate": 0.0002608436769078231, "loss": 5.1842, "step": 8602 }, { "epoch": 0.9766737266338148, "grad_norm": 0.06640625, "learning_rate": 0.00026026106166250027, "loss": 5.1527, "step": 8603 }, { "epoch": 0.9767872537437339, "grad_norm": 0.0634765625, "learning_rate": 0.00025968115266661285, "loss": 5.1621, "step": 8604 }, { "epoch": 0.976900780853653, "grad_norm": 0.0625, "learning_rate": 0.0002591039517890492, "loss": 5.1766, "step": 8605 }, { "epoch": 0.977014307963572, "grad_norm": 0.064453125, "learning_rate": 0.00025852946088997025, "loss": 5.1638, "step": 8606 }, { "epoch": 0.9771278350734911, "grad_norm": 0.06396484375, "learning_rate": 0.00025795768182080344, "loss": 5.1427, "step": 8607 }, { "epoch": 0.9772413621834102, "grad_norm": 0.0634765625, "learning_rate": 0.0002573886164242366, "loss": 5.1831, "step": 8608 }, { "epoch": 0.9773548892933293, "grad_norm": 0.064453125, "learning_rate": 0.00025682226653421226, "loss": 5.1355, "step": 8609 }, { "epoch": 0.9774684164032483, "grad_norm": 0.064453125, "learning_rate": 0.00025625863397592133, "loss": 5.1669, "step": 8610 }, { "epoch": 0.9775819435131674, "grad_norm": 0.06298828125, "learning_rate": 0.0002556977205657982, "loss": 5.172, "step": 8611 }, { "epoch": 0.9776954706230865, "grad_norm": 0.06591796875, "learning_rate": 0.0002551395281115134, "loss": 5.1424, "step": 8612 }, { "epoch": 0.9778089977330056, "grad_norm": 0.0634765625, "learning_rate": 0.000254584058411969, "loss": 5.1695, "step": 8613 }, { "epoch": 0.9779225248429246, "grad_norm": 0.06298828125, "learning_rate": 0.00025403131325729195, "loss": 5.1417, "step": 8614 }, { "epoch": 0.9780360519528437, "grad_norm": 0.0634765625, "learning_rate": 0.0002534812944288294, "loss": 5.1522, "step": 8615 }, { "epoch": 0.9781495790627628, "grad_norm": 0.0634765625, "learning_rate": 0.000252934003699142, "loss": 5.1485, "step": 8616 }, { "epoch": 0.9782631061726819, "grad_norm": 0.0634765625, "learning_rate": 0.000252389442831998, "loss": 5.1561, "step": 8617 }, { "epoch": 0.9783766332826009, "grad_norm": 0.0634765625, "learning_rate": 0.00025184761358236875, "loss": 5.1676, "step": 8618 }, { "epoch": 0.97849016039252, "grad_norm": 0.0634765625, "learning_rate": 0.0002513085176964219, "loss": 5.1676, "step": 8619 }, { "epoch": 0.9786036875024391, "grad_norm": 0.064453125, "learning_rate": 0.0002507721569115168, "loss": 5.1599, "step": 8620 }, { "epoch": 0.9787172146123582, "grad_norm": 0.064453125, "learning_rate": 0.00025023853295619725, "loss": 5.1543, "step": 8621 }, { "epoch": 0.9788307417222772, "grad_norm": 0.06298828125, "learning_rate": 0.00024970764755018817, "loss": 5.1539, "step": 8622 }, { "epoch": 0.9789442688321963, "grad_norm": 0.064453125, "learning_rate": 0.00024917950240438804, "loss": 5.1604, "step": 8623 }, { "epoch": 0.9790577959421154, "grad_norm": 0.06494140625, "learning_rate": 0.0002486540992208646, "loss": 5.1331, "step": 8624 }, { "epoch": 0.9791713230520345, "grad_norm": 0.06494140625, "learning_rate": 0.00024813143969284893, "loss": 5.1693, "step": 8625 }, { "epoch": 0.9792848501619535, "grad_norm": 0.0634765625, "learning_rate": 0.0002476115255047302, "loss": 5.1358, "step": 8626 }, { "epoch": 0.9793983772718726, "grad_norm": 0.0625, "learning_rate": 0.0002470943583320497, "loss": 5.1416, "step": 8627 }, { "epoch": 0.9795119043817917, "grad_norm": 0.06494140625, "learning_rate": 0.0002465799398414962, "loss": 5.1871, "step": 8628 }, { "epoch": 0.9796254314917108, "grad_norm": 0.06298828125, "learning_rate": 0.0002460682716909004, "loss": 5.1599, "step": 8629 }, { "epoch": 0.9797389586016299, "grad_norm": 0.062255859375, "learning_rate": 0.0002455593555292287, "loss": 5.1541, "step": 8630 }, { "epoch": 0.9798524857115489, "grad_norm": 0.06396484375, "learning_rate": 0.000245053192996579, "loss": 5.1649, "step": 8631 }, { "epoch": 0.979966012821468, "grad_norm": 0.064453125, "learning_rate": 0.00024454978572417533, "loss": 5.1633, "step": 8632 }, { "epoch": 0.9800795399313871, "grad_norm": 0.06494140625, "learning_rate": 0.00024404913533436186, "loss": 5.1495, "step": 8633 }, { "epoch": 0.9801930670413062, "grad_norm": 0.06298828125, "learning_rate": 0.00024355124344059792, "loss": 5.1779, "step": 8634 }, { "epoch": 0.9803065941512252, "grad_norm": 0.0625, "learning_rate": 0.00024305611164745358, "loss": 5.1705, "step": 8635 }, { "epoch": 0.9804201212611443, "grad_norm": 0.064453125, "learning_rate": 0.0002425637415506032, "loss": 5.1446, "step": 8636 }, { "epoch": 0.9805336483710634, "grad_norm": 0.0634765625, "learning_rate": 0.00024207413473682147, "loss": 5.1645, "step": 8637 }, { "epoch": 0.9806471754809825, "grad_norm": 0.06787109375, "learning_rate": 0.00024158729278397724, "loss": 5.1464, "step": 8638 }, { "epoch": 0.9807607025909015, "grad_norm": 0.06298828125, "learning_rate": 0.00024110321726102937, "loss": 5.1403, "step": 8639 }, { "epoch": 0.9808742297008206, "grad_norm": 0.06494140625, "learning_rate": 0.00024062190972802137, "loss": 5.1649, "step": 8640 }, { "epoch": 0.9809877568107397, "grad_norm": 0.0634765625, "learning_rate": 0.00024014337173607585, "loss": 5.1627, "step": 8641 }, { "epoch": 0.9811012839206588, "grad_norm": 0.0634765625, "learning_rate": 0.00023966760482739037, "loss": 5.1667, "step": 8642 }, { "epoch": 0.9812148110305778, "grad_norm": 0.0634765625, "learning_rate": 0.00023919461053523167, "loss": 5.1573, "step": 8643 }, { "epoch": 0.9813283381404969, "grad_norm": 0.06298828125, "learning_rate": 0.0002387243903839313, "loss": 5.1371, "step": 8644 }, { "epoch": 0.981441865250416, "grad_norm": 0.0634765625, "learning_rate": 0.00023825694588888046, "loss": 5.1593, "step": 8645 }, { "epoch": 0.9815553923603351, "grad_norm": 0.06396484375, "learning_rate": 0.00023779227855652525, "loss": 5.1372, "step": 8646 }, { "epoch": 0.9816689194702541, "grad_norm": 0.06396484375, "learning_rate": 0.00023733038988436167, "loss": 5.1428, "step": 8647 }, { "epoch": 0.9817824465801732, "grad_norm": 0.0634765625, "learning_rate": 0.000236871281360931, "loss": 5.1505, "step": 8648 }, { "epoch": 0.9818959736900923, "grad_norm": 0.0634765625, "learning_rate": 0.00023641495446581447, "loss": 5.1238, "step": 8649 }, { "epoch": 0.9820095008000114, "grad_norm": 0.06396484375, "learning_rate": 0.0002359614106696293, "loss": 5.1757, "step": 8650 }, { "epoch": 0.9821230279099304, "grad_norm": 0.0634765625, "learning_rate": 0.00023551065143402318, "loss": 5.127, "step": 8651 }, { "epoch": 0.9822365550198495, "grad_norm": 0.062255859375, "learning_rate": 0.00023506267821167033, "loss": 5.1608, "step": 8652 }, { "epoch": 0.9823500821297686, "grad_norm": 0.06396484375, "learning_rate": 0.00023461749244626604, "loss": 5.1723, "step": 8653 }, { "epoch": 0.9824636092396877, "grad_norm": 0.0625, "learning_rate": 0.00023417509557252267, "loss": 5.1628, "step": 8654 }, { "epoch": 0.9825771363496068, "grad_norm": 0.0634765625, "learning_rate": 0.00023373548901616464, "loss": 5.162, "step": 8655 }, { "epoch": 0.9826906634595258, "grad_norm": 0.06396484375, "learning_rate": 0.00023329867419392373, "loss": 5.1505, "step": 8656 }, { "epoch": 0.9828041905694449, "grad_norm": 0.0634765625, "learning_rate": 0.0002328646525135352, "loss": 5.1481, "step": 8657 }, { "epoch": 0.982917717679364, "grad_norm": 0.064453125, "learning_rate": 0.0002324334253737321, "loss": 5.1644, "step": 8658 }, { "epoch": 0.983031244789283, "grad_norm": 0.06298828125, "learning_rate": 0.0002320049941642421, "loss": 5.1591, "step": 8659 }, { "epoch": 0.9831447718992021, "grad_norm": 0.0634765625, "learning_rate": 0.00023157936026578185, "loss": 5.1626, "step": 8660 }, { "epoch": 0.9832582990091212, "grad_norm": 0.06396484375, "learning_rate": 0.00023115652505005353, "loss": 5.1559, "step": 8661 }, { "epoch": 0.9833718261190403, "grad_norm": 0.0634765625, "learning_rate": 0.00023073648987973942, "loss": 5.1334, "step": 8662 }, { "epoch": 0.9834853532289594, "grad_norm": 0.064453125, "learning_rate": 0.00023031925610849852, "loss": 5.1693, "step": 8663 }, { "epoch": 0.9835988803388784, "grad_norm": 0.06298828125, "learning_rate": 0.00022990482508096103, "loss": 5.1548, "step": 8664 }, { "epoch": 0.9837124074487975, "grad_norm": 0.06201171875, "learning_rate": 0.0002294931981327252, "loss": 5.1424, "step": 8665 }, { "epoch": 0.9838259345587166, "grad_norm": 0.064453125, "learning_rate": 0.0002290843765903526, "loss": 5.1587, "step": 8666 }, { "epoch": 0.9839394616686357, "grad_norm": 0.0634765625, "learning_rate": 0.00022867836177136312, "loss": 5.1506, "step": 8667 }, { "epoch": 0.9840529887785547, "grad_norm": 0.06396484375, "learning_rate": 0.00022827515498423205, "loss": 5.1191, "step": 8668 }, { "epoch": 0.9841665158884738, "grad_norm": 0.0634765625, "learning_rate": 0.00022787475752838448, "loss": 5.1645, "step": 8669 }, { "epoch": 0.9842800429983929, "grad_norm": 0.0625, "learning_rate": 0.00022747717069419255, "loss": 5.1468, "step": 8670 }, { "epoch": 0.984393570108312, "grad_norm": 0.0634765625, "learning_rate": 0.00022708239576296987, "loss": 5.1568, "step": 8671 }, { "epoch": 0.984507097218231, "grad_norm": 0.064453125, "learning_rate": 0.00022669043400696837, "loss": 5.1601, "step": 8672 }, { "epoch": 0.9846206243281501, "grad_norm": 0.062255859375, "learning_rate": 0.0002263012866893737, "loss": 5.1506, "step": 8673 }, { "epoch": 0.9847341514380692, "grad_norm": 0.064453125, "learning_rate": 0.00022591495506430156, "loss": 5.145, "step": 8674 }, { "epoch": 0.9848476785479883, "grad_norm": 0.064453125, "learning_rate": 0.00022553144037679344, "loss": 5.1336, "step": 8675 }, { "epoch": 0.9849612056579073, "grad_norm": 0.062255859375, "learning_rate": 0.00022515074386281257, "loss": 5.1797, "step": 8676 }, { "epoch": 0.9850747327678264, "grad_norm": 0.06298828125, "learning_rate": 0.0002247728667492396, "loss": 5.1442, "step": 8677 }, { "epoch": 0.9851882598777455, "grad_norm": 0.06298828125, "learning_rate": 0.00022439781025386957, "loss": 5.1528, "step": 8678 }, { "epoch": 0.9853017869876646, "grad_norm": 0.06298828125, "learning_rate": 0.00022402557558540726, "loss": 5.1608, "step": 8679 }, { "epoch": 0.9854153140975837, "grad_norm": 0.06494140625, "learning_rate": 0.00022365616394346333, "loss": 5.1588, "step": 8680 }, { "epoch": 0.9855288412075027, "grad_norm": 0.06396484375, "learning_rate": 0.0002232895765185508, "loss": 5.1482, "step": 8681 }, { "epoch": 0.9856423683174218, "grad_norm": 0.0654296875, "learning_rate": 0.00022292581449208048, "loss": 5.1762, "step": 8682 }, { "epoch": 0.9857558954273409, "grad_norm": 0.0625, "learning_rate": 0.00022256487903635875, "loss": 5.1579, "step": 8683 }, { "epoch": 0.98586942253726, "grad_norm": 0.06298828125, "learning_rate": 0.00022220677131458172, "loss": 5.1438, "step": 8684 }, { "epoch": 0.985982949647179, "grad_norm": 0.06494140625, "learning_rate": 0.0002218514924808329, "loss": 5.1589, "step": 8685 }, { "epoch": 0.9860964767570981, "grad_norm": 0.0654296875, "learning_rate": 0.00022149904368007903, "loss": 5.1768, "step": 8686 }, { "epoch": 0.9862100038670172, "grad_norm": 0.0634765625, "learning_rate": 0.00022114942604816633, "loss": 5.1403, "step": 8687 }, { "epoch": 0.9863235309769363, "grad_norm": 0.06298828125, "learning_rate": 0.0002208026407118172, "loss": 5.165, "step": 8688 }, { "epoch": 0.9864370580868553, "grad_norm": 0.064453125, "learning_rate": 0.0002204586887886259, "loss": 5.1627, "step": 8689 }, { "epoch": 0.9865505851967744, "grad_norm": 0.064453125, "learning_rate": 0.0002201175713870558, "loss": 5.171, "step": 8690 }, { "epoch": 0.9866641123066935, "grad_norm": 0.06396484375, "learning_rate": 0.00021977928960643513, "loss": 5.156, "step": 8691 }, { "epoch": 0.9867776394166126, "grad_norm": 0.06201171875, "learning_rate": 0.00021944384453695387, "loss": 5.1631, "step": 8692 }, { "epoch": 0.9868911665265316, "grad_norm": 0.0625, "learning_rate": 0.00021911123725965972, "loss": 5.1614, "step": 8693 }, { "epoch": 0.9870046936364507, "grad_norm": 0.06201171875, "learning_rate": 0.00021878146884645542, "loss": 5.1535, "step": 8694 }, { "epoch": 0.9871182207463698, "grad_norm": 0.06298828125, "learning_rate": 0.00021845454036009438, "loss": 5.1541, "step": 8695 }, { "epoch": 0.9872317478562889, "grad_norm": 0.0634765625, "learning_rate": 0.00021813045285417785, "loss": 5.173, "step": 8696 }, { "epoch": 0.9873452749662079, "grad_norm": 0.06298828125, "learning_rate": 0.00021780920737315152, "loss": 5.1459, "step": 8697 }, { "epoch": 0.987458802076127, "grad_norm": 0.0625, "learning_rate": 0.000217490804952302, "loss": 5.1545, "step": 8698 }, { "epoch": 0.9875723291860461, "grad_norm": 0.06396484375, "learning_rate": 0.00021717524661775322, "loss": 5.1579, "step": 8699 }, { "epoch": 0.9876858562959652, "grad_norm": 0.06396484375, "learning_rate": 0.00021686253338646378, "loss": 5.1641, "step": 8700 }, { "epoch": 0.9877993834058842, "grad_norm": 0.0654296875, "learning_rate": 0.00021655266626622295, "loss": 5.1597, "step": 8701 }, { "epoch": 0.9879129105158033, "grad_norm": 0.0634765625, "learning_rate": 0.00021624564625564782, "loss": 5.1852, "step": 8702 }, { "epoch": 0.9880264376257224, "grad_norm": 0.0625, "learning_rate": 0.00021594147434418027, "loss": 5.159, "step": 8703 }, { "epoch": 0.9881399647356415, "grad_norm": 0.0634765625, "learning_rate": 0.00021564015151208314, "loss": 5.1526, "step": 8704 }, { "epoch": 0.9882534918455605, "grad_norm": 0.064453125, "learning_rate": 0.00021534167873043806, "loss": 5.157, "step": 8705 }, { "epoch": 0.9883670189554796, "grad_norm": 0.064453125, "learning_rate": 0.00021504605696114095, "loss": 5.17, "step": 8706 }, { "epoch": 0.9884805460653987, "grad_norm": 0.0634765625, "learning_rate": 0.0002147532871569004, "loss": 5.1395, "step": 8707 }, { "epoch": 0.9885940731753178, "grad_norm": 0.062255859375, "learning_rate": 0.0002144633702612333, "loss": 5.1422, "step": 8708 }, { "epoch": 0.9887076002852369, "grad_norm": 0.0634765625, "learning_rate": 0.00021417630720846305, "loss": 5.152, "step": 8709 }, { "epoch": 0.9888211273951559, "grad_norm": 0.0625, "learning_rate": 0.00021389209892371523, "loss": 5.1682, "step": 8710 }, { "epoch": 0.988934654505075, "grad_norm": 0.06494140625, "learning_rate": 0.00021361074632291561, "loss": 5.1635, "step": 8711 }, { "epoch": 0.9890481816149941, "grad_norm": 0.06396484375, "learning_rate": 0.00021333225031278701, "loss": 5.1526, "step": 8712 }, { "epoch": 0.9891617087249132, "grad_norm": 0.06298828125, "learning_rate": 0.000213056611790846, "loss": 5.1435, "step": 8713 }, { "epoch": 0.9892752358348322, "grad_norm": 0.06396484375, "learning_rate": 0.00021278383164540028, "loss": 5.163, "step": 8714 }, { "epoch": 0.9893887629447513, "grad_norm": 0.06396484375, "learning_rate": 0.00021251391075554583, "loss": 5.1407, "step": 8715 }, { "epoch": 0.9895022900546704, "grad_norm": 0.06201171875, "learning_rate": 0.00021224684999116407, "loss": 5.1741, "step": 8716 }, { "epoch": 0.9896158171645895, "grad_norm": 0.06494140625, "learning_rate": 0.0002119826502129188, "loss": 5.1389, "step": 8717 }, { "epoch": 0.9897293442745085, "grad_norm": 0.062255859375, "learning_rate": 0.0002117213122722539, "loss": 5.1593, "step": 8718 }, { "epoch": 0.9898428713844276, "grad_norm": 0.0625, "learning_rate": 0.00021146283701139028, "loss": 5.1619, "step": 8719 }, { "epoch": 0.9899563984943467, "grad_norm": 0.06298828125, "learning_rate": 0.00021120722526332328, "loss": 5.15, "step": 8720 }, { "epoch": 0.9900699256042658, "grad_norm": 0.06298828125, "learning_rate": 0.00021095447785181954, "loss": 5.1518, "step": 8721 }, { "epoch": 0.9901834527141848, "grad_norm": 0.0634765625, "learning_rate": 0.0002107045955914152, "loss": 5.1544, "step": 8722 }, { "epoch": 0.9902969798241039, "grad_norm": 0.0634765625, "learning_rate": 0.0002104575792874124, "loss": 5.1629, "step": 8723 }, { "epoch": 0.990410506934023, "grad_norm": 0.0634765625, "learning_rate": 0.00021021342973587745, "loss": 5.1611, "step": 8724 }, { "epoch": 0.9905240340439421, "grad_norm": 0.06787109375, "learning_rate": 0.00020997214772363763, "loss": 5.1418, "step": 8725 }, { "epoch": 0.9906375611538611, "grad_norm": 0.06298828125, "learning_rate": 0.00020973373402827908, "loss": 5.1578, "step": 8726 }, { "epoch": 0.9907510882637802, "grad_norm": 0.0634765625, "learning_rate": 0.00020949818941814406, "loss": 5.1534, "step": 8727 }, { "epoch": 0.9908646153736993, "grad_norm": 0.0625, "learning_rate": 0.00020926551465232863, "loss": 5.1665, "step": 8728 }, { "epoch": 0.9909781424836184, "grad_norm": 0.0625, "learning_rate": 0.00020903571048067992, "loss": 5.1685, "step": 8729 }, { "epoch": 0.9910916695935374, "grad_norm": 0.0634765625, "learning_rate": 0.00020880877764379414, "loss": 5.1565, "step": 8730 }, { "epoch": 0.9912051967034565, "grad_norm": 0.06982421875, "learning_rate": 0.00020858471687301393, "loss": 5.1554, "step": 8731 }, { "epoch": 0.9913187238133756, "grad_norm": 0.06591796875, "learning_rate": 0.000208363528890426, "loss": 5.1642, "step": 8732 }, { "epoch": 0.9914322509232947, "grad_norm": 0.064453125, "learning_rate": 0.00020814521440885877, "loss": 5.1496, "step": 8733 }, { "epoch": 0.9915457780332138, "grad_norm": 0.06298828125, "learning_rate": 0.0002079297741318803, "loss": 5.1448, "step": 8734 }, { "epoch": 0.9916593051431328, "grad_norm": 0.06298828125, "learning_rate": 0.00020771720875379586, "loss": 5.134, "step": 8735 }, { "epoch": 0.9917728322530519, "grad_norm": 0.06298828125, "learning_rate": 0.00020750751895964547, "loss": 5.1615, "step": 8736 }, { "epoch": 0.991886359362971, "grad_norm": 0.06396484375, "learning_rate": 0.00020730070542520233, "loss": 5.1553, "step": 8737 }, { "epoch": 0.9919998864728901, "grad_norm": 0.06298828125, "learning_rate": 0.00020709676881697003, "loss": 5.1501, "step": 8738 }, { "epoch": 0.9921134135828091, "grad_norm": 0.06396484375, "learning_rate": 0.00020689570979218037, "loss": 5.1231, "step": 8739 }, { "epoch": 0.9922269406927282, "grad_norm": 0.06298828125, "learning_rate": 0.00020669752899879192, "loss": 5.1596, "step": 8740 }, { "epoch": 0.9923404678026473, "grad_norm": 0.06396484375, "learning_rate": 0.00020650222707548717, "loss": 5.1706, "step": 8741 }, { "epoch": 0.9924539949125664, "grad_norm": 0.06396484375, "learning_rate": 0.00020630980465167106, "loss": 5.1505, "step": 8742 }, { "epoch": 0.9925675220224854, "grad_norm": 0.0625, "learning_rate": 0.0002061202623474684, "loss": 5.1626, "step": 8743 }, { "epoch": 0.9926810491324045, "grad_norm": 0.062255859375, "learning_rate": 0.00020593360077372249, "loss": 5.1444, "step": 8744 }, { "epoch": 0.9927945762423236, "grad_norm": 0.062255859375, "learning_rate": 0.00020574982053199245, "loss": 5.1584, "step": 8745 }, { "epoch": 0.9929081033522427, "grad_norm": 0.06298828125, "learning_rate": 0.00020556892221455195, "loss": 5.1746, "step": 8746 }, { "epoch": 0.9930216304621617, "grad_norm": 0.06298828125, "learning_rate": 0.00020539090640438691, "loss": 5.149, "step": 8747 }, { "epoch": 0.9931351575720808, "grad_norm": 0.062255859375, "learning_rate": 0.00020521577367519373, "loss": 5.1403, "step": 8748 }, { "epoch": 0.9932486846819999, "grad_norm": 0.06298828125, "learning_rate": 0.0002050435245913772, "loss": 5.1414, "step": 8749 }, { "epoch": 0.993362211791919, "grad_norm": 0.06494140625, "learning_rate": 0.00020487415970804935, "loss": 5.1603, "step": 8750 }, { "epoch": 0.993475738901838, "grad_norm": 0.06298828125, "learning_rate": 0.00020470767957102694, "loss": 5.1697, "step": 8751 }, { "epoch": 0.9935892660117571, "grad_norm": 0.061767578125, "learning_rate": 0.00020454408471682988, "loss": 5.1719, "step": 8752 }, { "epoch": 0.9937027931216762, "grad_norm": 0.06396484375, "learning_rate": 0.00020438337567267993, "loss": 5.1562, "step": 8753 }, { "epoch": 0.9938163202315953, "grad_norm": 0.0634765625, "learning_rate": 0.0002042255529564982, "loss": 5.1772, "step": 8754 }, { "epoch": 0.9939298473415143, "grad_norm": 0.0634765625, "learning_rate": 0.00020407061707690467, "loss": 5.1478, "step": 8755 }, { "epoch": 0.9940433744514334, "grad_norm": 0.0634765625, "learning_rate": 0.000203918568533215, "loss": 5.1444, "step": 8756 }, { "epoch": 0.9941569015613525, "grad_norm": 0.06494140625, "learning_rate": 0.00020376940781544035, "loss": 5.1625, "step": 8757 }, { "epoch": 0.9942704286712716, "grad_norm": 0.06396484375, "learning_rate": 0.00020362313540428484, "loss": 5.153, "step": 8758 }, { "epoch": 0.9943839557811907, "grad_norm": 0.0634765625, "learning_rate": 0.00020347975177114475, "loss": 5.1682, "step": 8759 }, { "epoch": 0.9944974828911097, "grad_norm": 0.0625, "learning_rate": 0.00020333925737810632, "loss": 5.1691, "step": 8760 }, { "epoch": 0.9946110100010288, "grad_norm": 0.0634765625, "learning_rate": 0.00020320165267794462, "loss": 5.1531, "step": 8761 }, { "epoch": 0.9947245371109479, "grad_norm": 0.0634765625, "learning_rate": 0.00020306693811412199, "loss": 5.1392, "step": 8762 }, { "epoch": 0.994838064220867, "grad_norm": 0.061767578125, "learning_rate": 0.00020293511412078688, "loss": 5.1369, "step": 8763 }, { "epoch": 0.994951591330786, "grad_norm": 0.0625, "learning_rate": 0.0002028061811227718, "loss": 5.1646, "step": 8764 }, { "epoch": 0.9950651184407051, "grad_norm": 0.062255859375, "learning_rate": 0.00020268013953559277, "loss": 5.1592, "step": 8765 }, { "epoch": 0.9951786455506242, "grad_norm": 0.06298828125, "learning_rate": 0.00020255698976544747, "loss": 5.1502, "step": 8766 }, { "epoch": 0.9952921726605433, "grad_norm": 0.0634765625, "learning_rate": 0.00020243673220921393, "loss": 5.142, "step": 8767 }, { "epoch": 0.9954056997704623, "grad_norm": 0.0634765625, "learning_rate": 0.00020231936725444938, "loss": 5.1435, "step": 8768 }, { "epoch": 0.9955192268803814, "grad_norm": 0.0634765625, "learning_rate": 0.0002022048952793891, "loss": 5.1484, "step": 8769 }, { "epoch": 0.9956327539903005, "grad_norm": 0.06396484375, "learning_rate": 0.0002020933166529451, "loss": 5.18, "step": 8770 }, { "epoch": 0.9957462811002196, "grad_norm": 0.0625, "learning_rate": 0.00020198463173470477, "loss": 5.1516, "step": 8771 }, { "epoch": 0.9958598082101386, "grad_norm": 0.062255859375, "learning_rate": 0.00020187884087492991, "loss": 5.1448, "step": 8772 }, { "epoch": 0.9959733353200577, "grad_norm": 0.06298828125, "learning_rate": 0.0002017759444145556, "loss": 5.1466, "step": 8773 }, { "epoch": 0.9960868624299768, "grad_norm": 0.0634765625, "learning_rate": 0.0002016759426851891, "loss": 5.1691, "step": 8774 }, { "epoch": 0.9962003895398959, "grad_norm": 0.064453125, "learning_rate": 0.0002015788360091087, "loss": 5.1403, "step": 8775 }, { "epoch": 0.996313916649815, "grad_norm": 0.064453125, "learning_rate": 0.00020148462469926265, "loss": 5.1548, "step": 8776 }, { "epoch": 0.996427443759734, "grad_norm": 0.06494140625, "learning_rate": 0.00020139330905926835, "loss": 5.1401, "step": 8777 }, { "epoch": 0.9965409708696531, "grad_norm": 0.06201171875, "learning_rate": 0.00020130488938341118, "loss": 5.1417, "step": 8778 }, { "epoch": 0.9966544979795722, "grad_norm": 0.0625, "learning_rate": 0.00020121936595664358, "loss": 5.1478, "step": 8779 }, { "epoch": 0.9967680250894912, "grad_norm": 0.0634765625, "learning_rate": 0.0002011367390545843, "loss": 5.1462, "step": 8780 }, { "epoch": 0.9968815521994103, "grad_norm": 0.0625, "learning_rate": 0.00020105700894351716, "loss": 5.1421, "step": 8781 }, { "epoch": 0.9969950793093294, "grad_norm": 0.0634765625, "learning_rate": 0.0002009801758803907, "loss": 5.1539, "step": 8782 }, { "epoch": 0.9971086064192485, "grad_norm": 0.06298828125, "learning_rate": 0.00020090624011281688, "loss": 5.1587, "step": 8783 }, { "epoch": 0.9972221335291676, "grad_norm": 0.06298828125, "learning_rate": 0.00020083520187907033, "loss": 5.1473, "step": 8784 }, { "epoch": 0.9973356606390866, "grad_norm": 0.0634765625, "learning_rate": 0.00020076706140808814, "loss": 5.149, "step": 8785 }, { "epoch": 0.9974491877490057, "grad_norm": 0.0625, "learning_rate": 0.00020070181891946833, "loss": 5.1587, "step": 8786 }, { "epoch": 0.9975627148589248, "grad_norm": 0.06396484375, "learning_rate": 0.00020063947462346975, "loss": 5.1448, "step": 8787 }, { "epoch": 0.9976762419688439, "grad_norm": 0.062255859375, "learning_rate": 0.00020058002872101077, "loss": 5.1528, "step": 8788 }, { "epoch": 0.9977897690787629, "grad_norm": 0.06298828125, "learning_rate": 0.0002005234814036696, "loss": 5.1646, "step": 8789 }, { "epoch": 0.997903296188682, "grad_norm": 0.06396484375, "learning_rate": 0.0002004698328536827, "loss": 5.1574, "step": 8790 }, { "epoch": 0.9980168232986011, "grad_norm": 0.062255859375, "learning_rate": 0.0002004190832439447, "loss": 5.1553, "step": 8791 }, { "epoch": 0.9981303504085202, "grad_norm": 0.06298828125, "learning_rate": 0.00020037123273800782, "loss": 5.1813, "step": 8792 }, { "epoch": 0.9982438775184392, "grad_norm": 0.0625, "learning_rate": 0.000200326281490081, "loss": 5.1492, "step": 8793 }, { "epoch": 0.9983574046283583, "grad_norm": 0.06201171875, "learning_rate": 0.00020028422964503008, "loss": 5.1408, "step": 8794 }, { "epoch": 0.9984709317382774, "grad_norm": 0.06298828125, "learning_rate": 0.00020024507733837644, "loss": 5.1743, "step": 8795 }, { "epoch": 0.9985844588481965, "grad_norm": 0.062255859375, "learning_rate": 0.00020020882469629753, "loss": 5.1757, "step": 8796 }, { "epoch": 0.9986979859581155, "grad_norm": 0.06201171875, "learning_rate": 0.00020017547183562553, "loss": 5.1422, "step": 8797 }, { "epoch": 0.9988115130680346, "grad_norm": 0.061767578125, "learning_rate": 0.00020014501886384765, "loss": 5.1514, "step": 8798 }, { "epoch": 0.9989250401779537, "grad_norm": 0.062255859375, "learning_rate": 0.00020011746587910557, "loss": 5.1403, "step": 8799 }, { "epoch": 0.9990385672878728, "grad_norm": 0.064453125, "learning_rate": 0.00020009281297019498, "loss": 5.1583, "step": 8800 }, { "epoch": 0.9991520943977918, "grad_norm": 0.0615234375, "learning_rate": 0.0002000710602165655, "loss": 5.1545, "step": 8801 }, { "epoch": 0.9992656215077109, "grad_norm": 0.0625, "learning_rate": 0.0002000522076883203, "loss": 5.1756, "step": 8802 }, { "epoch": 0.99937914861763, "grad_norm": 0.06298828125, "learning_rate": 0.00020003625544621603, "loss": 5.1673, "step": 8803 }, { "epoch": 0.9994926757275491, "grad_norm": 0.0625, "learning_rate": 0.00020002320354166228, "loss": 5.1647, "step": 8804 }, { "epoch": 0.9996062028374681, "grad_norm": 0.06298828125, "learning_rate": 0.00020001305201672182, "loss": 5.1787, "step": 8805 }, { "epoch": 0.9997197299473872, "grad_norm": 0.0634765625, "learning_rate": 0.00020000580090411026, "loss": 5.1421, "step": 8806 }, { "epoch": 0.9998332570573063, "grad_norm": 0.0625, "learning_rate": 0.00020000145022719605, "loss": 5.1743, "step": 8807 }, { "epoch": 0.9999467841672254, "grad_norm": 0.0634765625, "learning_rate": 0.0002, "loss": 5.143, "step": 8808 }, { "epoch": 0.9999467841672254, "step": 8808, "total_flos": 4.588406021280372e+20, "train_loss": 5.391583533096487, "train_runtime": 330234.0549, "train_samples_per_second": 23.899, "train_steps_per_second": 0.027 } ], "logging_steps": 1.0, "max_steps": 8808, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.588406021280372e+20, "train_batch_size": 1, "trial_name": null, "trial_params": null }