{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 1401,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0021413276231263384,
      "grad_norm": 57.48312185632039,
      "learning_rate": 3.546099290780142e-07,
      "loss": 11.0536,
      "step": 1
    },
    {
      "epoch": 0.004282655246252677,
      "grad_norm": 56.04665199470871,
      "learning_rate": 7.092198581560284e-07,
      "loss": 11.108,
      "step": 2
    },
    {
      "epoch": 0.006423982869379015,
      "grad_norm": 57.69353550561418,
      "learning_rate": 1.0638297872340427e-06,
      "loss": 11.118,
      "step": 3
    },
    {
      "epoch": 0.008565310492505354,
      "grad_norm": 57.098115231260124,
      "learning_rate": 1.4184397163120568e-06,
      "loss": 11.0464,
      "step": 4
    },
    {
      "epoch": 0.010706638115631691,
      "grad_norm": 58.67499851336777,
      "learning_rate": 1.7730496453900712e-06,
      "loss": 10.9625,
      "step": 5
    },
    {
      "epoch": 0.01284796573875803,
      "grad_norm": 58.414650577956415,
      "learning_rate": 2.1276595744680853e-06,
      "loss": 10.9616,
      "step": 6
    },
    {
      "epoch": 0.014989293361884369,
      "grad_norm": 64.47207593228225,
      "learning_rate": 2.4822695035460995e-06,
      "loss": 10.6887,
      "step": 7
    },
    {
      "epoch": 0.017130620985010708,
      "grad_norm": 64.98938276175704,
      "learning_rate": 2.8368794326241136e-06,
      "loss": 10.6541,
      "step": 8
    },
    {
      "epoch": 0.019271948608137045,
      "grad_norm": 96.47909270749008,
      "learning_rate": 3.1914893617021277e-06,
      "loss": 9.209,
      "step": 9
    },
    {
      "epoch": 0.021413276231263382,
      "grad_norm": 111.61956792060187,
      "learning_rate": 3.5460992907801423e-06,
      "loss": 8.6722,
      "step": 10
    },
    {
      "epoch": 0.023554603854389723,
      "grad_norm": 66.79651103290082,
      "learning_rate": 3.9007092198581565e-06,
      "loss": 3.7251,
      "step": 11
    },
    {
      "epoch": 0.02569593147751606,
      "grad_norm": 56.39679177509825,
      "learning_rate": 4.255319148936171e-06,
      "loss": 3.2238,
      "step": 12
    },
    {
      "epoch": 0.027837259100642397,
      "grad_norm": 76.3878582360963,
      "learning_rate": 4.609929078014184e-06,
      "loss": 2.5959,
      "step": 13
    },
    {
      "epoch": 0.029978586723768737,
      "grad_norm": 31.950352388440905,
      "learning_rate": 4.964539007092199e-06,
      "loss": 2.3433,
      "step": 14
    },
    {
      "epoch": 0.032119914346895075,
      "grad_norm": 6.951146815684312,
      "learning_rate": 5.319148936170213e-06,
      "loss": 1.4451,
      "step": 15
    },
    {
      "epoch": 0.034261241970021415,
      "grad_norm": 4.818010610834237,
      "learning_rate": 5.673758865248227e-06,
      "loss": 1.294,
      "step": 16
    },
    {
      "epoch": 0.03640256959314775,
      "grad_norm": 3.6480447594140033,
      "learning_rate": 6.028368794326241e-06,
      "loss": 1.256,
      "step": 17
    },
    {
      "epoch": 0.03854389721627409,
      "grad_norm": 2.720810833620355,
      "learning_rate": 6.3829787234042555e-06,
      "loss": 1.1491,
      "step": 18
    },
    {
      "epoch": 0.04068522483940043,
      "grad_norm": 2.19419575616829,
      "learning_rate": 6.73758865248227e-06,
      "loss": 1.1041,
      "step": 19
    },
    {
      "epoch": 0.042826552462526764,
      "grad_norm": 1.5787733821950154,
      "learning_rate": 7.092198581560285e-06,
      "loss": 0.9964,
      "step": 20
    },
    {
      "epoch": 0.044967880085653104,
      "grad_norm": 6.086448551226785,
      "learning_rate": 7.446808510638298e-06,
      "loss": 0.9374,
      "step": 21
    },
    {
      "epoch": 0.047109207708779445,
      "grad_norm": 1.508606587040093,
      "learning_rate": 7.801418439716313e-06,
      "loss": 0.9276,
      "step": 22
    },
    {
      "epoch": 0.04925053533190578,
      "grad_norm": 1.0854043801940743,
      "learning_rate": 8.156028368794328e-06,
      "loss": 0.8555,
      "step": 23
    },
    {
      "epoch": 0.05139186295503212,
      "grad_norm": 0.9443347844565788,
      "learning_rate": 8.510638297872341e-06,
      "loss": 0.8648,
      "step": 24
    },
    {
      "epoch": 0.05353319057815846,
      "grad_norm": 0.8257150817721549,
      "learning_rate": 8.865248226950355e-06,
      "loss": 0.8433,
      "step": 25
    },
    {
      "epoch": 0.055674518201284794,
      "grad_norm": 0.7611313593085876,
      "learning_rate": 9.219858156028368e-06,
      "loss": 0.7927,
      "step": 26
    },
    {
      "epoch": 0.057815845824411134,
      "grad_norm": 0.9175929091877931,
      "learning_rate": 9.574468085106383e-06,
      "loss": 0.7758,
      "step": 27
    },
    {
      "epoch": 0.059957173447537475,
      "grad_norm": 0.6622602227048174,
      "learning_rate": 9.929078014184398e-06,
      "loss": 0.724,
      "step": 28
    },
    {
      "epoch": 0.06209850107066381,
      "grad_norm": 0.673388358753726,
      "learning_rate": 1.0283687943262411e-05,
      "loss": 0.7647,
      "step": 29
    },
    {
      "epoch": 0.06423982869379015,
      "grad_norm": 0.6658973555598119,
      "learning_rate": 1.0638297872340426e-05,
      "loss": 0.7529,
      "step": 30
    },
    {
      "epoch": 0.06638115631691649,
      "grad_norm": 0.7382952955966445,
      "learning_rate": 1.0992907801418441e-05,
      "loss": 0.7324,
      "step": 31
    },
    {
      "epoch": 0.06852248394004283,
      "grad_norm": 0.6870316045369611,
      "learning_rate": 1.1347517730496454e-05,
      "loss": 0.7258,
      "step": 32
    },
    {
      "epoch": 0.07066381156316917,
      "grad_norm": 0.5409451023446604,
      "learning_rate": 1.170212765957447e-05,
      "loss": 0.6995,
      "step": 33
    },
    {
      "epoch": 0.0728051391862955,
      "grad_norm": 0.4718286692000531,
      "learning_rate": 1.2056737588652483e-05,
      "loss": 0.6937,
      "step": 34
    },
    {
      "epoch": 0.07494646680942184,
      "grad_norm": 0.5560095137091816,
      "learning_rate": 1.2411347517730498e-05,
      "loss": 0.6849,
      "step": 35
    },
    {
      "epoch": 0.07708779443254818,
      "grad_norm": 0.5519607067166765,
      "learning_rate": 1.2765957446808511e-05,
      "loss": 0.6615,
      "step": 36
    },
    {
      "epoch": 0.07922912205567452,
      "grad_norm": 0.4650149562699858,
      "learning_rate": 1.3120567375886524e-05,
      "loss": 0.6624,
      "step": 37
    },
    {
      "epoch": 0.08137044967880086,
      "grad_norm": 0.4222769877707214,
      "learning_rate": 1.347517730496454e-05,
      "loss": 0.6429,
      "step": 38
    },
    {
      "epoch": 0.0835117773019272,
      "grad_norm": 0.41323190280074,
      "learning_rate": 1.3829787234042554e-05,
      "loss": 0.6267,
      "step": 39
    },
    {
      "epoch": 0.08565310492505353,
      "grad_norm": 0.45734022776906186,
      "learning_rate": 1.418439716312057e-05,
      "loss": 0.6516,
      "step": 40
    },
    {
      "epoch": 0.08779443254817987,
      "grad_norm": 0.42800824849390673,
      "learning_rate": 1.4539007092198581e-05,
      "loss": 0.6654,
      "step": 41
    },
    {
      "epoch": 0.08993576017130621,
      "grad_norm": 0.33157687702952515,
      "learning_rate": 1.4893617021276596e-05,
      "loss": 0.6061,
      "step": 42
    },
    {
      "epoch": 0.09207708779443255,
      "grad_norm": 0.34740038476735874,
      "learning_rate": 1.5248226950354611e-05,
      "loss": 0.6166,
      "step": 43
    },
    {
      "epoch": 0.09421841541755889,
      "grad_norm": 0.34140646940139696,
      "learning_rate": 1.5602836879432626e-05,
      "loss": 0.6102,
      "step": 44
    },
    {
      "epoch": 0.09635974304068523,
      "grad_norm": 0.3292213866559836,
      "learning_rate": 1.595744680851064e-05,
      "loss": 0.6227,
      "step": 45
    },
    {
      "epoch": 0.09850107066381156,
      "grad_norm": 0.30198419319828534,
      "learning_rate": 1.6312056737588656e-05,
      "loss": 0.6107,
      "step": 46
    },
    {
      "epoch": 0.1006423982869379,
      "grad_norm": 0.31063047872860655,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 0.6188,
      "step": 47
    },
    {
      "epoch": 0.10278372591006424,
      "grad_norm": 0.30350185379074784,
      "learning_rate": 1.7021276595744682e-05,
      "loss": 0.5828,
      "step": 48
    },
    {
      "epoch": 0.10492505353319058,
      "grad_norm": 0.33592357937468176,
      "learning_rate": 1.7375886524822697e-05,
      "loss": 0.6074,
      "step": 49
    },
    {
      "epoch": 0.10706638115631692,
      "grad_norm": 0.2977049583100526,
      "learning_rate": 1.773049645390071e-05,
      "loss": 0.5956,
      "step": 50
    },
    {
      "epoch": 0.10920770877944326,
      "grad_norm": 0.29548881007123645,
      "learning_rate": 1.8085106382978724e-05,
      "loss": 0.5875,
      "step": 51
    },
    {
      "epoch": 0.11134903640256959,
      "grad_norm": 0.3157281891885196,
      "learning_rate": 1.8439716312056736e-05,
      "loss": 0.5875,
      "step": 52
    },
    {
      "epoch": 0.11349036402569593,
      "grad_norm": 0.2866517435357405,
      "learning_rate": 1.879432624113475e-05,
      "loss": 0.5827,
      "step": 53
    },
    {
      "epoch": 0.11563169164882227,
      "grad_norm": 0.2849703271418174,
      "learning_rate": 1.9148936170212766e-05,
      "loss": 0.5903,
      "step": 54
    },
    {
      "epoch": 0.11777301927194861,
      "grad_norm": 0.2883104724475601,
      "learning_rate": 1.950354609929078e-05,
      "loss": 0.566,
      "step": 55
    },
    {
      "epoch": 0.11991434689507495,
      "grad_norm": 0.27848766799340263,
      "learning_rate": 1.9858156028368796e-05,
      "loss": 0.6028,
      "step": 56
    },
    {
      "epoch": 0.12205567451820129,
      "grad_norm": 0.27054084886113955,
      "learning_rate": 2.0212765957446807e-05,
      "loss": 0.5738,
      "step": 57
    },
    {
      "epoch": 0.12419700214132762,
      "grad_norm": 0.28790406613678704,
      "learning_rate": 2.0567375886524822e-05,
      "loss": 0.5835,
      "step": 58
    },
    {
      "epoch": 0.12633832976445397,
      "grad_norm": 0.2609950876468609,
      "learning_rate": 2.0921985815602837e-05,
      "loss": 0.5679,
      "step": 59
    },
    {
      "epoch": 0.1284796573875803,
      "grad_norm": 0.22708508929335028,
      "learning_rate": 2.1276595744680852e-05,
      "loss": 0.5644,
      "step": 60
    },
    {
      "epoch": 0.13062098501070663,
      "grad_norm": 0.29273734573536586,
      "learning_rate": 2.1631205673758867e-05,
      "loss": 0.5506,
      "step": 61
    },
    {
      "epoch": 0.13276231263383298,
      "grad_norm": 0.25373390652035177,
      "learning_rate": 2.1985815602836882e-05,
      "loss": 0.5574,
      "step": 62
    },
    {
      "epoch": 0.1349036402569593,
      "grad_norm": 0.24510687261458605,
      "learning_rate": 2.2340425531914894e-05,
      "loss": 0.5977,
      "step": 63
    },
    {
      "epoch": 0.13704496788008566,
      "grad_norm": 0.24945740897429344,
      "learning_rate": 2.269503546099291e-05,
      "loss": 0.55,
      "step": 64
    },
    {
      "epoch": 0.139186295503212,
      "grad_norm": 0.2366238459163537,
      "learning_rate": 2.3049645390070924e-05,
      "loss": 0.5645,
      "step": 65
    },
    {
      "epoch": 0.14132762312633834,
      "grad_norm": 0.2671806828673314,
      "learning_rate": 2.340425531914894e-05,
      "loss": 0.5575,
      "step": 66
    },
    {
      "epoch": 0.14346895074946467,
      "grad_norm": 0.2693204831683393,
      "learning_rate": 2.3758865248226954e-05,
      "loss": 0.5644,
      "step": 67
    },
    {
      "epoch": 0.145610278372591,
      "grad_norm": 0.27274356761854357,
      "learning_rate": 2.4113475177304965e-05,
      "loss": 0.569,
      "step": 68
    },
    {
      "epoch": 0.14775160599571735,
      "grad_norm": 0.2834119787499386,
      "learning_rate": 2.446808510638298e-05,
      "loss": 0.5489,
      "step": 69
    },
    {
      "epoch": 0.14989293361884368,
      "grad_norm": 0.28557474380682984,
      "learning_rate": 2.4822695035460995e-05,
      "loss": 0.5516,
      "step": 70
    },
    {
      "epoch": 0.15203426124197003,
      "grad_norm": 0.28955467692470377,
      "learning_rate": 2.5177304964539007e-05,
      "loss": 0.5489,
      "step": 71
    },
    {
      "epoch": 0.15417558886509636,
      "grad_norm": 0.28586765508193035,
      "learning_rate": 2.5531914893617022e-05,
      "loss": 0.5531,
      "step": 72
    },
    {
      "epoch": 0.15631691648822268,
      "grad_norm": 0.26463443393592084,
      "learning_rate": 2.5886524822695034e-05,
      "loss": 0.5524,
      "step": 73
    },
    {
      "epoch": 0.15845824411134904,
      "grad_norm": 0.27995499634374477,
      "learning_rate": 2.624113475177305e-05,
      "loss": 0.569,
      "step": 74
    },
    {
      "epoch": 0.16059957173447537,
      "grad_norm": 0.263684223704656,
      "learning_rate": 2.6595744680851064e-05,
      "loss": 0.5287,
      "step": 75
    },
    {
      "epoch": 0.16274089935760172,
      "grad_norm": 0.2919043059115593,
      "learning_rate": 2.695035460992908e-05,
      "loss": 0.5541,
      "step": 76
    },
    {
      "epoch": 0.16488222698072805,
      "grad_norm": 0.2805459610716478,
      "learning_rate": 2.7304964539007094e-05,
      "loss": 0.5328,
      "step": 77
    },
    {
      "epoch": 0.1670235546038544,
      "grad_norm": 0.2704831415044607,
      "learning_rate": 2.765957446808511e-05,
      "loss": 0.5506,
      "step": 78
    },
    {
      "epoch": 0.16916488222698073,
      "grad_norm": 0.26497156122554943,
      "learning_rate": 2.8014184397163124e-05,
      "loss": 0.5383,
      "step": 79
    },
    {
      "epoch": 0.17130620985010706,
      "grad_norm": 0.26473030453104546,
      "learning_rate": 2.836879432624114e-05,
      "loss": 0.5278,
      "step": 80
    },
    {
      "epoch": 0.1734475374732334,
      "grad_norm": 0.26833166887012466,
      "learning_rate": 2.8723404255319154e-05,
      "loss": 0.543,
      "step": 81
    },
    {
      "epoch": 0.17558886509635974,
      "grad_norm": 0.2669805636364855,
      "learning_rate": 2.9078014184397162e-05,
      "loss": 0.5282,
      "step": 82
    },
    {
      "epoch": 0.1777301927194861,
      "grad_norm": 0.3149827479787504,
      "learning_rate": 2.9432624113475177e-05,
      "loss": 0.538,
      "step": 83
    },
    {
      "epoch": 0.17987152034261242,
      "grad_norm": 0.27006814672073254,
      "learning_rate": 2.9787234042553192e-05,
      "loss": 0.5468,
      "step": 84
    },
    {
      "epoch": 0.18201284796573874,
      "grad_norm": 0.2526636958624785,
      "learning_rate": 3.0141843971631207e-05,
      "loss": 0.515,
      "step": 85
    },
    {
      "epoch": 0.1841541755888651,
      "grad_norm": 0.3066822441172635,
      "learning_rate": 3.0496453900709222e-05,
      "loss": 0.5363,
      "step": 86
    },
    {
      "epoch": 0.18629550321199143,
      "grad_norm": 0.24592969798097147,
      "learning_rate": 3.085106382978723e-05,
      "loss": 0.5172,
      "step": 87
    },
    {
      "epoch": 0.18843683083511778,
      "grad_norm": 0.26341288237609334,
      "learning_rate": 3.120567375886525e-05,
      "loss": 0.5406,
      "step": 88
    },
    {
      "epoch": 0.1905781584582441,
      "grad_norm": 0.29569701405093984,
      "learning_rate": 3.156028368794326e-05,
      "loss": 0.5286,
      "step": 89
    },
    {
      "epoch": 0.19271948608137046,
      "grad_norm": 0.26720546438232595,
      "learning_rate": 3.191489361702128e-05,
      "loss": 0.5191,
      "step": 90
    },
    {
      "epoch": 0.1948608137044968,
      "grad_norm": 0.2861008299317861,
      "learning_rate": 3.226950354609929e-05,
      "loss": 0.5292,
      "step": 91
    },
    {
      "epoch": 0.19700214132762311,
      "grad_norm": 0.3127809337444568,
      "learning_rate": 3.262411347517731e-05,
      "loss": 0.5379,
      "step": 92
    },
    {
      "epoch": 0.19914346895074947,
      "grad_norm": 0.2584764301110668,
      "learning_rate": 3.2978723404255317e-05,
      "loss": 0.5066,
      "step": 93
    },
    {
      "epoch": 0.2012847965738758,
      "grad_norm": 0.2780596173019734,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 0.5286,
      "step": 94
    },
    {
      "epoch": 0.20342612419700215,
      "grad_norm": 0.3273661550437294,
      "learning_rate": 3.3687943262411347e-05,
      "loss": 0.5155,
      "step": 95
    },
    {
      "epoch": 0.20556745182012848,
      "grad_norm": 0.3036241503506882,
      "learning_rate": 3.4042553191489365e-05,
      "loss": 0.5282,
      "step": 96
    },
    {
      "epoch": 0.20770877944325483,
      "grad_norm": 0.275255132741623,
      "learning_rate": 3.4397163120567377e-05,
      "loss": 0.5018,
      "step": 97
    },
    {
      "epoch": 0.20985010706638116,
      "grad_norm": 0.28093670646107644,
      "learning_rate": 3.4751773049645395e-05,
      "loss": 0.4945,
      "step": 98
    },
    {
      "epoch": 0.21199143468950749,
      "grad_norm": 0.27959022964420993,
      "learning_rate": 3.5106382978723407e-05,
      "loss": 0.5293,
      "step": 99
    },
    {
      "epoch": 0.21413276231263384,
      "grad_norm": 0.2987862837213484,
      "learning_rate": 3.546099290780142e-05,
      "loss": 0.5184,
      "step": 100
    },
    {
      "epoch": 0.21627408993576017,
      "grad_norm": 0.2856449444610249,
      "learning_rate": 3.5815602836879437e-05,
      "loss": 0.5226,
      "step": 101
    },
    {
      "epoch": 0.21841541755888652,
      "grad_norm": 0.2968896630112509,
      "learning_rate": 3.617021276595745e-05,
      "loss": 0.5314,
      "step": 102
    },
    {
      "epoch": 0.22055674518201285,
      "grad_norm": 0.30586641123740416,
      "learning_rate": 3.6524822695035466e-05,
      "loss": 0.5195,
      "step": 103
    },
    {
      "epoch": 0.22269807280513917,
      "grad_norm": 0.28057509983972945,
      "learning_rate": 3.687943262411347e-05,
      "loss": 0.5287,
      "step": 104
    },
    {
      "epoch": 0.22483940042826553,
      "grad_norm": 0.2752404260641325,
      "learning_rate": 3.723404255319149e-05,
      "loss": 0.525,
      "step": 105
    },
    {
      "epoch": 0.22698072805139186,
      "grad_norm": 0.28309242710377003,
      "learning_rate": 3.75886524822695e-05,
      "loss": 0.5178,
      "step": 106
    },
    {
      "epoch": 0.2291220556745182,
      "grad_norm": 0.2986830233079882,
      "learning_rate": 3.794326241134752e-05,
      "loss": 0.5338,
      "step": 107
    },
    {
      "epoch": 0.23126338329764454,
      "grad_norm": 0.27191322829490466,
      "learning_rate": 3.829787234042553e-05,
      "loss": 0.5064,
      "step": 108
    },
    {
      "epoch": 0.2334047109207709,
      "grad_norm": 0.3080545126102214,
      "learning_rate": 3.865248226950355e-05,
      "loss": 0.515,
      "step": 109
    },
    {
      "epoch": 0.23554603854389722,
      "grad_norm": 0.31855156829294484,
      "learning_rate": 3.900709219858156e-05,
      "loss": 0.5267,
      "step": 110
    },
    {
      "epoch": 0.23768736616702354,
      "grad_norm": 0.3118610221875637,
      "learning_rate": 3.936170212765958e-05,
      "loss": 0.4993,
      "step": 111
    },
    {
      "epoch": 0.2398286937901499,
      "grad_norm": 0.3563490136500225,
      "learning_rate": 3.971631205673759e-05,
      "loss": 0.494,
      "step": 112
    },
    {
      "epoch": 0.24197002141327623,
      "grad_norm": 0.33491792784430063,
      "learning_rate": 4.007092198581561e-05,
      "loss": 0.5166,
      "step": 113
    },
    {
      "epoch": 0.24411134903640258,
      "grad_norm": 0.3107234877761307,
      "learning_rate": 4.0425531914893614e-05,
      "loss": 0.5016,
      "step": 114
    },
    {
      "epoch": 0.2462526766595289,
      "grad_norm": 0.3574327616039191,
      "learning_rate": 4.078014184397163e-05,
      "loss": 0.4997,
      "step": 115
    },
    {
      "epoch": 0.24839400428265523,
      "grad_norm": 0.31250608779187283,
      "learning_rate": 4.1134751773049644e-05,
      "loss": 0.5012,
      "step": 116
    },
    {
      "epoch": 0.2505353319057816,
      "grad_norm": 0.31466096005113264,
      "learning_rate": 4.148936170212766e-05,
      "loss": 0.4898,
      "step": 117
    },
    {
      "epoch": 0.25267665952890794,
      "grad_norm": 0.35023750633522854,
      "learning_rate": 4.1843971631205674e-05,
      "loss": 0.5151,
      "step": 118
    },
    {
      "epoch": 0.25481798715203424,
      "grad_norm": 0.3055990330623537,
      "learning_rate": 4.219858156028369e-05,
      "loss": 0.4976,
      "step": 119
    },
    {
      "epoch": 0.2569593147751606,
      "grad_norm": 0.3558935694444545,
      "learning_rate": 4.2553191489361704e-05,
      "loss": 0.517,
      "step": 120
    },
    {
      "epoch": 0.25910064239828695,
      "grad_norm": 0.34915884790434,
      "learning_rate": 4.2907801418439716e-05,
      "loss": 0.5044,
      "step": 121
    },
    {
      "epoch": 0.26124197002141325,
      "grad_norm": 0.348632342188165,
      "learning_rate": 4.3262411347517734e-05,
      "loss": 0.505,
      "step": 122
    },
    {
      "epoch": 0.2633832976445396,
      "grad_norm": 0.3785527848062378,
      "learning_rate": 4.3617021276595746e-05,
      "loss": 0.5085,
      "step": 123
    },
    {
      "epoch": 0.26552462526766596,
      "grad_norm": 0.37162644733897265,
      "learning_rate": 4.3971631205673764e-05,
      "loss": 0.4833,
      "step": 124
    },
    {
      "epoch": 0.2676659528907923,
      "grad_norm": 0.3980723109877404,
      "learning_rate": 4.432624113475177e-05,
      "loss": 0.4955,
      "step": 125
    },
    {
      "epoch": 0.2698072805139186,
      "grad_norm": 0.3725438131171889,
      "learning_rate": 4.468085106382979e-05,
      "loss": 0.4879,
      "step": 126
    },
    {
      "epoch": 0.27194860813704497,
      "grad_norm": 0.3546366551258052,
      "learning_rate": 4.50354609929078e-05,
      "loss": 0.485,
      "step": 127
    },
    {
      "epoch": 0.2740899357601713,
      "grad_norm": 0.3352604060765713,
      "learning_rate": 4.539007092198582e-05,
      "loss": 0.5125,
      "step": 128
    },
    {
      "epoch": 0.2762312633832976,
      "grad_norm": 0.38902168152696476,
      "learning_rate": 4.574468085106383e-05,
      "loss": 0.4923,
      "step": 129
    },
    {
      "epoch": 0.278372591006424,
      "grad_norm": 0.4722633165183755,
      "learning_rate": 4.609929078014185e-05,
      "loss": 0.4933,
      "step": 130
    },
    {
      "epoch": 0.28051391862955033,
      "grad_norm": 0.40974642024145574,
      "learning_rate": 4.645390070921986e-05,
      "loss": 0.4823,
      "step": 131
    },
    {
      "epoch": 0.2826552462526767,
      "grad_norm": 0.34213507876160526,
      "learning_rate": 4.680851063829788e-05,
      "loss": 0.4865,
      "step": 132
    },
    {
      "epoch": 0.284796573875803,
      "grad_norm": 0.3961185410637184,
      "learning_rate": 4.716312056737589e-05,
      "loss": 0.4865,
      "step": 133
    },
    {
      "epoch": 0.28693790149892934,
      "grad_norm": 0.37000080466083957,
      "learning_rate": 4.751773049645391e-05,
      "loss": 0.4916,
      "step": 134
    },
    {
      "epoch": 0.2890792291220557,
      "grad_norm": 0.37414319777051985,
      "learning_rate": 4.787234042553192e-05,
      "loss": 0.5044,
      "step": 135
    },
    {
      "epoch": 0.291220556745182,
      "grad_norm": 0.3164774177894217,
      "learning_rate": 4.822695035460993e-05,
      "loss": 0.4811,
      "step": 136
    },
    {
      "epoch": 0.29336188436830835,
      "grad_norm": 0.47067620082781225,
      "learning_rate": 4.858156028368794e-05,
      "loss": 0.4945,
      "step": 137
    },
    {
      "epoch": 0.2955032119914347,
      "grad_norm": 0.4294475664764798,
      "learning_rate": 4.893617021276596e-05,
      "loss": 0.5094,
      "step": 138
    },
    {
      "epoch": 0.29764453961456105,
      "grad_norm": 0.3040028099316697,
      "learning_rate": 4.929078014184397e-05,
      "loss": 0.4942,
      "step": 139
    },
    {
      "epoch": 0.29978586723768735,
      "grad_norm": 0.37748057667062174,
      "learning_rate": 4.964539007092199e-05,
      "loss": 0.4905,
      "step": 140
    },
    {
      "epoch": 0.3019271948608137,
      "grad_norm": 0.2989419262617704,
      "learning_rate": 5e-05,
      "loss": 0.4905,
      "step": 141
    },
    {
      "epoch": 0.30406852248394006,
      "grad_norm": 0.3871243075644112,
      "learning_rate": 4.996031746031746e-05,
      "loss": 0.4852,
      "step": 142
    },
    {
      "epoch": 0.30620985010706636,
      "grad_norm": 0.29602921563211776,
      "learning_rate": 4.9920634920634924e-05,
      "loss": 0.4803,
      "step": 143
    },
    {
      "epoch": 0.3083511777301927,
      "grad_norm": 0.48126987646952557,
      "learning_rate": 4.9880952380952385e-05,
      "loss": 0.4963,
      "step": 144
    },
    {
      "epoch": 0.31049250535331907,
      "grad_norm": 0.3271921917836504,
      "learning_rate": 4.9841269841269845e-05,
      "loss": 0.4739,
      "step": 145
    },
    {
      "epoch": 0.31263383297644537,
      "grad_norm": 0.30962361825644874,
      "learning_rate": 4.9801587301587306e-05,
      "loss": 0.4659,
      "step": 146
    },
    {
      "epoch": 0.3147751605995717,
      "grad_norm": 0.2862717141303497,
      "learning_rate": 4.976190476190477e-05,
      "loss": 0.4805,
      "step": 147
    },
    {
      "epoch": 0.3169164882226981,
      "grad_norm": 0.29165213355742037,
      "learning_rate": 4.972222222222223e-05,
      "loss": 0.4893,
      "step": 148
    },
    {
      "epoch": 0.31905781584582443,
      "grad_norm": 0.2837190882523078,
      "learning_rate": 4.968253968253969e-05,
      "loss": 0.4877,
      "step": 149
    },
    {
      "epoch": 0.32119914346895073,
      "grad_norm": 0.3162679943468195,
      "learning_rate": 4.964285714285715e-05,
      "loss": 0.4949,
      "step": 150
    },
    {
      "epoch": 0.3233404710920771,
      "grad_norm": 0.3166915931837597,
      "learning_rate": 4.960317460317461e-05,
      "loss": 0.4745,
      "step": 151
    },
    {
      "epoch": 0.32548179871520344,
      "grad_norm": 0.2867036124644211,
      "learning_rate": 4.956349206349207e-05,
      "loss": 0.4827,
      "step": 152
    },
    {
      "epoch": 0.32762312633832974,
      "grad_norm": 0.3127471586658351,
      "learning_rate": 4.9523809523809525e-05,
      "loss": 0.4817,
      "step": 153
    },
    {
      "epoch": 0.3297644539614561,
      "grad_norm": 0.28863042358499497,
      "learning_rate": 4.9484126984126985e-05,
      "loss": 0.4944,
      "step": 154
    },
    {
      "epoch": 0.33190578158458245,
      "grad_norm": 0.30635024454971044,
      "learning_rate": 4.9444444444444446e-05,
      "loss": 0.4882,
      "step": 155
    },
    {
      "epoch": 0.3340471092077088,
      "grad_norm": 0.25406638929643754,
      "learning_rate": 4.940476190476191e-05,
      "loss": 0.4583,
      "step": 156
    },
    {
      "epoch": 0.3361884368308351,
      "grad_norm": 0.4236621872965728,
      "learning_rate": 4.936507936507937e-05,
      "loss": 0.5011,
      "step": 157
    },
    {
      "epoch": 0.33832976445396146,
      "grad_norm": 0.33240414924657075,
      "learning_rate": 4.932539682539683e-05,
      "loss": 0.4984,
      "step": 158
    },
    {
      "epoch": 0.3404710920770878,
      "grad_norm": 0.27992076972800195,
      "learning_rate": 4.928571428571429e-05,
      "loss": 0.4882,
      "step": 159
    },
    {
      "epoch": 0.3426124197002141,
      "grad_norm": 0.3378573050357272,
      "learning_rate": 4.924603174603175e-05,
      "loss": 0.4821,
      "step": 160
    },
    {
      "epoch": 0.34475374732334046,
      "grad_norm": 0.3116481418117616,
      "learning_rate": 4.9206349206349204e-05,
      "loss": 0.501,
      "step": 161
    },
    {
      "epoch": 0.3468950749464668,
      "grad_norm": 0.30756617491799604,
      "learning_rate": 4.9166666666666665e-05,
      "loss": 0.499,
      "step": 162
    },
    {
      "epoch": 0.3490364025695932,
      "grad_norm": 0.3501139975577791,
      "learning_rate": 4.9126984126984125e-05,
      "loss": 0.4998,
      "step": 163
    },
    {
      "epoch": 0.3511777301927195,
      "grad_norm": 0.30095198583101096,
      "learning_rate": 4.9087301587301586e-05,
      "loss": 0.4772,
      "step": 164
    },
    {
      "epoch": 0.3533190578158458,
      "grad_norm": 0.29146076836363266,
      "learning_rate": 4.904761904761905e-05,
      "loss": 0.4932,
      "step": 165
    },
    {
      "epoch": 0.3554603854389722,
      "grad_norm": 0.3245937860525494,
      "learning_rate": 4.900793650793651e-05,
      "loss": 0.4717,
      "step": 166
    },
    {
      "epoch": 0.3576017130620985,
      "grad_norm": 0.27276729093930885,
      "learning_rate": 4.896825396825397e-05,
      "loss": 0.485,
      "step": 167
    },
    {
      "epoch": 0.35974304068522484,
      "grad_norm": 0.36020649417508827,
      "learning_rate": 4.892857142857143e-05,
      "loss": 0.4873,
      "step": 168
    },
    {
      "epoch": 0.3618843683083512,
      "grad_norm": 0.2918070400385983,
      "learning_rate": 4.888888888888889e-05,
      "loss": 0.4905,
      "step": 169
    },
    {
      "epoch": 0.3640256959314775,
      "grad_norm": 0.33570775396857644,
      "learning_rate": 4.884920634920635e-05,
      "loss": 0.4852,
      "step": 170
    },
    {
      "epoch": 0.36616702355460384,
      "grad_norm": 0.3134384968531535,
      "learning_rate": 4.880952380952381e-05,
      "loss": 0.477,
      "step": 171
    },
    {
      "epoch": 0.3683083511777302,
      "grad_norm": 0.31510450944034885,
      "learning_rate": 4.876984126984127e-05,
      "loss": 0.4851,
      "step": 172
    },
    {
      "epoch": 0.37044967880085655,
      "grad_norm": 0.3154584471333162,
      "learning_rate": 4.873015873015873e-05,
      "loss": 0.4974,
      "step": 173
    },
    {
      "epoch": 0.37259100642398285,
      "grad_norm": 0.2909626760317359,
      "learning_rate": 4.8690476190476194e-05,
      "loss": 0.4837,
      "step": 174
    },
    {
      "epoch": 0.3747323340471092,
      "grad_norm": 0.3647746614109793,
      "learning_rate": 4.8650793650793654e-05,
      "loss": 0.4766,
      "step": 175
    },
    {
      "epoch": 0.37687366167023556,
      "grad_norm": 0.2994221837641762,
      "learning_rate": 4.8611111111111115e-05,
      "loss": 0.4889,
      "step": 176
    },
    {
      "epoch": 0.37901498929336186,
      "grad_norm": 0.3352657823786708,
      "learning_rate": 4.8571428571428576e-05,
      "loss": 0.5019,
      "step": 177
    },
    {
      "epoch": 0.3811563169164882,
      "grad_norm": 0.3916874320487166,
      "learning_rate": 4.853174603174604e-05,
      "loss": 0.5092,
      "step": 178
    },
    {
      "epoch": 0.38329764453961457,
      "grad_norm": 0.29941263170579285,
      "learning_rate": 4.84920634920635e-05,
      "loss": 0.4803,
      "step": 179
    },
    {
      "epoch": 0.3854389721627409,
      "grad_norm": 0.32217426505709545,
      "learning_rate": 4.845238095238095e-05,
      "loss": 0.4984,
      "step": 180
    },
    {
      "epoch": 0.3875802997858672,
      "grad_norm": 0.3032060935319803,
      "learning_rate": 4.841269841269841e-05,
      "loss": 0.4673,
      "step": 181
    },
    {
      "epoch": 0.3897216274089936,
      "grad_norm": 0.2726502791688811,
      "learning_rate": 4.837301587301587e-05,
      "loss": 0.4664,
      "step": 182
    },
    {
      "epoch": 0.39186295503211993,
      "grad_norm": 0.2971786378502053,
      "learning_rate": 4.8333333333333334e-05,
      "loss": 0.4798,
      "step": 183
    },
    {
      "epoch": 0.39400428265524623,
      "grad_norm": 0.2865455200314379,
      "learning_rate": 4.8293650793650794e-05,
      "loss": 0.4775,
      "step": 184
    },
    {
      "epoch": 0.3961456102783726,
      "grad_norm": 0.2547381179115949,
      "learning_rate": 4.8253968253968255e-05,
      "loss": 0.473,
      "step": 185
    },
    {
      "epoch": 0.39828693790149894,
      "grad_norm": 0.2970502284932418,
      "learning_rate": 4.8214285714285716e-05,
      "loss": 0.4777,
      "step": 186
    },
    {
      "epoch": 0.4004282655246253,
      "grad_norm": 0.3081755326163523,
      "learning_rate": 4.817460317460318e-05,
      "loss": 0.4884,
      "step": 187
    },
    {
      "epoch": 0.4025695931477516,
      "grad_norm": 0.2988953377132881,
      "learning_rate": 4.813492063492064e-05,
      "loss": 0.4721,
      "step": 188
    },
    {
      "epoch": 0.40471092077087795,
      "grad_norm": 0.32840372938982254,
      "learning_rate": 4.80952380952381e-05,
      "loss": 0.4592,
      "step": 189
    },
    {
      "epoch": 0.4068522483940043,
      "grad_norm": 0.27671818316763175,
      "learning_rate": 4.805555555555556e-05,
      "loss": 0.4712,
      "step": 190
    },
    {
      "epoch": 0.4089935760171306,
      "grad_norm": 0.3246160797192649,
      "learning_rate": 4.801587301587302e-05,
      "loss": 0.4752,
      "step": 191
    },
    {
      "epoch": 0.41113490364025695,
      "grad_norm": 0.3566765772926943,
      "learning_rate": 4.797619047619048e-05,
      "loss": 0.4672,
      "step": 192
    },
    {
      "epoch": 0.4132762312633833,
      "grad_norm": 0.26828922214902995,
      "learning_rate": 4.793650793650794e-05,
      "loss": 0.4878,
      "step": 193
    },
    {
      "epoch": 0.41541755888650966,
      "grad_norm": 0.3550040895109434,
      "learning_rate": 4.78968253968254e-05,
      "loss": 0.48,
      "step": 194
    },
    {
      "epoch": 0.41755888650963596,
      "grad_norm": 0.2838693253149512,
      "learning_rate": 4.785714285714286e-05,
      "loss": 0.4611,
      "step": 195
    },
    {
      "epoch": 0.4197002141327623,
      "grad_norm": 0.39397907047263425,
      "learning_rate": 4.781746031746032e-05,
      "loss": 0.4655,
      "step": 196
    },
    {
      "epoch": 0.42184154175588867,
      "grad_norm": 0.26305224728667986,
      "learning_rate": 4.7777777777777784e-05,
      "loss": 0.4841,
      "step": 197
    },
    {
      "epoch": 0.42398286937901497,
      "grad_norm": 0.325701418774634,
      "learning_rate": 4.7738095238095245e-05,
      "loss": 0.4603,
      "step": 198
    },
    {
      "epoch": 0.4261241970021413,
      "grad_norm": 0.442210465125467,
      "learning_rate": 4.7698412698412706e-05,
      "loss": 0.4599,
      "step": 199
    },
    {
      "epoch": 0.4282655246252677,
      "grad_norm": 0.30449934938572704,
      "learning_rate": 4.7658730158730166e-05,
      "loss": 0.4715,
      "step": 200
    },
    {
      "epoch": 0.430406852248394,
      "grad_norm": 0.30205346239976966,
      "learning_rate": 4.761904761904762e-05,
      "loss": 0.4832,
      "step": 201
    },
    {
      "epoch": 0.43254817987152033,
      "grad_norm": 0.29056082365428426,
      "learning_rate": 4.757936507936508e-05,
      "loss": 0.489,
      "step": 202
    },
    {
      "epoch": 0.4346895074946467,
      "grad_norm": 0.36617244189716913,
      "learning_rate": 4.753968253968254e-05,
      "loss": 0.4716,
      "step": 203
    },
    {
      "epoch": 0.43683083511777304,
      "grad_norm": 0.2589569319503199,
      "learning_rate": 4.75e-05,
      "loss": 0.4533,
      "step": 204
    },
    {
      "epoch": 0.43897216274089934,
      "grad_norm": 0.3693939526209848,
      "learning_rate": 4.746031746031746e-05,
      "loss": 0.477,
      "step": 205
    },
    {
      "epoch": 0.4411134903640257,
      "grad_norm": 0.32847584920064604,
      "learning_rate": 4.7420634920634924e-05,
      "loss": 0.4928,
      "step": 206
    },
    {
      "epoch": 0.44325481798715205,
      "grad_norm": 0.44099566221142933,
      "learning_rate": 4.738095238095238e-05,
      "loss": 0.4556,
      "step": 207
    },
    {
      "epoch": 0.44539614561027835,
      "grad_norm": 0.2472596663917096,
      "learning_rate": 4.734126984126984e-05,
      "loss": 0.4589,
      "step": 208
    },
    {
      "epoch": 0.4475374732334047,
      "grad_norm": 0.4006991143128524,
      "learning_rate": 4.73015873015873e-05,
      "loss": 0.469,
      "step": 209
    },
    {
      "epoch": 0.44967880085653106,
      "grad_norm": 0.2851927359912657,
      "learning_rate": 4.726190476190476e-05,
      "loss": 0.4766,
      "step": 210
    },
    {
      "epoch": 0.4518201284796574,
      "grad_norm": 0.3365161124957976,
      "learning_rate": 4.722222222222222e-05,
      "loss": 0.4723,
      "step": 211
    },
    {
      "epoch": 0.4539614561027837,
      "grad_norm": 0.34565226910942354,
      "learning_rate": 4.718253968253968e-05,
      "loss": 0.4758,
      "step": 212
    },
    {
      "epoch": 0.45610278372591007,
      "grad_norm": 0.27344513413865157,
      "learning_rate": 4.714285714285714e-05,
      "loss": 0.4679,
      "step": 213
    },
    {
      "epoch": 0.4582441113490364,
      "grad_norm": 0.38311587773024736,
      "learning_rate": 4.71031746031746e-05,
      "loss": 0.48,
      "step": 214
    },
    {
      "epoch": 0.4603854389721627,
      "grad_norm": 0.34364682832024224,
      "learning_rate": 4.7063492063492064e-05,
      "loss": 0.4782,
      "step": 215
    },
    {
      "epoch": 0.4625267665952891,
      "grad_norm": 0.3914058243306043,
      "learning_rate": 4.7023809523809525e-05,
      "loss": 0.4661,
      "step": 216
    },
    {
      "epoch": 0.46466809421841543,
      "grad_norm": 0.29870287943266227,
      "learning_rate": 4.6984126984126986e-05,
      "loss": 0.4638,
      "step": 217
    },
    {
      "epoch": 0.4668094218415418,
      "grad_norm": 0.4094058032238448,
      "learning_rate": 4.6944444444444446e-05,
      "loss": 0.46,
      "step": 218
    },
    {
      "epoch": 0.4689507494646681,
      "grad_norm": 0.3350045536223048,
      "learning_rate": 4.690476190476191e-05,
      "loss": 0.4679,
      "step": 219
    },
    {
      "epoch": 0.47109207708779444,
      "grad_norm": 0.3512288511765975,
      "learning_rate": 4.686507936507937e-05,
      "loss": 0.4685,
      "step": 220
    },
    {
      "epoch": 0.4732334047109208,
      "grad_norm": 0.3378187173840975,
      "learning_rate": 4.682539682539683e-05,
      "loss": 0.4465,
      "step": 221
    },
    {
      "epoch": 0.4753747323340471,
      "grad_norm": 0.3038250915138398,
      "learning_rate": 4.678571428571429e-05,
      "loss": 0.4821,
      "step": 222
    },
    {
      "epoch": 0.47751605995717344,
      "grad_norm": 0.29703824475925095,
      "learning_rate": 4.674603174603175e-05,
      "loss": 0.4747,
      "step": 223
    },
    {
      "epoch": 0.4796573875802998,
      "grad_norm": 0.3806097625906433,
      "learning_rate": 4.670634920634921e-05,
      "loss": 0.4979,
      "step": 224
    },
    {
      "epoch": 0.4817987152034261,
      "grad_norm": 0.3317321869836366,
      "learning_rate": 4.666666666666667e-05,
      "loss": 0.4775,
      "step": 225
    },
    {
      "epoch": 0.48394004282655245,
      "grad_norm": 0.35267177499930097,
      "learning_rate": 4.662698412698413e-05,
      "loss": 0.4707,
      "step": 226
    },
    {
      "epoch": 0.4860813704496788,
      "grad_norm": 0.3134789773559103,
      "learning_rate": 4.658730158730159e-05,
      "loss": 0.4676,
      "step": 227
    },
    {
      "epoch": 0.48822269807280516,
      "grad_norm": 0.31826459466880935,
      "learning_rate": 4.6547619047619054e-05,
      "loss": 0.4741,
      "step": 228
    },
    {
      "epoch": 0.49036402569593146,
      "grad_norm": 0.3172595965266178,
      "learning_rate": 4.6507936507936515e-05,
      "loss": 0.4769,
      "step": 229
    },
    {
      "epoch": 0.4925053533190578,
      "grad_norm": 0.281104120201888,
      "learning_rate": 4.646825396825397e-05,
      "loss": 0.4486,
      "step": 230
    },
    {
      "epoch": 0.49464668094218417,
      "grad_norm": 0.3390660082734012,
      "learning_rate": 4.642857142857143e-05,
      "loss": 0.4721,
      "step": 231
    },
    {
      "epoch": 0.49678800856531047,
      "grad_norm": 0.2624873773296077,
      "learning_rate": 4.638888888888889e-05,
      "loss": 0.4579,
      "step": 232
    },
    {
      "epoch": 0.4989293361884368,
      "grad_norm": 0.30482318678415316,
      "learning_rate": 4.634920634920635e-05,
      "loss": 0.4575,
      "step": 233
    },
    {
      "epoch": 0.5010706638115632,
      "grad_norm": 0.3163232948976339,
      "learning_rate": 4.630952380952381e-05,
      "loss": 0.4771,
      "step": 234
    },
    {
      "epoch": 0.5032119914346895,
      "grad_norm": 0.28403210542670865,
      "learning_rate": 4.626984126984127e-05,
      "loss": 0.4571,
      "step": 235
    },
    {
      "epoch": 0.5053533190578159,
      "grad_norm": 0.3169956103989183,
      "learning_rate": 4.623015873015873e-05,
      "loss": 0.4642,
      "step": 236
    },
    {
      "epoch": 0.5074946466809421,
      "grad_norm": 0.3124625210824864,
      "learning_rate": 4.6190476190476194e-05,
      "loss": 0.4934,
      "step": 237
    },
    {
      "epoch": 0.5096359743040685,
      "grad_norm": 0.27914854479065176,
      "learning_rate": 4.6150793650793655e-05,
      "loss": 0.4777,
      "step": 238
    },
    {
      "epoch": 0.5117773019271948,
      "grad_norm": 0.30405223359375916,
      "learning_rate": 4.6111111111111115e-05,
      "loss": 0.4774,
      "step": 239
    },
    {
      "epoch": 0.5139186295503212,
      "grad_norm": 0.27824494271256855,
      "learning_rate": 4.607142857142857e-05,
      "loss": 0.4778,
      "step": 240
    },
    {
      "epoch": 0.5160599571734475,
      "grad_norm": 0.28631142513329905,
      "learning_rate": 4.603174603174603e-05,
      "loss": 0.4742,
      "step": 241
    },
    {
      "epoch": 0.5182012847965739,
      "grad_norm": 0.2734339260745776,
      "learning_rate": 4.599206349206349e-05,
      "loss": 0.4598,
      "step": 242
    },
    {
      "epoch": 0.5203426124197003,
      "grad_norm": 0.4662830157775148,
      "learning_rate": 4.595238095238095e-05,
      "loss": 0.4622,
      "step": 243
    },
    {
      "epoch": 0.5224839400428265,
      "grad_norm": 0.27437983872972277,
      "learning_rate": 4.591269841269841e-05,
      "loss": 0.4661,
      "step": 244
    },
    {
      "epoch": 0.5246252676659529,
      "grad_norm": 0.3569225244178123,
      "learning_rate": 4.587301587301587e-05,
      "loss": 0.4558,
      "step": 245
    },
    {
      "epoch": 0.5267665952890792,
      "grad_norm": 0.2921312413477028,
      "learning_rate": 4.5833333333333334e-05,
      "loss": 0.473,
      "step": 246
    },
    {
      "epoch": 0.5289079229122056,
      "grad_norm": 0.3105118836185397,
      "learning_rate": 4.5793650793650795e-05,
      "loss": 0.4592,
      "step": 247
    },
    {
      "epoch": 0.5310492505353319,
      "grad_norm": 0.2602252376592866,
      "learning_rate": 4.5753968253968255e-05,
      "loss": 0.4475,
      "step": 248
    },
    {
      "epoch": 0.5331905781584583,
      "grad_norm": 0.31546458571249575,
      "learning_rate": 4.5714285714285716e-05,
      "loss": 0.4655,
      "step": 249
    },
    {
      "epoch": 0.5353319057815846,
      "grad_norm": 0.29234521833457866,
      "learning_rate": 4.567460317460318e-05,
      "loss": 0.4386,
      "step": 250
    },
    {
      "epoch": 0.5374732334047109,
      "grad_norm": 0.27089799714474905,
      "learning_rate": 4.563492063492064e-05,
      "loss": 0.4552,
      "step": 251
    },
    {
      "epoch": 0.5396145610278372,
      "grad_norm": 0.2876790516942422,
      "learning_rate": 4.55952380952381e-05,
      "loss": 0.4643,
      "step": 252
    },
    {
      "epoch": 0.5417558886509636,
      "grad_norm": 0.2796690158138871,
      "learning_rate": 4.555555555555556e-05,
      "loss": 0.4685,
      "step": 253
    },
    {
      "epoch": 0.5438972162740899,
      "grad_norm": 0.2620205830889585,
      "learning_rate": 4.551587301587302e-05,
      "loss": 0.4628,
      "step": 254
    },
    {
      "epoch": 0.5460385438972163,
      "grad_norm": 0.27675126452812215,
      "learning_rate": 4.547619047619048e-05,
      "loss": 0.4659,
      "step": 255
    },
    {
      "epoch": 0.5481798715203426,
      "grad_norm": 0.2529727200895915,
      "learning_rate": 4.543650793650794e-05,
      "loss": 0.4719,
      "step": 256
    },
    {
      "epoch": 0.550321199143469,
      "grad_norm": 0.28938734392771215,
      "learning_rate": 4.5396825396825395e-05,
      "loss": 0.4499,
      "step": 257
    },
    {
      "epoch": 0.5524625267665952,
      "grad_norm": 0.2732820003277474,
      "learning_rate": 4.5357142857142856e-05,
      "loss": 0.4677,
      "step": 258
    },
    {
      "epoch": 0.5546038543897216,
      "grad_norm": 0.24867294850473456,
      "learning_rate": 4.531746031746032e-05,
      "loss": 0.4714,
      "step": 259
    },
    {
      "epoch": 0.556745182012848,
      "grad_norm": 0.3040098909557791,
      "learning_rate": 4.527777777777778e-05,
      "loss": 0.4757,
      "step": 260
    },
    {
      "epoch": 0.5588865096359743,
      "grad_norm": 0.2705701549546398,
      "learning_rate": 4.523809523809524e-05,
      "loss": 0.4642,
      "step": 261
    },
    {
      "epoch": 0.5610278372591007,
      "grad_norm": 0.2949114446979747,
      "learning_rate": 4.51984126984127e-05,
      "loss": 0.4671,
      "step": 262
    },
    {
      "epoch": 0.563169164882227,
      "grad_norm": 0.28678659699639003,
      "learning_rate": 4.515873015873016e-05,
      "loss": 0.4519,
      "step": 263
    },
    {
      "epoch": 0.5653104925053534,
      "grad_norm": 0.2582358880028455,
      "learning_rate": 4.511904761904762e-05,
      "loss": 0.4545,
      "step": 264
    },
    {
      "epoch": 0.5674518201284796,
      "grad_norm": 0.304233138313808,
      "learning_rate": 4.507936507936508e-05,
      "loss": 0.4616,
      "step": 265
    },
    {
      "epoch": 0.569593147751606,
      "grad_norm": 0.2721420459823916,
      "learning_rate": 4.503968253968254e-05,
      "loss": 0.4547,
      "step": 266
    },
    {
      "epoch": 0.5717344753747323,
      "grad_norm": 0.2687992677978425,
      "learning_rate": 4.5e-05,
      "loss": 0.4576,
      "step": 267
    },
    {
      "epoch": 0.5738758029978587,
      "grad_norm": 0.2742900950884715,
      "learning_rate": 4.4960317460317464e-05,
      "loss": 0.4459,
      "step": 268
    },
    {
      "epoch": 0.576017130620985,
      "grad_norm": 0.28795217687814245,
      "learning_rate": 4.4920634920634924e-05,
      "loss": 0.4698,
      "step": 269
    },
    {
      "epoch": 0.5781584582441114,
      "grad_norm": 0.25641275034601446,
      "learning_rate": 4.4880952380952385e-05,
      "loss": 0.4679,
      "step": 270
    },
    {
      "epoch": 0.5802997858672377,
      "grad_norm": 0.26668890447386795,
      "learning_rate": 4.4841269841269846e-05,
      "loss": 0.463,
      "step": 271
    },
    {
      "epoch": 0.582441113490364,
      "grad_norm": 0.24030260367205755,
      "learning_rate": 4.4801587301587307e-05,
      "loss": 0.4628,
      "step": 272
    },
    {
      "epoch": 0.5845824411134903,
      "grad_norm": 0.2790517674146759,
      "learning_rate": 4.476190476190477e-05,
      "loss": 0.4671,
      "step": 273
    },
    {
      "epoch": 0.5867237687366167,
      "grad_norm": 0.22820456884707455,
      "learning_rate": 4.472222222222223e-05,
      "loss": 0.4457,
      "step": 274
    },
    {
      "epoch": 0.588865096359743,
      "grad_norm": 0.3271552248804396,
      "learning_rate": 4.468253968253969e-05,
      "loss": 0.473,
      "step": 275
    },
    {
      "epoch": 0.5910064239828694,
      "grad_norm": 0.2862040648114735,
      "learning_rate": 4.464285714285715e-05,
      "loss": 0.4675,
      "step": 276
    },
    {
      "epoch": 0.5931477516059958,
      "grad_norm": 0.3114706333448285,
      "learning_rate": 4.460317460317461e-05,
      "loss": 0.4666,
      "step": 277
    },
    {
      "epoch": 0.5952890792291221,
      "grad_norm": 0.32309306607336397,
      "learning_rate": 4.456349206349207e-05,
      "loss": 0.463,
      "step": 278
    },
    {
      "epoch": 0.5974304068522484,
      "grad_norm": 0.24906075204446051,
      "learning_rate": 4.4523809523809525e-05,
      "loss": 0.4588,
      "step": 279
    },
    {
      "epoch": 0.5995717344753747,
      "grad_norm": 0.3269798068381555,
      "learning_rate": 4.4484126984126986e-05,
      "loss": 0.4434,
      "step": 280
    },
    {
      "epoch": 0.6017130620985011,
      "grad_norm": 0.24388996621753112,
      "learning_rate": 4.4444444444444447e-05,
      "loss": 0.4773,
      "step": 281
    },
    {
      "epoch": 0.6038543897216274,
      "grad_norm": 0.3069850683178239,
      "learning_rate": 4.440476190476191e-05,
      "loss": 0.4517,
      "step": 282
    },
    {
      "epoch": 0.6059957173447538,
      "grad_norm": 0.249901094708518,
      "learning_rate": 4.436507936507937e-05,
      "loss": 0.4426,
      "step": 283
    },
    {
      "epoch": 0.6081370449678801,
      "grad_norm": 0.2678810481940219,
      "learning_rate": 4.432539682539683e-05,
      "loss": 0.4446,
      "step": 284
    },
    {
      "epoch": 0.6102783725910065,
      "grad_norm": 0.26626008865151485,
      "learning_rate": 4.428571428571428e-05,
      "loss": 0.444,
      "step": 285
    },
    {
      "epoch": 0.6124197002141327,
      "grad_norm": 0.25802913659501,
      "learning_rate": 4.4246031746031744e-05,
      "loss": 0.4514,
      "step": 286
    },
    {
      "epoch": 0.6145610278372591,
      "grad_norm": 0.3058519427291137,
      "learning_rate": 4.4206349206349204e-05,
      "loss": 0.4657,
      "step": 287
    },
    {
      "epoch": 0.6167023554603854,
      "grad_norm": 0.25739722317495545,
      "learning_rate": 4.4166666666666665e-05,
      "loss": 0.4565,
      "step": 288
    },
    {
      "epoch": 0.6188436830835118,
      "grad_norm": 0.2846833831079424,
      "learning_rate": 4.4126984126984126e-05,
      "loss": 0.4584,
      "step": 289
    },
    {
      "epoch": 0.6209850107066381,
      "grad_norm": 0.258608736893351,
      "learning_rate": 4.4087301587301587e-05,
      "loss": 0.4335,
      "step": 290
    },
    {
      "epoch": 0.6231263383297645,
      "grad_norm": 0.2911602326047305,
      "learning_rate": 4.404761904761905e-05,
      "loss": 0.4394,
      "step": 291
    },
    {
      "epoch": 0.6252676659528907,
      "grad_norm": 0.26424730063896607,
      "learning_rate": 4.400793650793651e-05,
      "loss": 0.4739,
      "step": 292
    },
    {
      "epoch": 0.6274089935760171,
      "grad_norm": 0.27052936749041473,
      "learning_rate": 4.396825396825397e-05,
      "loss": 0.4737,
      "step": 293
    },
    {
      "epoch": 0.6295503211991434,
      "grad_norm": 0.2826287053464395,
      "learning_rate": 4.392857142857143e-05,
      "loss": 0.4557,
      "step": 294
    },
    {
      "epoch": 0.6316916488222698,
      "grad_norm": 0.22012742826317977,
      "learning_rate": 4.388888888888889e-05,
      "loss": 0.4408,
      "step": 295
    },
    {
      "epoch": 0.6338329764453962,
      "grad_norm": 0.28162001988825747,
      "learning_rate": 4.384920634920635e-05,
      "loss": 0.4574,
      "step": 296
    },
    {
      "epoch": 0.6359743040685225,
      "grad_norm": 0.23283539296950778,
      "learning_rate": 4.380952380952381e-05,
      "loss": 0.4462,
      "step": 297
    },
    {
      "epoch": 0.6381156316916489,
      "grad_norm": 0.2498862871583954,
      "learning_rate": 4.376984126984127e-05,
      "loss": 0.439,
      "step": 298
    },
    {
      "epoch": 0.6402569593147751,
      "grad_norm": 0.30936218743365784,
      "learning_rate": 4.373015873015873e-05,
      "loss": 0.4405,
      "step": 299
    },
    {
      "epoch": 0.6423982869379015,
      "grad_norm": 0.22890543657069876,
      "learning_rate": 4.3690476190476194e-05,
      "loss": 0.4337,
      "step": 300
    },
    {
      "epoch": 0.6445396145610278,
      "grad_norm": 0.3478614587683788,
      "learning_rate": 4.3650793650793655e-05,
      "loss": 0.4596,
      "step": 301
    },
    {
      "epoch": 0.6466809421841542,
      "grad_norm": 0.2696278385429742,
      "learning_rate": 4.3611111111111116e-05,
      "loss": 0.4713,
      "step": 302
    },
    {
      "epoch": 0.6488222698072805,
      "grad_norm": 0.3139962716399602,
      "learning_rate": 4.3571428571428576e-05,
      "loss": 0.4527,
      "step": 303
    },
    {
      "epoch": 0.6509635974304069,
      "grad_norm": 0.2670285875512765,
      "learning_rate": 4.353174603174604e-05,
      "loss": 0.4627,
      "step": 304
    },
    {
      "epoch": 0.6531049250535332,
      "grad_norm": 0.3044721685500663,
      "learning_rate": 4.34920634920635e-05,
      "loss": 0.4785,
      "step": 305
    },
    {
      "epoch": 0.6552462526766595,
      "grad_norm": 0.27396062258454984,
      "learning_rate": 4.345238095238096e-05,
      "loss": 0.4523,
      "step": 306
    },
    {
      "epoch": 0.6573875802997858,
      "grad_norm": 0.2886960728161468,
      "learning_rate": 4.341269841269842e-05,
      "loss": 0.4517,
      "step": 307
    },
    {
      "epoch": 0.6595289079229122,
      "grad_norm": 0.2533722996661738,
      "learning_rate": 4.337301587301587e-05,
      "loss": 0.4646,
      "step": 308
    },
    {
      "epoch": 0.6616702355460385,
      "grad_norm": 0.2699468559182033,
      "learning_rate": 4.3333333333333334e-05,
      "loss": 0.4421,
      "step": 309
    },
    {
      "epoch": 0.6638115631691649,
      "grad_norm": 0.283041429141369,
      "learning_rate": 4.3293650793650795e-05,
      "loss": 0.4552,
      "step": 310
    },
    {
      "epoch": 0.6659528907922913,
      "grad_norm": 0.2591706001178728,
      "learning_rate": 4.3253968253968256e-05,
      "loss": 0.4527,
      "step": 311
    },
    {
      "epoch": 0.6680942184154176,
      "grad_norm": 0.2864467963151813,
      "learning_rate": 4.3214285714285716e-05,
      "loss": 0.4666,
      "step": 312
    },
    {
      "epoch": 0.6702355460385439,
      "grad_norm": 0.281657408994935,
      "learning_rate": 4.317460317460318e-05,
      "loss": 0.4582,
      "step": 313
    },
    {
      "epoch": 0.6723768736616702,
      "grad_norm": 0.2474591816739494,
      "learning_rate": 4.313492063492064e-05,
      "loss": 0.4361,
      "step": 314
    },
    {
      "epoch": 0.6745182012847966,
      "grad_norm": 0.2973486888734723,
      "learning_rate": 4.30952380952381e-05,
      "loss": 0.4583,
      "step": 315
    },
    {
      "epoch": 0.6766595289079229,
      "grad_norm": 0.2444964189560587,
      "learning_rate": 4.305555555555556e-05,
      "loss": 0.4664,
      "step": 316
    },
    {
      "epoch": 0.6788008565310493,
      "grad_norm": 0.2480078169039145,
      "learning_rate": 4.301587301587302e-05,
      "loss": 0.4675,
      "step": 317
    },
    {
      "epoch": 0.6809421841541756,
      "grad_norm": 0.26312534234905105,
      "learning_rate": 4.297619047619048e-05,
      "loss": 0.4489,
      "step": 318
    },
    {
      "epoch": 0.683083511777302,
      "grad_norm": 0.2462656315297276,
      "learning_rate": 4.2936507936507935e-05,
      "loss": 0.4591,
      "step": 319
    },
    {
      "epoch": 0.6852248394004282,
      "grad_norm": 0.29816624349859283,
      "learning_rate": 4.2896825396825396e-05,
      "loss": 0.4477,
      "step": 320
    },
    {
      "epoch": 0.6873661670235546,
      "grad_norm": 0.24698029570776403,
      "learning_rate": 4.2857142857142856e-05,
      "loss": 0.4695,
      "step": 321
    },
    {
      "epoch": 0.6895074946466809,
      "grad_norm": 0.34703431482300096,
      "learning_rate": 4.281746031746032e-05,
      "loss": 0.4566,
      "step": 322
    },
    {
      "epoch": 0.6916488222698073,
      "grad_norm": 0.2437786811285962,
      "learning_rate": 4.277777777777778e-05,
      "loss": 0.4723,
      "step": 323
    },
    {
      "epoch": 0.6937901498929336,
      "grad_norm": 0.3412185273042048,
      "learning_rate": 4.273809523809524e-05,
      "loss": 0.4532,
      "step": 324
    },
    {
      "epoch": 0.69593147751606,
      "grad_norm": 0.24904956038384604,
      "learning_rate": 4.26984126984127e-05,
      "loss": 0.4334,
      "step": 325
    },
    {
      "epoch": 0.6980728051391863,
      "grad_norm": 0.2782237575432937,
      "learning_rate": 4.265873015873016e-05,
      "loss": 0.4463,
      "step": 326
    },
    {
      "epoch": 0.7002141327623126,
      "grad_norm": 0.2657363496164857,
      "learning_rate": 4.261904761904762e-05,
      "loss": 0.4591,
      "step": 327
    },
    {
      "epoch": 0.702355460385439,
      "grad_norm": 0.2555708376181943,
      "learning_rate": 4.257936507936508e-05,
      "loss": 0.4464,
      "step": 328
    },
    {
      "epoch": 0.7044967880085653,
      "grad_norm": 0.23571687740169944,
      "learning_rate": 4.253968253968254e-05,
      "loss": 0.4455,
      "step": 329
    },
    {
      "epoch": 0.7066381156316917,
      "grad_norm": 0.2687735724291379,
      "learning_rate": 4.25e-05,
      "loss": 0.4442,
      "step": 330
    },
    {
      "epoch": 0.708779443254818,
      "grad_norm": 0.21677485542153913,
      "learning_rate": 4.2460317460317464e-05,
      "loss": 0.4389,
      "step": 331
    },
    {
      "epoch": 0.7109207708779444,
      "grad_norm": 0.30463445202519995,
      "learning_rate": 4.2420634920634925e-05,
      "loss": 0.4641,
      "step": 332
    },
    {
      "epoch": 0.7130620985010707,
      "grad_norm": 0.25037659567634474,
      "learning_rate": 4.2380952380952385e-05,
      "loss": 0.4601,
      "step": 333
    },
    {
      "epoch": 0.715203426124197,
      "grad_norm": 0.2597863908544412,
      "learning_rate": 4.2341269841269846e-05,
      "loss": 0.4692,
      "step": 334
    },
    {
      "epoch": 0.7173447537473233,
      "grad_norm": 0.26953376114396266,
      "learning_rate": 4.23015873015873e-05,
      "loss": 0.454,
      "step": 335
    },
    {
      "epoch": 0.7194860813704497,
      "grad_norm": 0.246208461301376,
      "learning_rate": 4.226190476190476e-05,
      "loss": 0.463,
      "step": 336
    },
    {
      "epoch": 0.721627408993576,
      "grad_norm": 0.2370326879028165,
      "learning_rate": 4.222222222222222e-05,
      "loss": 0.4467,
      "step": 337
    },
    {
      "epoch": 0.7237687366167024,
      "grad_norm": 0.25036860829476676,
      "learning_rate": 4.218253968253968e-05,
      "loss": 0.4411,
      "step": 338
    },
    {
      "epoch": 0.7259100642398287,
      "grad_norm": 0.28329537291272733,
      "learning_rate": 4.214285714285714e-05,
      "loss": 0.454,
      "step": 339
| }, | |
| { | |
| "epoch": 0.728051391862955, | |
| "grad_norm": 0.24559865290348679, | |
| "learning_rate": 4.2103174603174604e-05, | |
| "loss": 0.4498, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.7301927194860813, | |
| "grad_norm": 0.23500115967335444, | |
| "learning_rate": 4.2063492063492065e-05, | |
| "loss": 0.4552, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.7323340471092077, | |
| "grad_norm": 0.2919643243289569, | |
| "learning_rate": 4.2023809523809525e-05, | |
| "loss": 0.4567, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.734475374732334, | |
| "grad_norm": 0.26791366452779686, | |
| "learning_rate": 4.1984126984126986e-05, | |
| "loss": 0.4442, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.7366167023554604, | |
| "grad_norm": 0.28291332973939526, | |
| "learning_rate": 4.194444444444445e-05, | |
| "loss": 0.451, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.7387580299785867, | |
| "grad_norm": 0.25359000867556963, | |
| "learning_rate": 4.190476190476191e-05, | |
| "loss": 0.4425, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.7408993576017131, | |
| "grad_norm": 0.2408660681176407, | |
| "learning_rate": 4.186507936507937e-05, | |
| "loss": 0.4493, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.7430406852248393, | |
| "grad_norm": 0.3349449875093207, | |
| "learning_rate": 4.182539682539683e-05, | |
| "loss": 0.4592, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.7451820128479657, | |
| "grad_norm": 0.2559707605515902, | |
| "learning_rate": 4.178571428571429e-05, | |
| "loss": 0.4538, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.7473233404710921, | |
| "grad_norm": 0.2737394625735486, | |
| "learning_rate": 4.174603174603175e-05, | |
| "loss": 0.4563, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.7494646680942184, | |
| "grad_norm": 0.28364900087424905, | |
| "learning_rate": 4.170634920634921e-05, | |
| "loss": 0.4741, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.7516059957173448, | |
| "grad_norm": 0.2741462100795298, | |
| "learning_rate": 4.166666666666667e-05, | |
| "loss": 0.4363, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.7537473233404711, | |
| "grad_norm": 0.25635492049633196, | |
| "learning_rate": 4.162698412698413e-05, | |
| "loss": 0.4445, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.7558886509635975, | |
| "grad_norm": 0.273452663060448, | |
| "learning_rate": 4.1587301587301594e-05, | |
| "loss": 0.4408, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.7580299785867237, | |
| "grad_norm": 0.32668360058869866, | |
| "learning_rate": 4.1547619047619054e-05, | |
| "loss": 0.4653, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.7601713062098501, | |
| "grad_norm": 0.28322113864222886, | |
| "learning_rate": 4.1507936507936515e-05, | |
| "loss": 0.449, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.7623126338329764, | |
| "grad_norm": 0.3314451874615497, | |
| "learning_rate": 4.1468253968253976e-05, | |
| "loss": 0.4345, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.7644539614561028, | |
| "grad_norm": 0.2909325990893949, | |
| "learning_rate": 4.1428571428571437e-05, | |
| "loss": 0.4501, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.7665952890792291, | |
| "grad_norm": 0.3155371913936611, | |
| "learning_rate": 4.138888888888889e-05, | |
| "loss": 0.4362, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.7687366167023555, | |
| "grad_norm": 0.24516857979495404, | |
| "learning_rate": 4.134920634920635e-05, | |
| "loss": 0.4457, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.7708779443254818, | |
| "grad_norm": 0.30672444383523295, | |
| "learning_rate": 4.130952380952381e-05, | |
| "loss": 0.4382, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.7730192719486081, | |
| "grad_norm": 0.2622046826324259, | |
| "learning_rate": 4.126984126984127e-05, | |
| "loss": 0.4427, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.7751605995717344, | |
| "grad_norm": 0.28744995963189524, | |
| "learning_rate": 4.123015873015873e-05, | |
| "loss": 0.4608, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.7773019271948608, | |
| "grad_norm": 0.2566986215900858, | |
| "learning_rate": 4.119047619047619e-05, | |
| "loss": 0.4395, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.7794432548179872, | |
| "grad_norm": 0.2754243217704112, | |
| "learning_rate": 4.115079365079365e-05, | |
| "loss": 0.4472, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.7815845824411135, | |
| "grad_norm": 0.2679510555818145, | |
| "learning_rate": 4.111111111111111e-05, | |
| "loss": 0.4364, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.7837259100642399, | |
| "grad_norm": 0.22802893505394484, | |
| "learning_rate": 4.107142857142857e-05, | |
| "loss": 0.4341, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.7858672376873662, | |
| "grad_norm": 0.25400510440151647, | |
| "learning_rate": 4.103174603174603e-05, | |
| "loss": 0.446, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.7880085653104925, | |
| "grad_norm": 0.24898639805458245, | |
| "learning_rate": 4.099206349206349e-05, | |
| "loss": 0.4417, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.7901498929336188, | |
| "grad_norm": 0.2820704635610971, | |
| "learning_rate": 4.095238095238095e-05, | |
| "loss": 0.4518, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.7922912205567452, | |
| "grad_norm": 0.23351424195098205, | |
| "learning_rate": 4.091269841269841e-05, | |
| "loss": 0.4515, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.7944325481798715, | |
| "grad_norm": 0.2815505680256724, | |
| "learning_rate": 4.0873015873015874e-05, | |
| "loss": 0.4296, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.7965738758029979, | |
| "grad_norm": 0.23872824455145594, | |
| "learning_rate": 4.0833333333333334e-05, | |
| "loss": 0.4253, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.7987152034261242, | |
| "grad_norm": 0.2509283478607524, | |
| "learning_rate": 4.0793650793650795e-05, | |
| "loss": 0.4272, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.8008565310492506, | |
| "grad_norm": 0.23260890926948258, | |
| "learning_rate": 4.0753968253968256e-05, | |
| "loss": 0.4422, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.8029978586723768, | |
| "grad_norm": 0.24640220531762067, | |
| "learning_rate": 4.0714285714285717e-05, | |
| "loss": 0.4325, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.8051391862955032, | |
| "grad_norm": 0.2557935473418916, | |
| "learning_rate": 4.067460317460318e-05, | |
| "loss": 0.4495, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.8072805139186295, | |
| "grad_norm": 0.24864370999195623, | |
| "learning_rate": 4.063492063492064e-05, | |
| "loss": 0.44, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.8094218415417559, | |
| "grad_norm": 0.2494337325169666, | |
| "learning_rate": 4.05952380952381e-05, | |
| "loss": 0.4292, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.8115631691648822, | |
| "grad_norm": 0.26643265488032447, | |
| "learning_rate": 4.055555555555556e-05, | |
| "loss": 0.4258, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.8137044967880086, | |
| "grad_norm": 0.29981525233093126, | |
| "learning_rate": 4.051587301587302e-05, | |
| "loss": 0.4462, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.815845824411135, | |
| "grad_norm": 0.23483623618223876, | |
| "learning_rate": 4.047619047619048e-05, | |
| "loss": 0.4403, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.8179871520342612, | |
| "grad_norm": 0.31621068938716507, | |
| "learning_rate": 4.043650793650794e-05, | |
| "loss": 0.4542, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.8201284796573876, | |
| "grad_norm": 0.27447908054014813, | |
| "learning_rate": 4.03968253968254e-05, | |
| "loss": 0.4498, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.8222698072805139, | |
| "grad_norm": 0.2979087905677751, | |
| "learning_rate": 4.035714285714286e-05, | |
| "loss": 0.4326, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.8244111349036403, | |
| "grad_norm": 0.2738788951804694, | |
| "learning_rate": 4.031746031746032e-05, | |
| "loss": 0.4433, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.8265524625267666, | |
| "grad_norm": 0.27184798198740845, | |
| "learning_rate": 4.027777777777778e-05, | |
| "loss": 0.4385, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.828693790149893, | |
| "grad_norm": 0.2596242318887042, | |
| "learning_rate": 4.023809523809524e-05, | |
| "loss": 0.4391, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.8308351177730193, | |
| "grad_norm": 0.3257005719389779, | |
| "learning_rate": 4.01984126984127e-05, | |
| "loss": 0.4515, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.8329764453961456, | |
| "grad_norm": 0.26020892798323664, | |
| "learning_rate": 4.015873015873016e-05, | |
| "loss": 0.4525, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.8351177730192719, | |
| "grad_norm": 0.3282937862319744, | |
| "learning_rate": 4.011904761904762e-05, | |
| "loss": 0.4522, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.8372591006423983, | |
| "grad_norm": 0.3036258464302225, | |
| "learning_rate": 4.007936507936508e-05, | |
| "loss": 0.4477, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.8394004282655246, | |
| "grad_norm": 0.26338851157250837, | |
| "learning_rate": 4.003968253968254e-05, | |
| "loss": 0.4422, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.841541755888651, | |
| "grad_norm": 0.3106182811071532, | |
| "learning_rate": 4e-05, | |
| "loss": 0.4309, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.8436830835117773, | |
| "grad_norm": 0.24266766603453416, | |
| "learning_rate": 3.9960317460317464e-05, | |
| "loss": 0.4466, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.8458244111349036, | |
| "grad_norm": 0.34811764903452264, | |
| "learning_rate": 3.9920634920634925e-05, | |
| "loss": 0.452, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.8479657387580299, | |
| "grad_norm": 0.24788695830598698, | |
| "learning_rate": 3.9880952380952386e-05, | |
| "loss": 0.4433, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.8501070663811563, | |
| "grad_norm": 0.2783115131710977, | |
| "learning_rate": 3.984126984126984e-05, | |
| "loss": 0.4561, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.8522483940042827, | |
| "grad_norm": 0.2576730623053907, | |
| "learning_rate": 3.98015873015873e-05, | |
| "loss": 0.4708, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.854389721627409, | |
| "grad_norm": 0.24712208382727868, | |
| "learning_rate": 3.976190476190476e-05, | |
| "loss": 0.449, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.8565310492505354, | |
| "grad_norm": 0.2708918582747635, | |
| "learning_rate": 3.972222222222222e-05, | |
| "loss": 0.446, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.8586723768736617, | |
| "grad_norm": 0.2713784850892593, | |
| "learning_rate": 3.968253968253968e-05, | |
| "loss": 0.4386, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.860813704496788, | |
| "grad_norm": 0.25255493559196973, | |
| "learning_rate": 3.964285714285714e-05, | |
| "loss": 0.44, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.8629550321199143, | |
| "grad_norm": 0.27193870527806324, | |
| "learning_rate": 3.9603174603174604e-05, | |
| "loss": 0.4485, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.8650963597430407, | |
| "grad_norm": 0.27126270170714106, | |
| "learning_rate": 3.9563492063492065e-05, | |
| "loss": 0.4525, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.867237687366167, | |
| "grad_norm": 0.24844125447179155, | |
| "learning_rate": 3.9523809523809526e-05, | |
| "loss": 0.4336, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.8693790149892934, | |
| "grad_norm": 0.30939011775053954, | |
| "learning_rate": 3.9484126984126986e-05, | |
| "loss": 0.4494, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.8715203426124197, | |
| "grad_norm": 0.26378548519547557, | |
| "learning_rate": 3.944444444444445e-05, | |
| "loss": 0.447, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.8736616702355461, | |
| "grad_norm": 0.2849481666705005, | |
| "learning_rate": 3.940476190476191e-05, | |
| "loss": 0.4583, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.8758029978586723, | |
| "grad_norm": 0.2815143992545685, | |
| "learning_rate": 3.936507936507937e-05, | |
| "loss": 0.4348, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.8779443254817987, | |
| "grad_norm": 0.27153299884477455, | |
| "learning_rate": 3.932539682539683e-05, | |
| "loss": 0.4434, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.880085653104925, | |
| "grad_norm": 0.27535841889086504, | |
| "learning_rate": 3.928571428571429e-05, | |
| "loss": 0.4398, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.8822269807280514, | |
| "grad_norm": 0.30776377057560106, | |
| "learning_rate": 3.9246031746031744e-05, | |
| "loss": 0.4464, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.8843683083511777, | |
| "grad_norm": 0.24429372628536725, | |
| "learning_rate": 3.9206349206349205e-05, | |
| "loss": 0.4345, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.8865096359743041, | |
| "grad_norm": 0.34967840303013203, | |
| "learning_rate": 3.9166666666666665e-05, | |
| "loss": 0.447, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.8886509635974305, | |
| "grad_norm": 0.25224213794765044, | |
| "learning_rate": 3.9126984126984126e-05, | |
| "loss": 0.4477, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.8907922912205567, | |
| "grad_norm": 0.2899965867095618, | |
| "learning_rate": 3.908730158730159e-05, | |
| "loss": 0.4183, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.892933618843683, | |
| "grad_norm": 0.3091076754528513, | |
| "learning_rate": 3.904761904761905e-05, | |
| "loss": 0.4438, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.8950749464668094, | |
| "grad_norm": 0.25144689022877925, | |
| "learning_rate": 3.900793650793651e-05, | |
| "loss": 0.4467, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.8972162740899358, | |
| "grad_norm": 0.3164686564857124, | |
| "learning_rate": 3.896825396825397e-05, | |
| "loss": 0.4348, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.8993576017130621, | |
| "grad_norm": 0.2879131478620461, | |
| "learning_rate": 3.892857142857143e-05, | |
| "loss": 0.4546, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.9014989293361885, | |
| "grad_norm": 0.2718535367036997, | |
| "learning_rate": 3.888888888888889e-05, | |
| "loss": 0.4493, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.9036402569593148, | |
| "grad_norm": 0.3215237762816025, | |
| "learning_rate": 3.884920634920635e-05, | |
| "loss": 0.4324, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.9057815845824411, | |
| "grad_norm": 0.23703610924543467, | |
| "learning_rate": 3.880952380952381e-05, | |
| "loss": 0.4423, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.9079229122055674, | |
| "grad_norm": 0.2790375906958242, | |
| "learning_rate": 3.876984126984127e-05, | |
| "loss": 0.459, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.9100642398286938, | |
| "grad_norm": 0.31216707197799737, | |
| "learning_rate": 3.8730158730158734e-05, | |
| "loss": 0.4358, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.9122055674518201, | |
| "grad_norm": 0.2238572953572114, | |
| "learning_rate": 3.8690476190476195e-05, | |
| "loss": 0.4329, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.9143468950749465, | |
| "grad_norm": 0.3300050366021809, | |
| "learning_rate": 3.8650793650793655e-05, | |
| "loss": 0.4411, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.9164882226980728, | |
| "grad_norm": 0.24904767848985657, | |
| "learning_rate": 3.8611111111111116e-05, | |
| "loss": 0.4395, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.9186295503211992, | |
| "grad_norm": 0.2558995321875151, | |
| "learning_rate": 3.857142857142858e-05, | |
| "loss": 0.4325, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.9207708779443254, | |
| "grad_norm": 0.2810339034644166, | |
| "learning_rate": 3.853174603174604e-05, | |
| "loss": 0.4333, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.9229122055674518, | |
| "grad_norm": 0.25199173622671855, | |
| "learning_rate": 3.84920634920635e-05, | |
| "loss": 0.4516, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.9250535331905781, | |
| "grad_norm": 0.27563768688654877, | |
| "learning_rate": 3.845238095238096e-05, | |
| "loss": 0.4306, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.9271948608137045, | |
| "grad_norm": 0.24180893154053762, | |
| "learning_rate": 3.841269841269842e-05, | |
| "loss": 0.4327, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.9293361884368309, | |
| "grad_norm": 0.232187215372479, | |
| "learning_rate": 3.837301587301588e-05, | |
| "loss": 0.4383, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.9314775160599572, | |
| "grad_norm": 0.2524306893162537, | |
| "learning_rate": 3.8333333333333334e-05, | |
| "loss": 0.4383, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.9336188436830836, | |
| "grad_norm": 0.21917495109126314, | |
| "learning_rate": 3.8293650793650795e-05, | |
| "loss": 0.442, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.9357601713062098, | |
| "grad_norm": 0.2491947970666118, | |
| "learning_rate": 3.8253968253968256e-05, | |
| "loss": 0.4284, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.9379014989293362, | |
| "grad_norm": 0.24928999737243862, | |
| "learning_rate": 3.821428571428572e-05, | |
| "loss": 0.4366, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.9400428265524625, | |
| "grad_norm": 0.2696841956340179, | |
| "learning_rate": 3.817460317460317e-05, | |
| "loss": 0.4426, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.9421841541755889, | |
| "grad_norm": 0.22630244921640252, | |
| "learning_rate": 3.813492063492063e-05, | |
| "loss": 0.4363, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.9443254817987152, | |
| "grad_norm": 0.2600201845565814, | |
| "learning_rate": 3.809523809523809e-05, | |
| "loss": 0.4484, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.9464668094218416, | |
| "grad_norm": 0.24580135509156412, | |
| "learning_rate": 3.805555555555555e-05, | |
| "loss": 0.4671, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.9486081370449678, | |
| "grad_norm": 0.29113072327829725, | |
| "learning_rate": 3.8015873015873014e-05, | |
| "loss": 0.451, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.9507494646680942, | |
| "grad_norm": 0.24308118122648267, | |
| "learning_rate": 3.7976190476190474e-05, | |
| "loss": 0.4564, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.9528907922912205, | |
| "grad_norm": 0.3071448001103864, | |
| "learning_rate": 3.7936507936507935e-05, | |
| "loss": 0.4397, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.9550321199143469, | |
| "grad_norm": 0.2420827295559477, | |
| "learning_rate": 3.7896825396825396e-05, | |
| "loss": 0.4454, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.9571734475374732, | |
| "grad_norm": 0.23925961181164737, | |
| "learning_rate": 3.785714285714286e-05, | |
| "loss": 0.4422, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.9593147751605996, | |
| "grad_norm": 0.27857277637461475, | |
| "learning_rate": 3.781746031746032e-05, | |
| "loss": 0.4451, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.961456102783726, | |
| "grad_norm": 0.25387578048055814, | |
| "learning_rate": 3.777777777777778e-05, | |
| "loss": 0.454, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.9635974304068522, | |
| "grad_norm": 0.2365009172752972, | |
| "learning_rate": 3.773809523809524e-05, | |
| "loss": 0.4477, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.9657387580299786, | |
| "grad_norm": 0.24886378867096692, | |
| "learning_rate": 3.76984126984127e-05, | |
| "loss": 0.4455, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.9678800856531049, | |
| "grad_norm": 0.22709536457385576, | |
| "learning_rate": 3.765873015873016e-05, | |
| "loss": 0.4421, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.9700214132762313, | |
| "grad_norm": 0.24437838490375288, | |
| "learning_rate": 3.761904761904762e-05, | |
| "loss": 0.4407, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.9721627408993576, | |
| "grad_norm": 0.25704107343861016, | |
| "learning_rate": 3.757936507936508e-05, | |
| "loss": 0.4688, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.974304068522484, | |
| "grad_norm": 0.22904949606864244, | |
| "learning_rate": 3.753968253968254e-05, | |
| "loss": 0.4293, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.9764453961456103, | |
| "grad_norm": 0.2849795459258746, | |
| "learning_rate": 3.7500000000000003e-05, | |
| "loss": 0.4462, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.9785867237687366, | |
| "grad_norm": 0.25874699145604835, | |
| "learning_rate": 3.7460317460317464e-05, | |
| "loss": 0.4533, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.9807280513918629, | |
| "grad_norm": 0.3152490582808027, | |
| "learning_rate": 3.7420634920634925e-05, | |
| "loss": 0.4407, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.9828693790149893, | |
| "grad_norm": 0.26375789665291616, | |
| "learning_rate": 3.7380952380952386e-05, | |
| "loss": 0.4465, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.9850107066381156, | |
| "grad_norm": 0.29544790745591465, | |
| "learning_rate": 3.7341269841269846e-05, | |
| "loss": 0.4488, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.987152034261242, | |
| "grad_norm": 0.2825194356752145, | |
| "learning_rate": 3.730158730158731e-05, | |
| "loss": 0.4254, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.9892933618843683, | |
| "grad_norm": 0.24723341454362188, | |
| "learning_rate": 3.726190476190476e-05, | |
| "loss": 0.4313, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.9914346895074947, | |
| "grad_norm": 0.23944247120858028, | |
| "learning_rate": 3.722222222222222e-05, | |
| "loss": 0.4297, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.9935760171306209, | |
| "grad_norm": 0.25318330122355875, | |
| "learning_rate": 3.718253968253968e-05, | |
| "loss": 0.4248, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.9957173447537473, | |
| "grad_norm": 0.2385083732481541, | |
| "learning_rate": 3.7142857142857143e-05, | |
| "loss": 0.4342, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.9978586723768736, | |
| "grad_norm": 0.25267257737774884, | |
| "learning_rate": 3.7103174603174604e-05, | |
| "loss": 0.4244, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.26302764538521656, | |
| "learning_rate": 3.7063492063492065e-05, | |
| "loss": 0.4197, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 1.0021413276231264, | |
| "grad_norm": 0.3321143759595324, | |
| "learning_rate": 3.7023809523809526e-05, | |
| "loss": 0.3746, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.0042826552462527, | |
| "grad_norm": 0.2523859067388859, | |
| "learning_rate": 3.6984126984126986e-05, | |
| "loss": 0.3629, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 1.006423982869379, | |
| "grad_norm": 0.2740491528188909, | |
| "learning_rate": 3.694444444444445e-05, | |
| "loss": 0.3739, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.0085653104925054, | |
| "grad_norm": 0.34376513245262397, | |
| "learning_rate": 3.690476190476191e-05, | |
| "loss": 0.366, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 1.0107066381156318, | |
| "grad_norm": 0.28059137656399624, | |
| "learning_rate": 3.686507936507937e-05, | |
| "loss": 0.3727, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.0128479657387581, | |
| "grad_norm": 0.33729133415206547, | |
| "learning_rate": 3.682539682539683e-05, | |
| "loss": 0.3657, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 1.0149892933618843, | |
| "grad_norm": 0.3201683000077876, | |
| "learning_rate": 3.678571428571429e-05, | |
| "loss": 0.3802, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.0171306209850106, | |
| "grad_norm": 0.274999594299655, | |
| "learning_rate": 3.674603174603175e-05, | |
| "loss": 0.3772, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 1.019271948608137, | |
| "grad_norm": 0.31109609046109504, | |
| "learning_rate": 3.6706349206349205e-05, | |
| "loss": 0.3795, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.0214132762312633, | |
| "grad_norm": 0.2575665550459576, | |
| "learning_rate": 3.6666666666666666e-05, | |
| "loss": 0.376, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 1.0235546038543897, | |
| "grad_norm": 0.2937108429839069, | |
| "learning_rate": 3.6626984126984126e-05, | |
| "loss": 0.3638, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.025695931477516, | |
| "grad_norm": 0.2511369961460089, | |
| "learning_rate": 3.658730158730159e-05, | |
| "loss": 0.3725, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 1.0278372591006424, | |
| "grad_norm": 0.2839057922395332, | |
| "learning_rate": 3.654761904761905e-05, | |
| "loss": 0.3627, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.0299785867237687, | |
| "grad_norm": 0.2920184281736407, | |
| "learning_rate": 3.650793650793651e-05, | |
| "loss": 0.3808, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 1.032119914346895, | |
| "grad_norm": 0.233470618770099, | |
| "learning_rate": 3.646825396825397e-05, | |
| "loss": 0.3617, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.0342612419700214, | |
| "grad_norm": 0.280577964771194, | |
| "learning_rate": 3.642857142857143e-05, | |
| "loss": 0.3625, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 1.0364025695931478, | |
| "grad_norm": 0.24608809205741292, | |
| "learning_rate": 3.638888888888889e-05, | |
| "loss": 0.3646, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.0385438972162742, | |
| "grad_norm": 0.2200793066660099, | |
| "learning_rate": 3.634920634920635e-05, | |
| "loss": 0.3593, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 1.0406852248394005, | |
| "grad_norm": 0.3327179418533381, | |
| "learning_rate": 3.630952380952381e-05, | |
| "loss": 0.3933, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.0428265524625269, | |
| "grad_norm": 0.22445951219430535, | |
| "learning_rate": 3.626984126984127e-05, | |
| "loss": 0.3591, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 1.044967880085653, | |
| "grad_norm": 0.2496038218726081, | |
| "learning_rate": 3.6230158730158734e-05, | |
| "loss": 0.3858, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.0471092077087794, | |
| "grad_norm": 0.31223282629773935, | |
| "learning_rate": 3.619047619047619e-05, | |
| "loss": 0.3833, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 1.0492505353319057, | |
| "grad_norm": 0.2500697247329698, | |
| "learning_rate": 3.615079365079365e-05, | |
| "loss": 0.3675, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.051391862955032, | |
| "grad_norm": 0.2569196171542971, | |
| "learning_rate": 3.611111111111111e-05, | |
| "loss": 0.3682, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 1.0535331905781584, | |
| "grad_norm": 0.2862772036703233, | |
| "learning_rate": 3.607142857142857e-05, | |
| "loss": 0.3521, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.0556745182012848, | |
| "grad_norm": 0.22001094989053036, | |
| "learning_rate": 3.603174603174603e-05, | |
| "loss": 0.3737, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 1.0578158458244111, | |
| "grad_norm": 0.25763962840222093, | |
| "learning_rate": 3.599206349206349e-05, | |
| "loss": 0.3559, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.0599571734475375, | |
| "grad_norm": 0.26453793828520467, | |
| "learning_rate": 3.595238095238095e-05, | |
| "loss": 0.3764, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 1.0620985010706638, | |
| "grad_norm": 0.24859096396107916, | |
| "learning_rate": 3.591269841269841e-05, | |
| "loss": 0.3646, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.0642398286937902, | |
| "grad_norm": 0.24879931824582785, | |
| "learning_rate": 3.5873015873015874e-05, | |
| "loss": 0.3856, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 1.0663811563169165, | |
| "grad_norm": 0.26070745567359305, | |
| "learning_rate": 3.5833333333333335e-05, | |
| "loss": 0.3776, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.068522483940043, | |
| "grad_norm": 0.25811232439255394, | |
| "learning_rate": 3.5793650793650795e-05, | |
| "loss": 0.3668, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 1.0706638115631693, | |
| "grad_norm": 0.23394126650129565, | |
| "learning_rate": 3.5753968253968256e-05, | |
| "loss": 0.3644, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.0728051391862956, | |
| "grad_norm": 0.28885516256092003, | |
| "learning_rate": 3.571428571428572e-05, | |
| "loss": 0.3697, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 1.0749464668094217, | |
| "grad_norm": 0.22243797367983625, | |
| "learning_rate": 3.567460317460318e-05, | |
| "loss": 0.3624, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.077087794432548, | |
| "grad_norm": 0.25222271031887367, | |
| "learning_rate": 3.563492063492064e-05, | |
| "loss": 0.3719, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 1.0792291220556745, | |
| "grad_norm": 0.26965537434104925, | |
| "learning_rate": 3.55952380952381e-05, | |
| "loss": 0.3959, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.0813704496788008, | |
| "grad_norm": 0.24050297443124782, | |
| "learning_rate": 3.555555555555556e-05, | |
| "loss": 0.3581, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 1.0835117773019272, | |
| "grad_norm": 0.2687861602822808, | |
| "learning_rate": 3.551587301587302e-05, | |
| "loss": 0.3758, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.0856531049250535, | |
| "grad_norm": 0.2140853649825794, | |
| "learning_rate": 3.547619047619048e-05, | |
| "loss": 0.3806, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 1.0877944325481799, | |
| "grad_norm": 0.24365107193268598, | |
| "learning_rate": 3.543650793650794e-05, | |
| "loss": 0.3731, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.0899357601713062, | |
| "grad_norm": 0.24553711140882334, | |
| "learning_rate": 3.53968253968254e-05, | |
| "loss": 0.3478, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 1.0920770877944326, | |
| "grad_norm": 0.2489843525995639, | |
| "learning_rate": 3.5357142857142864e-05, | |
| "loss": 0.3639, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.094218415417559, | |
| "grad_norm": 0.2847640243532346, | |
| "learning_rate": 3.5317460317460324e-05, | |
| "loss": 0.3652, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 1.0963597430406853, | |
| "grad_norm": 0.24864833640916995, | |
| "learning_rate": 3.527777777777778e-05, | |
| "loss": 0.3771, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.0985010706638116, | |
| "grad_norm": 0.24597951146232283, | |
| "learning_rate": 3.523809523809524e-05, | |
| "loss": 0.3474, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 1.100642398286938, | |
| "grad_norm": 0.2710332839771354, | |
| "learning_rate": 3.51984126984127e-05, | |
| "loss": 0.3611, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.1027837259100641, | |
| "grad_norm": 0.24106875283916565, | |
| "learning_rate": 3.515873015873016e-05, | |
| "loss": 0.3694, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 1.1049250535331905, | |
| "grad_norm": 0.25198007958486407, | |
| "learning_rate": 3.511904761904762e-05, | |
| "loss": 0.3676, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.1070663811563168, | |
| "grad_norm": 0.285475860805922, | |
| "learning_rate": 3.5079365079365075e-05, | |
| "loss": 0.3694, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 1.1092077087794432, | |
| "grad_norm": 0.232641447262773, | |
| "learning_rate": 3.5039682539682536e-05, | |
| "loss": 0.3543, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.1113490364025695, | |
| "grad_norm": 0.26148277895318806, | |
| "learning_rate": 3.5e-05, | |
| "loss": 0.3737, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 1.113490364025696, | |
| "grad_norm": 0.2522296596506257, | |
| "learning_rate": 3.496031746031746e-05, | |
| "loss": 0.3685, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.1156316916488223, | |
| "grad_norm": 0.24283436286680268, | |
| "learning_rate": 3.492063492063492e-05, | |
| "loss": 0.3607, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 1.1177730192719486, | |
| "grad_norm": 0.2546190850581306, | |
| "learning_rate": 3.488095238095238e-05, | |
| "loss": 0.379, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.119914346895075, | |
| "grad_norm": 0.2695857257313346, | |
| "learning_rate": 3.484126984126984e-05, | |
| "loss": 0.3791, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 1.1220556745182013, | |
| "grad_norm": 0.26122674703315873, | |
| "learning_rate": 3.48015873015873e-05, | |
| "loss": 0.3654, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.1241970021413277, | |
| "grad_norm": 0.23999078841945964, | |
| "learning_rate": 3.476190476190476e-05, | |
| "loss": 0.3876, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 1.126338329764454, | |
| "grad_norm": 0.27711890693427227, | |
| "learning_rate": 3.472222222222222e-05, | |
| "loss": 0.3678, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.1284796573875804, | |
| "grad_norm": 0.22153222863862254, | |
| "learning_rate": 3.468253968253968e-05, | |
| "loss": 0.3695, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 1.1306209850107067, | |
| "grad_norm": 0.2367741399209089, | |
| "learning_rate": 3.4642857142857144e-05, | |
| "loss": 0.3619, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.132762312633833, | |
| "grad_norm": 0.2279683733856979, | |
| "learning_rate": 3.4603174603174604e-05, | |
| "loss": 0.3602, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 1.1349036402569592, | |
| "grad_norm": 0.21797256781445612, | |
| "learning_rate": 3.4563492063492065e-05, | |
| "loss": 0.3538, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.1370449678800856, | |
| "grad_norm": 0.23243584252507798, | |
| "learning_rate": 3.4523809523809526e-05, | |
| "loss": 0.3676, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 1.139186295503212, | |
| "grad_norm": 0.22337304786840087, | |
| "learning_rate": 3.448412698412699e-05, | |
| "loss": 0.35, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.1413276231263383, | |
| "grad_norm": 0.2723870117088924, | |
| "learning_rate": 3.444444444444445e-05, | |
| "loss": 0.3681, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 1.1434689507494646, | |
| "grad_norm": 0.23683938267697505, | |
| "learning_rate": 3.440476190476191e-05, | |
| "loss": 0.3584, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.145610278372591, | |
| "grad_norm": 0.27263835901881306, | |
| "learning_rate": 3.436507936507937e-05, | |
| "loss": 0.3585, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 1.1477516059957173, | |
| "grad_norm": 0.2413247019766125, | |
| "learning_rate": 3.432539682539683e-05, | |
| "loss": 0.3715, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.1498929336188437, | |
| "grad_norm": 0.24835573947131598, | |
| "learning_rate": 3.428571428571429e-05, | |
| "loss": 0.3834, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 1.15203426124197, | |
| "grad_norm": 0.24833529249782688, | |
| "learning_rate": 3.424603174603175e-05, | |
| "loss": 0.3753, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.1541755888650964, | |
| "grad_norm": 0.2271111061767862, | |
| "learning_rate": 3.420634920634921e-05, | |
| "loss": 0.3751, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 1.1563169164882228, | |
| "grad_norm": 0.2432938466019091, | |
| "learning_rate": 3.4166666666666666e-05, | |
| "loss": 0.3521, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.1584582441113491, | |
| "grad_norm": 0.2458707748193166, | |
| "learning_rate": 3.412698412698413e-05, | |
| "loss": 0.3583, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 1.1605995717344753, | |
| "grad_norm": 0.23943667615924907, | |
| "learning_rate": 3.408730158730159e-05, | |
| "loss": 0.3623, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.1627408993576016, | |
| "grad_norm": 0.29767722341183855, | |
| "learning_rate": 3.404761904761905e-05, | |
| "loss": 0.3624, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 1.164882226980728, | |
| "grad_norm": 0.2643286960690453, | |
| "learning_rate": 3.400793650793651e-05, | |
| "loss": 0.3688, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.1670235546038543, | |
| "grad_norm": 0.261706842552651, | |
| "learning_rate": 3.396825396825397e-05, | |
| "loss": 0.363, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 1.1691648822269807, | |
| "grad_norm": 0.22632083311117027, | |
| "learning_rate": 3.392857142857143e-05, | |
| "loss": 0.37, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.171306209850107, | |
| "grad_norm": 0.24969845223117051, | |
| "learning_rate": 3.388888888888889e-05, | |
| "loss": 0.371, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 1.1734475374732334, | |
| "grad_norm": 0.2653686082584629, | |
| "learning_rate": 3.384920634920635e-05, | |
| "loss": 0.3641, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.1755888650963597, | |
| "grad_norm": 0.26484419864560405, | |
| "learning_rate": 3.380952380952381e-05, | |
| "loss": 0.3532, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 1.177730192719486, | |
| "grad_norm": 0.23731845700258258, | |
| "learning_rate": 3.3769841269841273e-05, | |
| "loss": 0.3844, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.1798715203426124, | |
| "grad_norm": 0.26369894447851056, | |
| "learning_rate": 3.3730158730158734e-05, | |
| "loss": 0.3535, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 1.1820128479657388, | |
| "grad_norm": 0.25523841664126234, | |
| "learning_rate": 3.3690476190476195e-05, | |
| "loss": 0.3915, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.1841541755888652, | |
| "grad_norm": 0.21550220946043203, | |
| "learning_rate": 3.3650793650793656e-05, | |
| "loss": 0.3623, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 1.1862955032119915, | |
| "grad_norm": 0.24110638888192076, | |
| "learning_rate": 3.3611111111111116e-05, | |
| "loss": 0.3499, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.1884368308351179, | |
| "grad_norm": 0.22464037830444605, | |
| "learning_rate": 3.357142857142857e-05, | |
| "loss": 0.3578, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 1.1905781584582442, | |
| "grad_norm": 0.21226847946734964, | |
| "learning_rate": 3.353174603174603e-05, | |
| "loss": 0.3634, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.1927194860813706, | |
| "grad_norm": 0.2626157214230889, | |
| "learning_rate": 3.349206349206349e-05, | |
| "loss": 0.3589, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 1.1948608137044967, | |
| "grad_norm": 0.24071856058212684, | |
| "learning_rate": 3.345238095238095e-05, | |
| "loss": 0.3863, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.197002141327623, | |
| "grad_norm": 0.2508127197520457, | |
| "learning_rate": 3.3412698412698413e-05, | |
| "loss": 0.386, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 1.1991434689507494, | |
| "grad_norm": 0.30389459018728837, | |
| "learning_rate": 3.3373015873015874e-05, | |
| "loss": 0.3833, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.2012847965738758, | |
| "grad_norm": 0.25210041774328223, | |
| "learning_rate": 3.3333333333333335e-05, | |
| "loss": 0.3517, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 1.2034261241970021, | |
| "grad_norm": 0.23500359192291567, | |
| "learning_rate": 3.3293650793650796e-05, | |
| "loss": 0.3605, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.2055674518201285, | |
| "grad_norm": 0.2546074299402884, | |
| "learning_rate": 3.3253968253968256e-05, | |
| "loss": 0.3516, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 1.2077087794432548, | |
| "grad_norm": 0.2709353855529332, | |
| "learning_rate": 3.321428571428572e-05, | |
| "loss": 0.3701, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.2098501070663812, | |
| "grad_norm": 0.2518216621854237, | |
| "learning_rate": 3.317460317460318e-05, | |
| "loss": 0.3652, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 1.2119914346895075, | |
| "grad_norm": 0.23203813858204622, | |
| "learning_rate": 3.313492063492064e-05, | |
| "loss": 0.357, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.214132762312634, | |
| "grad_norm": 0.27322569967364363, | |
| "learning_rate": 3.309523809523809e-05, | |
| "loss": 0.3795, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 1.2162740899357602, | |
| "grad_norm": 0.21862325914700695, | |
| "learning_rate": 3.3055555555555553e-05, | |
| "loss": 0.3803, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.2184154175588866, | |
| "grad_norm": 0.23817063938434127, | |
| "learning_rate": 3.3015873015873014e-05, | |
| "loss": 0.3864, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 1.2205567451820127, | |
| "grad_norm": 0.30402766988609314, | |
| "learning_rate": 3.2976190476190475e-05, | |
| "loss": 0.3784, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.222698072805139, | |
| "grad_norm": 0.23766830428808083, | |
| "learning_rate": 3.2936507936507936e-05, | |
| "loss": 0.3588, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 1.2248394004282654, | |
| "grad_norm": 0.25852053977967815, | |
| "learning_rate": 3.2896825396825396e-05, | |
| "loss": 0.3781, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.2269807280513918, | |
| "grad_norm": 0.292675984396594, | |
| "learning_rate": 3.285714285714286e-05, | |
| "loss": 0.3791, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 1.2291220556745182, | |
| "grad_norm": 0.22578595387659117, | |
| "learning_rate": 3.281746031746032e-05, | |
| "loss": 0.368, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.2312633832976445, | |
| "grad_norm": 0.24728132039412187, | |
| "learning_rate": 3.277777777777778e-05, | |
| "loss": 0.3696, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 1.2334047109207709, | |
| "grad_norm": 0.27218412013650006, | |
| "learning_rate": 3.273809523809524e-05, | |
| "loss": 0.3688, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.2355460385438972, | |
| "grad_norm": 0.23158465192272315, | |
| "learning_rate": 3.26984126984127e-05, | |
| "loss": 0.3842, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 1.2376873661670236, | |
| "grad_norm": 0.27482920742913025, | |
| "learning_rate": 3.265873015873016e-05, | |
| "loss": 0.3663, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.23982869379015, | |
| "grad_norm": 0.25920426583780776, | |
| "learning_rate": 3.261904761904762e-05, | |
| "loss": 0.3571, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 1.2419700214132763, | |
| "grad_norm": 0.2032778358032685, | |
| "learning_rate": 3.257936507936508e-05, | |
| "loss": 0.3488, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.2441113490364026, | |
| "grad_norm": 0.2549778314308203, | |
| "learning_rate": 3.253968253968254e-05, | |
| "loss": 0.37, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 1.246252676659529, | |
| "grad_norm": 0.25860176335450585, | |
| "learning_rate": 3.2500000000000004e-05, | |
| "loss": 0.3633, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.2483940042826553, | |
| "grad_norm": 0.21568263900821674, | |
| "learning_rate": 3.2460317460317465e-05, | |
| "loss": 0.3594, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 1.2505353319057817, | |
| "grad_norm": 0.25190898114681015, | |
| "learning_rate": 3.2420634920634925e-05, | |
| "loss": 0.3543, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.252676659528908, | |
| "grad_norm": 0.21237788073269723, | |
| "learning_rate": 3.2380952380952386e-05, | |
| "loss": 0.3755, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 1.2548179871520342, | |
| "grad_norm": 0.22878119129492097, | |
| "learning_rate": 3.234126984126985e-05, | |
| "loss": 0.3598, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.2569593147751605, | |
| "grad_norm": 0.24998274367059609, | |
| "learning_rate": 3.230158730158731e-05, | |
| "loss": 0.3671, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 1.259100642398287, | |
| "grad_norm": 0.24248609005355534, | |
| "learning_rate": 3.226190476190477e-05, | |
| "loss": 0.365, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.2612419700214133, | |
| "grad_norm": 0.2502819042148945, | |
| "learning_rate": 3.222222222222223e-05, | |
| "loss": 0.3654, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 1.2633832976445396, | |
| "grad_norm": 0.24428764588220775, | |
| "learning_rate": 3.218253968253968e-05, | |
| "loss": 0.375, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.265524625267666, | |
| "grad_norm": 0.24064115102187758, | |
| "learning_rate": 3.2142857142857144e-05, | |
| "loss": 0.3646, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 1.2676659528907923, | |
| "grad_norm": 0.2580980916190992, | |
| "learning_rate": 3.2103174603174605e-05, | |
| "loss": 0.3805, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.2698072805139187, | |
| "grad_norm": 0.21389149883559067, | |
| "learning_rate": 3.2063492063492065e-05, | |
| "loss": 0.3854, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 1.271948608137045, | |
| "grad_norm": 0.23647472042052045, | |
| "learning_rate": 3.202380952380952e-05, | |
| "loss": 0.3645, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.2740899357601714, | |
| "grad_norm": 0.23724295037763593, | |
| "learning_rate": 3.198412698412698e-05, | |
| "loss": 0.3735, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 1.2762312633832975, | |
| "grad_norm": 0.22413428930081114, | |
| "learning_rate": 3.194444444444444e-05, | |
| "loss": 0.359, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.2783725910064239, | |
| "grad_norm": 0.2223223372100241, | |
| "learning_rate": 3.19047619047619e-05, | |
| "loss": 0.3491, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 1.2805139186295502, | |
| "grad_norm": 0.21897487856488682, | |
| "learning_rate": 3.186507936507936e-05, | |
| "loss": 0.357, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.2826552462526766, | |
| "grad_norm": 0.2502304419567601, | |
| "learning_rate": 3.182539682539682e-05, | |
| "loss": 0.3536, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 1.284796573875803, | |
| "grad_norm": 0.2253482204551222, | |
| "learning_rate": 3.1785714285714284e-05, | |
| "loss": 0.3558, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.2869379014989293, | |
| "grad_norm": 0.24613177754827212, | |
| "learning_rate": 3.1746031746031745e-05, | |
| "loss": 0.3515, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 1.2890792291220556, | |
| "grad_norm": 0.2523575221500364, | |
| "learning_rate": 3.1706349206349205e-05, | |
| "loss": 0.3741, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 1.291220556745182, | |
| "grad_norm": 0.2165503319453962, | |
| "learning_rate": 3.1666666666666666e-05, | |
| "loss": 0.3727, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 1.2933618843683083, | |
| "grad_norm": 0.21823243034291184, | |
| "learning_rate": 3.162698412698413e-05, | |
| "loss": 0.3533, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 1.2955032119914347, | |
| "grad_norm": 0.24997589624196248, | |
| "learning_rate": 3.158730158730159e-05, | |
| "loss": 0.3542, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 1.297644539614561, | |
| "grad_norm": 0.2398097391159386, | |
| "learning_rate": 3.154761904761905e-05, | |
| "loss": 0.3697, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 1.2997858672376874, | |
| "grad_norm": 0.20653894840882975, | |
| "learning_rate": 3.150793650793651e-05, | |
| "loss": 0.3541, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 1.3019271948608138, | |
| "grad_norm": 0.24435984092571286, | |
| "learning_rate": 3.146825396825397e-05, | |
| "loss": 0.3596, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 1.3040685224839401, | |
| "grad_norm": 0.2542985397543424, | |
| "learning_rate": 3.142857142857143e-05, | |
| "loss": 0.3583, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 1.3062098501070665, | |
| "grad_norm": 0.20683543774079505, | |
| "learning_rate": 3.138888888888889e-05, | |
| "loss": 0.3483, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.3083511777301928, | |
| "grad_norm": 0.2711230309464256, | |
| "learning_rate": 3.134920634920635e-05, | |
| "loss": 0.3616, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 1.3104925053533192, | |
| "grad_norm": 0.24151563936505477, | |
| "learning_rate": 3.130952380952381e-05, | |
| "loss": 0.3498, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 1.3126338329764453, | |
| "grad_norm": 0.2369073543305672, | |
| "learning_rate": 3.1269841269841274e-05, | |
| "loss": 0.366, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 1.3147751605995717, | |
| "grad_norm": 0.24227771996000944, | |
| "learning_rate": 3.1230158730158734e-05, | |
| "loss": 0.3599, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 1.316916488222698, | |
| "grad_norm": 0.2221305104163018, | |
| "learning_rate": 3.1190476190476195e-05, | |
| "loss": 0.3456, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 1.3190578158458244, | |
| "grad_norm": 0.2745562820217464, | |
| "learning_rate": 3.1150793650793656e-05, | |
| "loss": 0.373, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 1.3211991434689507, | |
| "grad_norm": 0.223755905789405, | |
| "learning_rate": 3.111111111111111e-05, | |
| "loss": 0.3743, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 1.323340471092077, | |
| "grad_norm": 0.2540980055756857, | |
| "learning_rate": 3.107142857142857e-05, | |
| "loss": 0.3752, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 1.3254817987152034, | |
| "grad_norm": 0.31394476060654153, | |
| "learning_rate": 3.103174603174603e-05, | |
| "loss": 0.3918, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 1.3276231263383298, | |
| "grad_norm": 0.2326066740490904, | |
| "learning_rate": 3.099206349206349e-05, | |
| "loss": 0.3697, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.3297644539614561, | |
| "grad_norm": 0.24392074483576617, | |
| "learning_rate": 3.095238095238095e-05, | |
| "loss": 0.3522, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 1.3319057815845825, | |
| "grad_norm": 0.25390065497286196, | |
| "learning_rate": 3.0912698412698414e-05, | |
| "loss": 0.3757, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 1.3340471092077089, | |
| "grad_norm": 0.20396843300419068, | |
| "learning_rate": 3.0873015873015874e-05, | |
| "loss": 0.3651, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 1.336188436830835, | |
| "grad_norm": 0.22881137081512465, | |
| "learning_rate": 3.0833333333333335e-05, | |
| "loss": 0.3638, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 1.3383297644539613, | |
| "grad_norm": 0.23141190116614863, | |
| "learning_rate": 3.0793650793650796e-05, | |
| "loss": 0.3617, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 1.3404710920770877, | |
| "grad_norm": 0.21197621308607237, | |
| "learning_rate": 3.075396825396826e-05, | |
| "loss": 0.3533, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 1.342612419700214, | |
| "grad_norm": 0.22886167158750198, | |
| "learning_rate": 3.071428571428572e-05, | |
| "loss": 0.3798, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 1.3447537473233404, | |
| "grad_norm": 0.21750304837930745, | |
| "learning_rate": 3.067460317460318e-05, | |
| "loss": 0.3781, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 1.3468950749464668, | |
| "grad_norm": 0.21938329417465666, | |
| "learning_rate": 3.063492063492064e-05, | |
| "loss": 0.3586, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 1.3490364025695931, | |
| "grad_norm": 0.21402476475138535, | |
| "learning_rate": 3.05952380952381e-05, | |
| "loss": 0.3716, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.3511777301927195, | |
| "grad_norm": 0.20938404630600568, | |
| "learning_rate": 3.055555555555556e-05, | |
| "loss": 0.37, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 1.3533190578158458, | |
| "grad_norm": 0.21383889850254142, | |
| "learning_rate": 3.051587301587302e-05, | |
| "loss": 0.3792, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 1.3554603854389722, | |
| "grad_norm": 0.22055010594265703, | |
| "learning_rate": 3.0476190476190482e-05, | |
| "loss": 0.3589, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 1.3576017130620985, | |
| "grad_norm": 0.19487907168624696, | |
| "learning_rate": 3.0436507936507936e-05, | |
| "loss": 0.3535, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 1.359743040685225, | |
| "grad_norm": 0.22306820784789425, | |
| "learning_rate": 3.0396825396825397e-05, | |
| "loss": 0.3572, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 1.3618843683083512, | |
| "grad_norm": 0.2229559913435905, | |
| "learning_rate": 3.0357142857142857e-05, | |
| "loss": 0.3707, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 1.3640256959314776, | |
| "grad_norm": 0.2024293507877544, | |
| "learning_rate": 3.0317460317460318e-05, | |
| "loss": 0.3638, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 1.366167023554604, | |
| "grad_norm": 0.23534992817477, | |
| "learning_rate": 3.0277777777777776e-05, | |
| "loss": 0.365, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 1.3683083511777303, | |
| "grad_norm": 0.22873997219956177, | |
| "learning_rate": 3.0238095238095236e-05, | |
| "loss": 0.3338, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 1.3704496788008567, | |
| "grad_norm": 0.2211599162133884, | |
| "learning_rate": 3.0198412698412697e-05, | |
| "loss": 0.3784, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.3725910064239828, | |
| "grad_norm": 0.22100903081850962, | |
| "learning_rate": 3.0158730158730158e-05, | |
| "loss": 0.3677, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 1.3747323340471092, | |
| "grad_norm": 0.20600842295123745, | |
| "learning_rate": 3.011904761904762e-05, | |
| "loss": 0.361, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 1.3768736616702355, | |
| "grad_norm": 0.22980010113385077, | |
| "learning_rate": 3.007936507936508e-05, | |
| "loss": 0.3465, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 1.3790149892933619, | |
| "grad_norm": 0.21464631919266589, | |
| "learning_rate": 3.003968253968254e-05, | |
| "loss": 0.3681, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 1.3811563169164882, | |
| "grad_norm": 0.2103030135241598, | |
| "learning_rate": 3e-05, | |
| "loss": 0.3603, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 1.3832976445396146, | |
| "grad_norm": 0.23432239498652505, | |
| "learning_rate": 2.996031746031746e-05, | |
| "loss": 0.3676, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 1.385438972162741, | |
| "grad_norm": 0.21783149705335722, | |
| "learning_rate": 2.9920634920634922e-05, | |
| "loss": 0.3588, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 1.3875802997858673, | |
| "grad_norm": 0.274165710942849, | |
| "learning_rate": 2.9880952380952383e-05, | |
| "loss": 0.361, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 1.3897216274089936, | |
| "grad_norm": 0.21073344703205954, | |
| "learning_rate": 2.9841269841269844e-05, | |
| "loss": 0.3497, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 1.39186295503212, | |
| "grad_norm": 0.2501430083399487, | |
| "learning_rate": 2.98015873015873e-05, | |
| "loss": 0.3634, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.3940042826552461, | |
| "grad_norm": 0.21899163049205936, | |
| "learning_rate": 2.9761904761904762e-05, | |
| "loss": 0.3679, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 1.3961456102783725, | |
| "grad_norm": 0.294173480692159, | |
| "learning_rate": 2.9722222222222223e-05, | |
| "loss": 0.3826, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 1.3982869379014988, | |
| "grad_norm": 0.2482472851407277, | |
| "learning_rate": 2.9682539682539683e-05, | |
| "loss": 0.3713, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 1.4004282655246252, | |
| "grad_norm": 0.24006701265887465, | |
| "learning_rate": 2.9642857142857144e-05, | |
| "loss": 0.366, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.4025695931477515, | |
| "grad_norm": 0.2591909670041966, | |
| "learning_rate": 2.9603174603174605e-05, | |
| "loss": 0.3623, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 1.404710920770878, | |
| "grad_norm": 0.22731690936226825, | |
| "learning_rate": 2.9563492063492066e-05, | |
| "loss": 0.3691, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.4068522483940042, | |
| "grad_norm": 0.2536340951226291, | |
| "learning_rate": 2.9523809523809526e-05, | |
| "loss": 0.3774, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 1.4089935760171306, | |
| "grad_norm": 0.22286664711972803, | |
| "learning_rate": 2.9484126984126987e-05, | |
| "loss": 0.3743, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 1.411134903640257, | |
| "grad_norm": 0.23183154595936484, | |
| "learning_rate": 2.9444444444444448e-05, | |
| "loss": 0.3598, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 1.4132762312633833, | |
| "grad_norm": 0.25169102444114877, | |
| "learning_rate": 2.940476190476191e-05, | |
| "loss": 0.3589, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.4154175588865097, | |
| "grad_norm": 0.21476384262074055, | |
| "learning_rate": 2.9365079365079366e-05, | |
| "loss": 0.3652, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 1.417558886509636, | |
| "grad_norm": 0.25197060268479204, | |
| "learning_rate": 2.9325396825396827e-05, | |
| "loss": 0.3666, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.4197002141327624, | |
| "grad_norm": 0.2673997115188798, | |
| "learning_rate": 2.9285714285714288e-05, | |
| "loss": 0.3671, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 1.4218415417558887, | |
| "grad_norm": 0.23612940999561857, | |
| "learning_rate": 2.9246031746031748e-05, | |
| "loss": 0.3492, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.423982869379015, | |
| "grad_norm": 0.2601602289574086, | |
| "learning_rate": 2.920634920634921e-05, | |
| "loss": 0.3581, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 1.4261241970021414, | |
| "grad_norm": 0.2276622282515761, | |
| "learning_rate": 2.916666666666667e-05, | |
| "loss": 0.3625, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.4282655246252678, | |
| "grad_norm": 0.2231777877927664, | |
| "learning_rate": 2.912698412698413e-05, | |
| "loss": 0.357, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 1.430406852248394, | |
| "grad_norm": 0.21491218629534398, | |
| "learning_rate": 2.908730158730159e-05, | |
| "loss": 0.3659, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.4325481798715203, | |
| "grad_norm": 0.2286321200334171, | |
| "learning_rate": 2.9047619047619052e-05, | |
| "loss": 0.3616, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 1.4346895074946466, | |
| "grad_norm": 0.2431901099756692, | |
| "learning_rate": 2.9007936507936513e-05, | |
| "loss": 0.3574, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.436830835117773, | |
| "grad_norm": 0.23060779470960058, | |
| "learning_rate": 2.8968253968253974e-05, | |
| "loss": 0.3584, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 1.4389721627408993, | |
| "grad_norm": 0.21613457164084013, | |
| "learning_rate": 2.8928571428571434e-05, | |
| "loss": 0.3566, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.4411134903640257, | |
| "grad_norm": 0.264503775266096, | |
| "learning_rate": 2.8888888888888888e-05, | |
| "loss": 0.3828, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 1.443254817987152, | |
| "grad_norm": 0.23265546959620192, | |
| "learning_rate": 2.884920634920635e-05, | |
| "loss": 0.3633, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.4453961456102784, | |
| "grad_norm": 0.23025174524649092, | |
| "learning_rate": 2.880952380952381e-05, | |
| "loss": 0.3676, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 1.4475374732334048, | |
| "grad_norm": 0.21855417995132626, | |
| "learning_rate": 2.876984126984127e-05, | |
| "loss": 0.3683, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.4496788008565311, | |
| "grad_norm": 0.22496528832364351, | |
| "learning_rate": 2.8730158730158728e-05, | |
| "loss": 0.3779, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 1.4518201284796575, | |
| "grad_norm": 0.2244181509364692, | |
| "learning_rate": 2.869047619047619e-05, | |
| "loss": 0.3766, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.4539614561027836, | |
| "grad_norm": 0.2079076247700585, | |
| "learning_rate": 2.865079365079365e-05, | |
| "loss": 0.3617, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 1.45610278372591, | |
| "grad_norm": 0.23278584379048903, | |
| "learning_rate": 2.861111111111111e-05, | |
| "loss": 0.3613, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.4582441113490363, | |
| "grad_norm": 0.22944194687094452, | |
| "learning_rate": 2.857142857142857e-05, | |
| "loss": 0.3514, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 1.4603854389721627, | |
| "grad_norm": 0.24034672179425406, | |
| "learning_rate": 2.853174603174603e-05, | |
| "loss": 0.3723, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.462526766595289, | |
| "grad_norm": 0.235937135297554, | |
| "learning_rate": 2.8492063492063492e-05, | |
| "loss": 0.3597, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 1.4646680942184154, | |
| "grad_norm": 0.26577068348082256, | |
| "learning_rate": 2.8452380952380953e-05, | |
| "loss": 0.3839, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.4668094218415417, | |
| "grad_norm": 0.22074582010953367, | |
| "learning_rate": 2.8412698412698414e-05, | |
| "loss": 0.3762, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 1.468950749464668, | |
| "grad_norm": 0.28955823214995086, | |
| "learning_rate": 2.8373015873015875e-05, | |
| "loss": 0.3742, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.4710920770877944, | |
| "grad_norm": 0.23728903924448824, | |
| "learning_rate": 2.8333333333333335e-05, | |
| "loss": 0.3682, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 1.4732334047109208, | |
| "grad_norm": 0.21949914443063165, | |
| "learning_rate": 2.8293650793650793e-05, | |
| "loss": 0.3827, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.4753747323340471, | |
| "grad_norm": 0.27980639261823936, | |
| "learning_rate": 2.8253968253968253e-05, | |
| "loss": 0.3525, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 1.4775160599571735, | |
| "grad_norm": 0.2314438069257415, | |
| "learning_rate": 2.8214285714285714e-05, | |
| "loss": 0.3679, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.4796573875802999, | |
| "grad_norm": 0.2722100613441337, | |
| "learning_rate": 2.8174603174603175e-05, | |
| "loss": 0.3751, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 1.4817987152034262, | |
| "grad_norm": 0.2445277948976506, | |
| "learning_rate": 2.8134920634920636e-05, | |
| "loss": 0.378, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.4839400428265526, | |
| "grad_norm": 0.2658539356677542, | |
| "learning_rate": 2.8095238095238096e-05, | |
| "loss": 0.3619, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 1.486081370449679, | |
| "grad_norm": 0.26351984231630443, | |
| "learning_rate": 2.8055555555555557e-05, | |
| "loss": 0.3884, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.4882226980728053, | |
| "grad_norm": 0.23831638328525492, | |
| "learning_rate": 2.8015873015873018e-05, | |
| "loss": 0.359, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 1.4903640256959314, | |
| "grad_norm": 0.21588034310392898, | |
| "learning_rate": 2.797619047619048e-05, | |
| "loss": 0.3576, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.4925053533190578, | |
| "grad_norm": 0.2814689991132766, | |
| "learning_rate": 2.793650793650794e-05, | |
| "loss": 0.3702, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 1.4946466809421841, | |
| "grad_norm": 0.19677015269212336, | |
| "learning_rate": 2.78968253968254e-05, | |
| "loss": 0.3481, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.4967880085653105, | |
| "grad_norm": 0.24694240495989817, | |
| "learning_rate": 2.785714285714286e-05, | |
| "loss": 0.3404, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 1.4989293361884368, | |
| "grad_norm": 0.24560310196329904, | |
| "learning_rate": 2.781746031746032e-05, | |
| "loss": 0.3559, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.5010706638115632, | |
| "grad_norm": 0.22770557903012514, | |
| "learning_rate": 2.777777777777778e-05, | |
| "loss": 0.3774, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 1.5032119914346895, | |
| "grad_norm": 0.22247306696683353, | |
| "learning_rate": 2.773809523809524e-05, | |
| "loss": 0.3531, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.5053533190578159, | |
| "grad_norm": 0.23940565806300032, | |
| "learning_rate": 2.76984126984127e-05, | |
| "loss": 0.3548, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 1.507494646680942, | |
| "grad_norm": 0.2535843655330012, | |
| "learning_rate": 2.765873015873016e-05, | |
| "loss": 0.3657, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.5096359743040684, | |
| "grad_norm": 0.23618580242395265, | |
| "learning_rate": 2.7619047619047622e-05, | |
| "loss": 0.3684, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 1.5117773019271947, | |
| "grad_norm": 0.2548030311217275, | |
| "learning_rate": 2.7579365079365083e-05, | |
| "loss": 0.3575, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.513918629550321, | |
| "grad_norm": 0.23669897400390896, | |
| "learning_rate": 2.7539682539682544e-05, | |
| "loss": 0.3678, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 1.5160599571734474, | |
| "grad_norm": 0.22764730629262053, | |
| "learning_rate": 2.7500000000000004e-05, | |
| "loss": 0.3738, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.5182012847965738, | |
| "grad_norm": 0.2522265156348486, | |
| "learning_rate": 2.7460317460317465e-05, | |
| "loss": 0.3752, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 1.5203426124197001, | |
| "grad_norm": 0.22848735051444552, | |
| "learning_rate": 2.7420634920634926e-05, | |
| "loss": 0.3699, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.5224839400428265, | |
| "grad_norm": 0.23087075847319702, | |
| "learning_rate": 2.7380952380952383e-05, | |
| "loss": 0.3539, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 1.5246252676659529, | |
| "grad_norm": 0.22350228895743782, | |
| "learning_rate": 2.734126984126984e-05, | |
| "loss": 0.3513, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.5267665952890792, | |
| "grad_norm": 0.23714590267506044, | |
| "learning_rate": 2.73015873015873e-05, | |
| "loss": 0.37, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 1.5289079229122056, | |
| "grad_norm": 0.23601477099160736, | |
| "learning_rate": 2.7261904761904762e-05, | |
| "loss": 0.3654, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.531049250535332, | |
| "grad_norm": 0.22820632242815328, | |
| "learning_rate": 2.7222222222222223e-05, | |
| "loss": 0.3643, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 1.5331905781584583, | |
| "grad_norm": 0.2367592790064326, | |
| "learning_rate": 2.718253968253968e-05, | |
| "loss": 0.3588, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.5353319057815846, | |
| "grad_norm": 0.2283679400061131, | |
| "learning_rate": 2.714285714285714e-05, | |
| "loss": 0.3698, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 1.537473233404711, | |
| "grad_norm": 0.23687279355617852, | |
| "learning_rate": 2.7103174603174602e-05, | |
| "loss": 0.3796, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.5396145610278373, | |
| "grad_norm": 0.23367650341137466, | |
| "learning_rate": 2.7063492063492062e-05, | |
| "loss": 0.3817, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 1.5417558886509637, | |
| "grad_norm": 0.2314615253573604, | |
| "learning_rate": 2.7023809523809523e-05, | |
| "loss": 0.3656, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.54389721627409, | |
| "grad_norm": 0.24177942634670613, | |
| "learning_rate": 2.6984126984126984e-05, | |
| "loss": 0.3639, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 1.5460385438972164, | |
| "grad_norm": 0.21932445481704804, | |
| "learning_rate": 2.6944444444444445e-05, | |
| "loss": 0.3702, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.5481798715203428, | |
| "grad_norm": 0.24065414879821986, | |
| "learning_rate": 2.6904761904761905e-05, | |
| "loss": 0.3885, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 1.550321199143469, | |
| "grad_norm": 0.20826666740635788, | |
| "learning_rate": 2.6865079365079366e-05, | |
| "loss": 0.372, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.5524625267665952, | |
| "grad_norm": 0.22699790366079173, | |
| "learning_rate": 2.6825396825396827e-05, | |
| "loss": 0.3529, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 1.5546038543897216, | |
| "grad_norm": 0.23143139316201608, | |
| "learning_rate": 2.6785714285714288e-05, | |
| "loss": 0.3873, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.556745182012848, | |
| "grad_norm": 0.23566599378296404, | |
| "learning_rate": 2.6746031746031745e-05, | |
| "loss": 0.3613, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 1.5588865096359743, | |
| "grad_norm": 0.2431611538451522, | |
| "learning_rate": 2.6706349206349206e-05, | |
| "loss": 0.3564, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.5610278372591007, | |
| "grad_norm": 0.21557222183068245, | |
| "learning_rate": 2.6666666666666667e-05, | |
| "loss": 0.3519, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 1.563169164882227, | |
| "grad_norm": 0.21761967858926073, | |
| "learning_rate": 2.6626984126984127e-05, | |
| "loss": 0.3676, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.5653104925053534, | |
| "grad_norm": 0.2067924331453489, | |
| "learning_rate": 2.6587301587301588e-05, | |
| "loss": 0.357, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 1.5674518201284795, | |
| "grad_norm": 0.21006075912880307, | |
| "learning_rate": 2.654761904761905e-05, | |
| "loss": 0.3545, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 1.5695931477516059, | |
| "grad_norm": 0.23470822509906164, | |
| "learning_rate": 2.650793650793651e-05, | |
| "loss": 0.3698, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 1.5717344753747322, | |
| "grad_norm": 0.19926495187855447, | |
| "learning_rate": 2.646825396825397e-05, | |
| "loss": 0.3622, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 1.5738758029978586, | |
| "grad_norm": 0.24016060928887698, | |
| "learning_rate": 2.642857142857143e-05, | |
| "loss": 0.3582, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 1.576017130620985, | |
| "grad_norm": 0.22487176055383068, | |
| "learning_rate": 2.6388888888888892e-05, | |
| "loss": 0.386, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 1.5781584582441113, | |
| "grad_norm": 0.21079271486663667, | |
| "learning_rate": 2.6349206349206353e-05, | |
| "loss": 0.3623, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 1.5802997858672376, | |
| "grad_norm": 0.2334507646133209, | |
| "learning_rate": 2.6309523809523813e-05, | |
| "loss": 0.3762, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 1.582441113490364, | |
| "grad_norm": 0.19667313653775484, | |
| "learning_rate": 2.626984126984127e-05, | |
| "loss": 0.3474, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 1.5845824411134903, | |
| "grad_norm": 0.21267970407005282, | |
| "learning_rate": 2.623015873015873e-05, | |
| "loss": 0.3582, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.5867237687366167, | |
| "grad_norm": 0.19547706247312066, | |
| "learning_rate": 2.6190476190476192e-05, | |
| "loss": 0.364, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 1.588865096359743, | |
| "grad_norm": 0.22715519405850873, | |
| "learning_rate": 2.6150793650793653e-05, | |
| "loss": 0.3589, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 1.5910064239828694, | |
| "grad_norm": 0.2248474130605718, | |
| "learning_rate": 2.6111111111111114e-05, | |
| "loss": 0.3658, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 1.5931477516059958, | |
| "grad_norm": 0.207264130106897, | |
| "learning_rate": 2.6071428571428574e-05, | |
| "loss": 0.3677, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 1.595289079229122, | |
| "grad_norm": 0.20684564047898432, | |
| "learning_rate": 2.6031746031746035e-05, | |
| "loss": 0.3696, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 1.5974304068522485, | |
| "grad_norm": 0.20367525386940707, | |
| "learning_rate": 2.5992063492063496e-05, | |
| "loss": 0.3481, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 1.5995717344753748, | |
| "grad_norm": 0.20679426091923925, | |
| "learning_rate": 2.5952380952380957e-05, | |
| "loss": 0.3701, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 1.6017130620985012, | |
| "grad_norm": 0.20915864152813907, | |
| "learning_rate": 2.5912698412698417e-05, | |
| "loss": 0.3633, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 1.6038543897216275, | |
| "grad_norm": 0.2234637695145891, | |
| "learning_rate": 2.5873015873015878e-05, | |
| "loss": 0.3579, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 1.6059957173447539, | |
| "grad_norm": 0.23358262127655052, | |
| "learning_rate": 2.5833333333333336e-05, | |
| "loss": 0.3823, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.6081370449678802, | |
| "grad_norm": 0.22541931799902007, | |
| "learning_rate": 2.5793650793650796e-05, | |
| "loss": 0.3651, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 1.6102783725910066, | |
| "grad_norm": 0.22458604036006147, | |
| "learning_rate": 2.5753968253968254e-05, | |
| "loss": 0.3672, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 1.6124197002141327, | |
| "grad_norm": 0.19850956726240754, | |
| "learning_rate": 2.5714285714285714e-05, | |
| "loss": 0.3495, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 1.614561027837259, | |
| "grad_norm": 0.22459012390393387, | |
| "learning_rate": 2.5674603174603172e-05, | |
| "loss": 0.3615, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 1.6167023554603854, | |
| "grad_norm": 0.22306811697664986, | |
| "learning_rate": 2.5634920634920633e-05, | |
| "loss": 0.3629, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 1.6188436830835118, | |
| "grad_norm": 0.22471296174548994, | |
| "learning_rate": 2.5595238095238093e-05, | |
| "loss": 0.3764, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 1.6209850107066381, | |
| "grad_norm": 0.21158605873651065, | |
| "learning_rate": 2.5555555555555554e-05, | |
| "loss": 0.3653, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 1.6231263383297645, | |
| "grad_norm": 0.21671080824519806, | |
| "learning_rate": 2.5515873015873015e-05, | |
| "loss": 0.371, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 1.6252676659528906, | |
| "grad_norm": 0.2356438023908563, | |
| "learning_rate": 2.5476190476190476e-05, | |
| "loss": 0.364, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 1.627408993576017, | |
| "grad_norm": 0.19447430345844002, | |
| "learning_rate": 2.5436507936507936e-05, | |
| "loss": 0.3368, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.6295503211991433, | |
| "grad_norm": 0.22994645655583826, | |
| "learning_rate": 2.5396825396825397e-05, | |
| "loss": 0.3567, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 1.6316916488222697, | |
| "grad_norm": 0.23001951683149954, | |
| "learning_rate": 2.5357142857142858e-05, | |
| "loss": 0.3684, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 1.633832976445396, | |
| "grad_norm": 0.2119578315059528, | |
| "learning_rate": 2.531746031746032e-05, | |
| "loss": 0.3536, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 1.6359743040685224, | |
| "grad_norm": 0.2615589609533618, | |
| "learning_rate": 2.527777777777778e-05, | |
| "loss": 0.389, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 1.6381156316916488, | |
| "grad_norm": 0.22688862347727404, | |
| "learning_rate": 2.523809523809524e-05, | |
| "loss": 0.3659, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 1.640256959314775, | |
| "grad_norm": 0.22643676902104382, | |
| "learning_rate": 2.5198412698412697e-05, | |
| "loss": 0.3698, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 1.6423982869379015, | |
| "grad_norm": 0.21169667439175707, | |
| "learning_rate": 2.5158730158730158e-05, | |
| "loss": 0.3762, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 1.6445396145610278, | |
| "grad_norm": 0.22932111037643213, | |
| "learning_rate": 2.511904761904762e-05, | |
| "loss": 0.367, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 1.6466809421841542, | |
| "grad_norm": 0.21149524956469937, | |
| "learning_rate": 2.507936507936508e-05, | |
| "loss": 0.3764, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 1.6488222698072805, | |
| "grad_norm": 0.22593475888288625, | |
| "learning_rate": 2.503968253968254e-05, | |
| "loss": 0.3651, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.6509635974304069, | |
| "grad_norm": 0.22008417552751885, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.377, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 1.6531049250535332, | |
| "grad_norm": 0.20441837816311934, | |
| "learning_rate": 2.4960317460317462e-05, | |
| "loss": 0.3716, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 1.6552462526766596, | |
| "grad_norm": 0.21552942260647906, | |
| "learning_rate": 2.4920634920634923e-05, | |
| "loss": 0.3385, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 1.657387580299786, | |
| "grad_norm": 0.2353507438436094, | |
| "learning_rate": 2.4880952380952383e-05, | |
| "loss": 0.3864, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 1.6595289079229123, | |
| "grad_norm": 0.21413713760888206, | |
| "learning_rate": 2.4841269841269844e-05, | |
| "loss": 0.3647, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 1.6616702355460387, | |
| "grad_norm": 0.2160670298547777, | |
| "learning_rate": 2.4801587301587305e-05, | |
| "loss": 0.3868, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 1.663811563169165, | |
| "grad_norm": 0.21929322311304728, | |
| "learning_rate": 2.4761904761904762e-05, | |
| "loss": 0.3739, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 1.6659528907922914, | |
| "grad_norm": 0.22788233645299552, | |
| "learning_rate": 2.4722222222222223e-05, | |
| "loss": 0.373, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 1.6680942184154177, | |
| "grad_norm": 0.210338880481676, | |
| "learning_rate": 2.4682539682539684e-05, | |
| "loss": 0.3713, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 1.6702355460385439, | |
| "grad_norm": 0.22350067161462142, | |
| "learning_rate": 2.4642857142857145e-05, | |
| "loss": 0.3664, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.6723768736616702, | |
| "grad_norm": 0.22340962121009117, | |
| "learning_rate": 2.4603174603174602e-05, | |
| "loss": 0.3881, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 1.6745182012847966, | |
| "grad_norm": 0.1990507179838242, | |
| "learning_rate": 2.4563492063492063e-05, | |
| "loss": 0.3746, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 1.676659528907923, | |
| "grad_norm": 0.22318485815266106, | |
| "learning_rate": 2.4523809523809523e-05, | |
| "loss": 0.353, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 1.6788008565310493, | |
| "grad_norm": 0.22041375294909685, | |
| "learning_rate": 2.4484126984126984e-05, | |
| "loss": 0.3724, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 1.6809421841541756, | |
| "grad_norm": 0.21498770171477544, | |
| "learning_rate": 2.4444444444444445e-05, | |
| "loss": 0.3819, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 1.683083511777302, | |
| "grad_norm": 0.23245272113804277, | |
| "learning_rate": 2.4404761904761906e-05, | |
| "loss": 0.3632, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 1.685224839400428, | |
| "grad_norm": 0.2249931267054549, | |
| "learning_rate": 2.4365079365079366e-05, | |
| "loss": 0.3639, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 1.6873661670235545, | |
| "grad_norm": 0.21799548712885633, | |
| "learning_rate": 2.4325396825396827e-05, | |
| "loss": 0.3581, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 1.6895074946466808, | |
| "grad_norm": 0.22018263174045988, | |
| "learning_rate": 2.4285714285714288e-05, | |
| "loss": 0.3612, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 1.6916488222698072, | |
| "grad_norm": 0.20033740087548269, | |
| "learning_rate": 2.424603174603175e-05, | |
| "loss": 0.3313, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.6937901498929335, | |
| "grad_norm": 0.19788624340907016, | |
| "learning_rate": 2.4206349206349206e-05, | |
| "loss": 0.345, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 1.6959314775160599, | |
| "grad_norm": 0.21154589560113807, | |
| "learning_rate": 2.4166666666666667e-05, | |
| "loss": 0.3448, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 1.6980728051391862, | |
| "grad_norm": 0.21812600023361592, | |
| "learning_rate": 2.4126984126984128e-05, | |
| "loss": 0.3506, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 1.7002141327623126, | |
| "grad_norm": 0.21274687742218662, | |
| "learning_rate": 2.408730158730159e-05, | |
| "loss": 0.3602, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 1.702355460385439, | |
| "grad_norm": 0.20945076032569784, | |
| "learning_rate": 2.404761904761905e-05, | |
| "loss": 0.3609, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 1.7044967880085653, | |
| "grad_norm": 0.22319883020181144, | |
| "learning_rate": 2.400793650793651e-05, | |
| "loss": 0.3707, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 1.7066381156316917, | |
| "grad_norm": 0.23376387552204406, | |
| "learning_rate": 2.396825396825397e-05, | |
| "loss": 0.3777, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 1.708779443254818, | |
| "grad_norm": 0.21375479088785895, | |
| "learning_rate": 2.392857142857143e-05, | |
| "loss": 0.3637, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 1.7109207708779444, | |
| "grad_norm": 0.20013544930852412, | |
| "learning_rate": 2.3888888888888892e-05, | |
| "loss": 0.3662, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 1.7130620985010707, | |
| "grad_norm": 0.23771022394213084, | |
| "learning_rate": 2.3849206349206353e-05, | |
| "loss": 0.3541, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.715203426124197, | |
| "grad_norm": 0.2125013216241467, | |
| "learning_rate": 2.380952380952381e-05, | |
| "loss": 0.3586, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 1.7173447537473234, | |
| "grad_norm": 0.24611181377977226, | |
| "learning_rate": 2.376984126984127e-05, | |
| "loss": 0.3765, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 1.7194860813704498, | |
| "grad_norm": 0.2327060873577735, | |
| "learning_rate": 2.373015873015873e-05, | |
| "loss": 0.3533, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 1.7216274089935761, | |
| "grad_norm": 0.22138909685445068, | |
| "learning_rate": 2.369047619047619e-05, | |
| "loss": 0.3583, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 1.7237687366167025, | |
| "grad_norm": 0.208629956972268, | |
| "learning_rate": 2.365079365079365e-05, | |
| "loss": 0.3755, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 1.7259100642398288, | |
| "grad_norm": 0.23858026530704865, | |
| "learning_rate": 2.361111111111111e-05, | |
| "loss": 0.3694, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 1.728051391862955, | |
| "grad_norm": 0.21718501714953736, | |
| "learning_rate": 2.357142857142857e-05, | |
| "loss": 0.353, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 1.7301927194860813, | |
| "grad_norm": 0.1938804380949089, | |
| "learning_rate": 2.3531746031746032e-05, | |
| "loss": 0.3474, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 1.7323340471092077, | |
| "grad_norm": 0.21740566223114047, | |
| "learning_rate": 2.3492063492063493e-05, | |
| "loss": 0.3754, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 1.734475374732334, | |
| "grad_norm": 0.2142359822570361, | |
| "learning_rate": 2.3452380952380954e-05, | |
| "loss": 0.358, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.7366167023554604, | |
| "grad_norm": 0.2070519093899908, | |
| "learning_rate": 2.3412698412698414e-05, | |
| "loss": 0.3648, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 1.7387580299785867, | |
| "grad_norm": 0.1994890470093942, | |
| "learning_rate": 2.3373015873015875e-05, | |
| "loss": 0.3721, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 1.740899357601713, | |
| "grad_norm": 0.19930905538311752, | |
| "learning_rate": 2.3333333333333336e-05, | |
| "loss": 0.3634, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 1.7430406852248392, | |
| "grad_norm": 0.22943112463202014, | |
| "learning_rate": 2.3293650793650797e-05, | |
| "loss": 0.3674, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 1.7451820128479656, | |
| "grad_norm": 0.19595373289639564, | |
| "learning_rate": 2.3253968253968257e-05, | |
| "loss": 0.3826, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 1.747323340471092, | |
| "grad_norm": 0.23295233026294598, | |
| "learning_rate": 2.3214285714285715e-05, | |
| "loss": 0.3656, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 1.7494646680942183, | |
| "grad_norm": 0.21540081438261183, | |
| "learning_rate": 2.3174603174603175e-05, | |
| "loss": 0.3829, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 1.7516059957173447, | |
| "grad_norm": 0.1864879412189447, | |
| "learning_rate": 2.3134920634920636e-05, | |
| "loss": 0.3554, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 1.753747323340471, | |
| "grad_norm": 0.2387089230839924, | |
| "learning_rate": 2.3095238095238097e-05, | |
| "loss": 0.385, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 1.7558886509635974, | |
| "grad_norm": 0.2232438395168808, | |
| "learning_rate": 2.3055555555555558e-05, | |
| "loss": 0.3504, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.7580299785867237, | |
| "grad_norm": 0.20415844428245095, | |
| "learning_rate": 2.3015873015873015e-05, | |
| "loss": 0.3789, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 1.76017130620985, | |
| "grad_norm": 0.19921172666296888, | |
| "learning_rate": 2.2976190476190476e-05, | |
| "loss": 0.3701, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 1.7623126338329764, | |
| "grad_norm": 0.22470908559459463, | |
| "learning_rate": 2.2936507936507937e-05, | |
| "loss": 0.3619, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 1.7644539614561028, | |
| "grad_norm": 0.1959331611395145, | |
| "learning_rate": 2.2896825396825397e-05, | |
| "loss": 0.343, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 1.7665952890792291, | |
| "grad_norm": 0.21307394355566658, | |
| "learning_rate": 2.2857142857142858e-05, | |
| "loss": 0.3546, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 1.7687366167023555, | |
| "grad_norm": 0.20578224521899074, | |
| "learning_rate": 2.281746031746032e-05, | |
| "loss": 0.3795, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 1.7708779443254818, | |
| "grad_norm": 0.2189247535460949, | |
| "learning_rate": 2.277777777777778e-05, | |
| "loss": 0.3607, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 1.7730192719486082, | |
| "grad_norm": 0.21596310355670684, | |
| "learning_rate": 2.273809523809524e-05, | |
| "loss": 0.3584, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 1.7751605995717346, | |
| "grad_norm": 0.2008393875425573, | |
| "learning_rate": 2.2698412698412698e-05, | |
| "loss": 0.3647, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 1.777301927194861, | |
| "grad_norm": 0.21184243202841968, | |
| "learning_rate": 2.265873015873016e-05, | |
| "loss": 0.3455, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.7794432548179873, | |
| "grad_norm": 0.22865192619400915, | |
| "learning_rate": 2.261904761904762e-05, | |
| "loss": 0.3901, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 1.7815845824411136, | |
| "grad_norm": 0.22618545858724132, | |
| "learning_rate": 2.257936507936508e-05, | |
| "loss": 0.3832, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 1.78372591006424, | |
| "grad_norm": 0.22738066487492137, | |
| "learning_rate": 2.253968253968254e-05, | |
| "loss": 0.3399, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 1.7858672376873663, | |
| "grad_norm": 0.23621094995588868, | |
| "learning_rate": 2.25e-05, | |
| "loss": 0.3737, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 1.7880085653104925, | |
| "grad_norm": 0.2157005750309211, | |
| "learning_rate": 2.2460317460317462e-05, | |
| "loss": 0.3556, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 1.7901498929336188, | |
| "grad_norm": 0.20967061124515987, | |
| "learning_rate": 2.2420634920634923e-05, | |
| "loss": 0.3711, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 1.7922912205567452, | |
| "grad_norm": 0.21011640159026213, | |
| "learning_rate": 2.2380952380952384e-05, | |
| "loss": 0.3696, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 1.7944325481798715, | |
| "grad_norm": 0.2301543332161994, | |
| "learning_rate": 2.2341269841269844e-05, | |
| "loss": 0.3619, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 1.7965738758029979, | |
| "grad_norm": 0.19783929504773992, | |
| "learning_rate": 2.2301587301587305e-05, | |
| "loss": 0.3711, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 1.7987152034261242, | |
| "grad_norm": 0.2027645971091363, | |
| "learning_rate": 2.2261904761904763e-05, | |
| "loss": 0.3575, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.8008565310492506, | |
| "grad_norm": 0.21010286682467308, | |
| "learning_rate": 2.2222222222222223e-05, | |
| "loss": 0.3684, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 1.8029978586723767, | |
| "grad_norm": 0.1962020508553967, | |
| "learning_rate": 2.2182539682539684e-05, | |
| "loss": 0.3651, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 1.805139186295503, | |
| "grad_norm": 0.20781819889894468, | |
| "learning_rate": 2.214285714285714e-05, | |
| "loss": 0.365, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 1.8072805139186294, | |
| "grad_norm": 0.19696845772215546, | |
| "learning_rate": 2.2103174603174602e-05, | |
| "loss": 0.3629, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 1.8094218415417558, | |
| "grad_norm": 0.21583117590285078, | |
| "learning_rate": 2.2063492063492063e-05, | |
| "loss": 0.3617, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 1.8115631691648821, | |
| "grad_norm": 0.2173603569007904, | |
| "learning_rate": 2.2023809523809524e-05, | |
| "loss": 0.3734, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 1.8137044967880085, | |
| "grad_norm": 0.2022970217705777, | |
| "learning_rate": 2.1984126984126984e-05, | |
| "loss": 0.3659, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 1.8158458244111348, | |
| "grad_norm": 0.24139485756064838, | |
| "learning_rate": 2.1944444444444445e-05, | |
| "loss": 0.3644, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 1.8179871520342612, | |
| "grad_norm": 0.216967803177166, | |
| "learning_rate": 2.1904761904761906e-05, | |
| "loss": 0.3625, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 1.8201284796573876, | |
| "grad_norm": 0.21018593632480406, | |
| "learning_rate": 2.1865079365079367e-05, | |
| "loss": 0.3674, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.822269807280514, | |
| "grad_norm": 0.21491326859755341, | |
| "learning_rate": 2.1825396825396827e-05, | |
| "loss": 0.376, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 1.8244111349036403, | |
| "grad_norm": 0.2016970958055073, | |
| "learning_rate": 2.1785714285714288e-05, | |
| "loss": 0.3682, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 1.8265524625267666, | |
| "grad_norm": 0.23649936879215913, | |
| "learning_rate": 2.174603174603175e-05, | |
| "loss": 0.3671, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 1.828693790149893, | |
| "grad_norm": 0.20337632262549168, | |
| "learning_rate": 2.170634920634921e-05, | |
| "loss": 0.3461, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 1.8308351177730193, | |
| "grad_norm": 0.21300952050085178, | |
| "learning_rate": 2.1666666666666667e-05, | |
| "loss": 0.3542, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 1.8329764453961457, | |
| "grad_norm": 0.21370860755844873, | |
| "learning_rate": 2.1626984126984128e-05, | |
| "loss": 0.3485, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 1.835117773019272, | |
| "grad_norm": 0.2238345205767126, | |
| "learning_rate": 2.158730158730159e-05, | |
| "loss": 0.3538, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 1.8372591006423984, | |
| "grad_norm": 0.20314755234997184, | |
| "learning_rate": 2.154761904761905e-05, | |
| "loss": 0.3618, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 1.8394004282655247, | |
| "grad_norm": 0.19823141224145824, | |
| "learning_rate": 2.150793650793651e-05, | |
| "loss": 0.3496, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 1.841541755888651, | |
| "grad_norm": 0.19176641722853272, | |
| "learning_rate": 2.1468253968253967e-05, | |
| "loss": 0.3712, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.8436830835117775, | |
| "grad_norm": 0.20478508554466868, | |
| "learning_rate": 2.1428571428571428e-05, | |
| "loss": 0.3548, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 1.8458244111349036, | |
| "grad_norm": 0.19864762266874297, | |
| "learning_rate": 2.138888888888889e-05, | |
| "loss": 0.3616, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 1.84796573875803, | |
| "grad_norm": 0.20776884101667364, | |
| "learning_rate": 2.134920634920635e-05, | |
| "loss": 0.3674, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 1.8501070663811563, | |
| "grad_norm": 0.20958140144946166, | |
| "learning_rate": 2.130952380952381e-05, | |
| "loss": 0.3681, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 1.8522483940042827, | |
| "grad_norm": 0.1866017449921812, | |
| "learning_rate": 2.126984126984127e-05, | |
| "loss": 0.3509, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 1.854389721627409, | |
| "grad_norm": 0.1893458266122647, | |
| "learning_rate": 2.1230158730158732e-05, | |
| "loss": 0.3508, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 1.8565310492505354, | |
| "grad_norm": 0.22008473964117356, | |
| "learning_rate": 2.1190476190476193e-05, | |
| "loss": 0.3678, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 1.8586723768736617, | |
| "grad_norm": 0.1936262836818084, | |
| "learning_rate": 2.115079365079365e-05, | |
| "loss": 0.3507, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 1.8608137044967878, | |
| "grad_norm": 0.19939279008943295, | |
| "learning_rate": 2.111111111111111e-05, | |
| "loss": 0.3606, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 1.8629550321199142, | |
| "grad_norm": 0.20666085743177595, | |
| "learning_rate": 2.107142857142857e-05, | |
| "loss": 0.3564, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.8650963597430406, | |
| "grad_norm": 0.22692963345797962, | |
| "learning_rate": 2.1031746031746032e-05, | |
| "loss": 0.3512, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 1.867237687366167, | |
| "grad_norm": 0.21239492391629777, | |
| "learning_rate": 2.0992063492063493e-05, | |
| "loss": 0.3728, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 1.8693790149892933, | |
| "grad_norm": 0.1984911807328775, | |
| "learning_rate": 2.0952380952380954e-05, | |
| "loss": 0.3651, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 1.8715203426124196, | |
| "grad_norm": 0.2035710275475753, | |
| "learning_rate": 2.0912698412698415e-05, | |
| "loss": 0.3566, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 1.873661670235546, | |
| "grad_norm": 0.21183590394095844, | |
| "learning_rate": 2.0873015873015875e-05, | |
| "loss": 0.3544, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 1.8758029978586723, | |
| "grad_norm": 0.20710933660481542, | |
| "learning_rate": 2.0833333333333336e-05, | |
| "loss": 0.3526, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 1.8779443254817987, | |
| "grad_norm": 0.20824572300787447, | |
| "learning_rate": 2.0793650793650797e-05, | |
| "loss": 0.3429, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 1.880085653104925, | |
| "grad_norm": 0.21277139237572662, | |
| "learning_rate": 2.0753968253968258e-05, | |
| "loss": 0.3647, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 1.8822269807280514, | |
| "grad_norm": 0.20675962730393843, | |
| "learning_rate": 2.0714285714285718e-05, | |
| "loss": 0.3656, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 1.8843683083511777, | |
| "grad_norm": 0.2578061912750937, | |
| "learning_rate": 2.0674603174603176e-05, | |
| "loss": 0.3533, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.886509635974304, | |
| "grad_norm": 0.20710356677023856, | |
| "learning_rate": 2.0634920634920636e-05, | |
| "loss": 0.3609, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 1.8886509635974305, | |
| "grad_norm": 0.20736571003231136, | |
| "learning_rate": 2.0595238095238094e-05, | |
| "loss": 0.3518, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 1.8907922912205568, | |
| "grad_norm": 0.219731020163135, | |
| "learning_rate": 2.0555555555555555e-05, | |
| "loss": 0.3507, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 1.8929336188436832, | |
| "grad_norm": 0.21068978476088052, | |
| "learning_rate": 2.0515873015873015e-05, | |
| "loss": 0.3499, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 1.8950749464668095, | |
| "grad_norm": 0.2585013587777131, | |
| "learning_rate": 2.0476190476190476e-05, | |
| "loss": 0.3627, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 1.8972162740899359, | |
| "grad_norm": 0.19255613092161056, | |
| "learning_rate": 2.0436507936507937e-05, | |
| "loss": 0.3547, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 1.8993576017130622, | |
| "grad_norm": 0.21046046883766856, | |
| "learning_rate": 2.0396825396825398e-05, | |
| "loss": 0.3688, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 1.9014989293361886, | |
| "grad_norm": 0.2031259267946657, | |
| "learning_rate": 2.0357142857142858e-05, | |
| "loss": 0.366, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 1.903640256959315, | |
| "grad_norm": 0.22385718639419275, | |
| "learning_rate": 2.031746031746032e-05, | |
| "loss": 0.366, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 1.905781584582441, | |
| "grad_norm": 0.20168399193948486, | |
| "learning_rate": 2.027777777777778e-05, | |
| "loss": 0.3562, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.9079229122055674, | |
| "grad_norm": 0.19396525152354396, | |
| "learning_rate": 2.023809523809524e-05, | |
| "loss": 0.356, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 1.9100642398286938, | |
| "grad_norm": 0.20798336659145256, | |
| "learning_rate": 2.01984126984127e-05, | |
| "loss": 0.3438, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 1.9122055674518201, | |
| "grad_norm": 0.2135461327301388, | |
| "learning_rate": 2.015873015873016e-05, | |
| "loss": 0.3556, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 1.9143468950749465, | |
| "grad_norm": 0.20025569788736822, | |
| "learning_rate": 2.011904761904762e-05, | |
| "loss": 0.3522, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 1.9164882226980728, | |
| "grad_norm": 0.21073141685921978, | |
| "learning_rate": 2.007936507936508e-05, | |
| "loss": 0.3669, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 1.9186295503211992, | |
| "grad_norm": 0.20550729295532494, | |
| "learning_rate": 2.003968253968254e-05, | |
| "loss": 0.3563, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 1.9207708779443253, | |
| "grad_norm": 0.2219289886669546, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3632, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 1.9229122055674517, | |
| "grad_norm": 0.20426747560999273, | |
| "learning_rate": 1.9960317460317462e-05, | |
| "loss": 0.3731, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 1.925053533190578, | |
| "grad_norm": 0.21309608447398956, | |
| "learning_rate": 1.992063492063492e-05, | |
| "loss": 0.3822, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 1.9271948608137044, | |
| "grad_norm": 0.2047630612588562, | |
| "learning_rate": 1.988095238095238e-05, | |
| "loss": 0.3826, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.9293361884368307, | |
| "grad_norm": 0.20339744126965478, | |
| "learning_rate": 1.984126984126984e-05, | |
| "loss": 0.3635, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 1.931477516059957, | |
| "grad_norm": 0.19470480788266056, | |
| "learning_rate": 1.9801587301587302e-05, | |
| "loss": 0.3444, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 1.9336188436830835, | |
| "grad_norm": 0.22460440732783768, | |
| "learning_rate": 1.9761904761904763e-05, | |
| "loss": 0.379, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 1.9357601713062098, | |
| "grad_norm": 0.22213495508308273, | |
| "learning_rate": 1.9722222222222224e-05, | |
| "loss": 0.3676, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 1.9379014989293362, | |
| "grad_norm": 0.20343330118385294, | |
| "learning_rate": 1.9682539682539684e-05, | |
| "loss": 0.3546, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 1.9400428265524625, | |
| "grad_norm": 0.19101512005815446, | |
| "learning_rate": 1.9642857142857145e-05, | |
| "loss": 0.3545, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 1.9421841541755889, | |
| "grad_norm": 0.27419563179542383, | |
| "learning_rate": 1.9603174603174602e-05, | |
| "loss": 0.339, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 1.9443254817987152, | |
| "grad_norm": 0.20460100369509523, | |
| "learning_rate": 1.9563492063492063e-05, | |
| "loss": 0.3713, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 1.9464668094218416, | |
| "grad_norm": 0.21726477076370293, | |
| "learning_rate": 1.9523809523809524e-05, | |
| "loss": 0.3674, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 1.948608137044968, | |
| "grad_norm": 0.2366081598522377, | |
| "learning_rate": 1.9484126984126985e-05, | |
| "loss": 0.3716, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.9507494646680943, | |
| "grad_norm": 0.2309915328014547, | |
| "learning_rate": 1.9444444444444445e-05, | |
| "loss": 0.3777, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 1.9528907922912206, | |
| "grad_norm": 0.21457199704732352, | |
| "learning_rate": 1.9404761904761906e-05, | |
| "loss": 0.3519, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 1.955032119914347, | |
| "grad_norm": 0.21421731139535555, | |
| "learning_rate": 1.9365079365079367e-05, | |
| "loss": 0.3595, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 1.9571734475374734, | |
| "grad_norm": 0.2227014663977621, | |
| "learning_rate": 1.9325396825396828e-05, | |
| "loss": 0.3618, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 1.9593147751605997, | |
| "grad_norm": 0.19983383858352952, | |
| "learning_rate": 1.928571428571429e-05, | |
| "loss": 0.3566, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 1.961456102783726, | |
| "grad_norm": 0.19358450153214074, | |
| "learning_rate": 1.924603174603175e-05, | |
| "loss": 0.3456, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 1.9635974304068522, | |
| "grad_norm": 0.19694776679615417, | |
| "learning_rate": 1.920634920634921e-05, | |
| "loss": 0.3601, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 1.9657387580299786, | |
| "grad_norm": 0.20280005475553886, | |
| "learning_rate": 1.9166666666666667e-05, | |
| "loss": 0.3537, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 1.967880085653105, | |
| "grad_norm": 0.19909912132770102, | |
| "learning_rate": 1.9126984126984128e-05, | |
| "loss": 0.3579, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 1.9700214132762313, | |
| "grad_norm": 0.22713082416817215, | |
| "learning_rate": 1.9087301587301585e-05, | |
| "loss": 0.3768, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.9721627408993576, | |
| "grad_norm": 0.211019913621537, | |
| "learning_rate": 1.9047619047619046e-05, | |
| "loss": 0.3541, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 1.974304068522484, | |
| "grad_norm": 0.20335978017528178, | |
| "learning_rate": 1.9007936507936507e-05, | |
| "loss": 0.3715, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 1.9764453961456103, | |
| "grad_norm": 0.2138102480162949, | |
| "learning_rate": 1.8968253968253968e-05, | |
| "loss": 0.3591, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 1.9785867237687365, | |
| "grad_norm": 0.24453013877035817, | |
| "learning_rate": 1.892857142857143e-05, | |
| "loss": 0.3853, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 1.9807280513918628, | |
| "grad_norm": 0.20138123670876026, | |
| "learning_rate": 1.888888888888889e-05, | |
| "loss": 0.3693, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 1.9828693790149892, | |
| "grad_norm": 0.2130228707007481, | |
| "learning_rate": 1.884920634920635e-05, | |
| "loss": 0.3756, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 1.9850107066381155, | |
| "grad_norm": 0.21159991261313507, | |
| "learning_rate": 1.880952380952381e-05, | |
| "loss": 0.3559, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 1.9871520342612419, | |
| "grad_norm": 0.1926680844736931, | |
| "learning_rate": 1.876984126984127e-05, | |
| "loss": 0.3564, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 1.9892933618843682, | |
| "grad_norm": 0.19459069355708655, | |
| "learning_rate": 1.8730158730158732e-05, | |
| "loss": 0.3496, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 1.9914346895074946, | |
| "grad_norm": 0.2133220689283602, | |
| "learning_rate": 1.8690476190476193e-05, | |
| "loss": 0.3651, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.993576017130621, | |
| "grad_norm": 0.2027554437180913, | |
| "learning_rate": 1.8650793650793654e-05, | |
| "loss": 0.3695, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 1.9957173447537473, | |
| "grad_norm": 0.19702179225284736, | |
| "learning_rate": 1.861111111111111e-05, | |
| "loss": 0.3659, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 1.9978586723768736, | |
| "grad_norm": 0.21240087364498741, | |
| "learning_rate": 1.8571428571428572e-05, | |
| "loss": 0.362, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.21641052576639658, | |
| "learning_rate": 1.8531746031746032e-05, | |
| "loss": 0.3479, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 2.0021413276231264, | |
| "grad_norm": 0.3127795260229852, | |
| "learning_rate": 1.8492063492063493e-05, | |
| "loss": 0.2938, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 2.0042826552462527, | |
| "grad_norm": 0.20537155001189475, | |
| "learning_rate": 1.8452380952380954e-05, | |
| "loss": 0.2725, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 2.006423982869379, | |
| "grad_norm": 0.4035860023057039, | |
| "learning_rate": 1.8412698412698415e-05, | |
| "loss": 0.2877, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 2.0085653104925054, | |
| "grad_norm": 0.2729391560493891, | |
| "learning_rate": 1.8373015873015875e-05, | |
| "loss": 0.2821, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 2.0107066381156318, | |
| "grad_norm": 0.23930664003910482, | |
| "learning_rate": 1.8333333333333333e-05, | |
| "loss": 0.2732, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 2.012847965738758, | |
| "grad_norm": 0.2534172257988802, | |
| "learning_rate": 1.8293650793650794e-05, | |
| "loss": 0.2823, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 2.0149892933618845, | |
| "grad_norm": 0.2842003636688084, | |
| "learning_rate": 1.8253968253968254e-05, | |
| "loss": 0.2699, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 2.017130620985011, | |
| "grad_norm": 0.2546284637134941, | |
| "learning_rate": 1.8214285714285715e-05, | |
| "loss": 0.2898, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 2.019271948608137, | |
| "grad_norm": 0.25923976328845644, | |
| "learning_rate": 1.8174603174603176e-05, | |
| "loss": 0.2835, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 2.0214132762312635, | |
| "grad_norm": 0.2694090119639284, | |
| "learning_rate": 1.8134920634920637e-05, | |
| "loss": 0.2789, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 2.02355460385439, | |
| "grad_norm": 0.2560183056621401, | |
| "learning_rate": 1.8095238095238094e-05, | |
| "loss": 0.288, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 2.0256959314775163, | |
| "grad_norm": 0.20946395889175382, | |
| "learning_rate": 1.8055555555555555e-05, | |
| "loss": 0.2726, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 2.0278372591006426, | |
| "grad_norm": 0.23286538060698495, | |
| "learning_rate": 1.8015873015873015e-05, | |
| "loss": 0.2649, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 2.0299785867237685, | |
| "grad_norm": 0.2812062902226991, | |
| "learning_rate": 1.7976190476190476e-05, | |
| "loss": 0.2761, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 2.032119914346895, | |
| "grad_norm": 0.21066691235196103, | |
| "learning_rate": 1.7936507936507937e-05, | |
| "loss": 0.2767, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 2.0342612419700212, | |
| "grad_norm": 0.22546963160204261, | |
| "learning_rate": 1.7896825396825398e-05, | |
| "loss": 0.2748, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 2.0364025695931476, | |
| "grad_norm": 0.2695316483089908, | |
| "learning_rate": 1.785714285714286e-05, | |
| "loss": 0.2931, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 2.038543897216274, | |
| "grad_norm": 0.224650667985638, | |
| "learning_rate": 1.781746031746032e-05, | |
| "loss": 0.286, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 2.0406852248394003, | |
| "grad_norm": 0.23425810863567934, | |
| "learning_rate": 1.777777777777778e-05, | |
| "loss": 0.2768, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 2.0428265524625266, | |
| "grad_norm": 0.21885265388287464, | |
| "learning_rate": 1.773809523809524e-05, | |
| "loss": 0.2664, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 2.044967880085653, | |
| "grad_norm": 0.2220128915627926, | |
| "learning_rate": 1.76984126984127e-05, | |
| "loss": 0.2792, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 2.0471092077087794, | |
| "grad_norm": 0.21795261361627535, | |
| "learning_rate": 1.7658730158730162e-05, | |
| "loss": 0.2963, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 2.0492505353319057, | |
| "grad_norm": 0.22853139984187426, | |
| "learning_rate": 1.761904761904762e-05, | |
| "loss": 0.2774, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 2.051391862955032, | |
| "grad_norm": 0.21005634342559973, | |
| "learning_rate": 1.757936507936508e-05, | |
| "loss": 0.278, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 2.0535331905781584, | |
| "grad_norm": 0.1972736100196403, | |
| "learning_rate": 1.7539682539682538e-05, | |
| "loss": 0.276, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 2.0556745182012848, | |
| "grad_norm": 0.2013981720969393, | |
| "learning_rate": 1.75e-05, | |
| "loss": 0.2725, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 2.057815845824411, | |
| "grad_norm": 0.21114011142047054, | |
| "learning_rate": 1.746031746031746e-05, | |
| "loss": 0.2692, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 2.0599571734475375, | |
| "grad_norm": 0.19810710536290826, | |
| "learning_rate": 1.742063492063492e-05, | |
| "loss": 0.2689, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 2.062098501070664, | |
| "grad_norm": 0.20962838769163566, | |
| "learning_rate": 1.738095238095238e-05, | |
| "loss": 0.2755, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 2.06423982869379, | |
| "grad_norm": 0.2007780468677198, | |
| "learning_rate": 1.734126984126984e-05, | |
| "loss": 0.2742, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 2.0663811563169165, | |
| "grad_norm": 0.20730329087379573, | |
| "learning_rate": 1.7301587301587302e-05, | |
| "loss": 0.2796, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 2.068522483940043, | |
| "grad_norm": 0.20422916553473747, | |
| "learning_rate": 1.7261904761904763e-05, | |
| "loss": 0.2717, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 2.0706638115631693, | |
| "grad_norm": 0.21624827975621064, | |
| "learning_rate": 1.7222222222222224e-05, | |
| "loss": 0.2736, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 2.0728051391862956, | |
| "grad_norm": 0.211530596933148, | |
| "learning_rate": 1.7182539682539684e-05, | |
| "loss": 0.2856, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 2.074946466809422, | |
| "grad_norm": 0.21392283135095658, | |
| "learning_rate": 1.7142857142857145e-05, | |
| "loss": 0.2784, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 2.0770877944325483, | |
| "grad_norm": 0.22575272429775897, | |
| "learning_rate": 1.7103174603174606e-05, | |
| "loss": 0.2683, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 2.0792291220556747, | |
| "grad_norm": 0.20807075241886105, | |
| "learning_rate": 1.7063492063492063e-05, | |
| "loss": 0.2811, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 2.081370449678801, | |
| "grad_norm": 0.19696007356880943, | |
| "learning_rate": 1.7023809523809524e-05, | |
| "loss": 0.2616, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 2.0835117773019274, | |
| "grad_norm": 0.2067907488812531, | |
| "learning_rate": 1.6984126984126985e-05, | |
| "loss": 0.2867, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 2.0856531049250537, | |
| "grad_norm": 0.20785661967310565, | |
| "learning_rate": 1.6944444444444446e-05, | |
| "loss": 0.2804, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 2.08779443254818, | |
| "grad_norm": 0.21316354407545085, | |
| "learning_rate": 1.6904761904761906e-05, | |
| "loss": 0.2825, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 2.089935760171306, | |
| "grad_norm": 0.20417574340319766, | |
| "learning_rate": 1.6865079365079367e-05, | |
| "loss": 0.2698, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 2.0920770877944324, | |
| "grad_norm": 0.20059086132923634, | |
| "learning_rate": 1.6825396825396828e-05, | |
| "loss": 0.2838, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 2.0942184154175587, | |
| "grad_norm": 0.20806774530665742, | |
| "learning_rate": 1.6785714285714285e-05, | |
| "loss": 0.2954, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 2.096359743040685, | |
| "grad_norm": 0.21795686396623343, | |
| "learning_rate": 1.6746031746031746e-05, | |
| "loss": 0.2758, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 2.0985010706638114, | |
| "grad_norm": 0.20525399917369724, | |
| "learning_rate": 1.6706349206349207e-05, | |
| "loss": 0.2852, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 2.1006423982869378, | |
| "grad_norm": 0.20153493188561758, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 0.2671, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 2.102783725910064, | |
| "grad_norm": 0.21682029099052538, | |
| "learning_rate": 1.6626984126984128e-05, | |
| "loss": 0.2794, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 2.1049250535331905, | |
| "grad_norm": 0.20982679296246803, | |
| "learning_rate": 1.658730158730159e-05, | |
| "loss": 0.2755, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 2.107066381156317, | |
| "grad_norm": 0.20945525288086686, | |
| "learning_rate": 1.6547619047619046e-05, | |
| "loss": 0.2675, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 2.109207708779443, | |
| "grad_norm": 0.21376143878454182, | |
| "learning_rate": 1.6507936507936507e-05, | |
| "loss": 0.2832, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 2.1113490364025695, | |
| "grad_norm": 0.19436511910516777, | |
| "learning_rate": 1.6468253968253968e-05, | |
| "loss": 0.2737, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 2.113490364025696, | |
| "grad_norm": 0.20930636270391903, | |
| "learning_rate": 1.642857142857143e-05, | |
| "loss": 0.2758, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 2.1156316916488223, | |
| "grad_norm": 0.23244369003497906, | |
| "learning_rate": 1.638888888888889e-05, | |
| "loss": 0.296, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 2.1177730192719486, | |
| "grad_norm": 0.19066256457699418, | |
| "learning_rate": 1.634920634920635e-05, | |
| "loss": 0.2738, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 2.119914346895075, | |
| "grad_norm": 0.19911072991641976, | |
| "learning_rate": 1.630952380952381e-05, | |
| "loss": 0.2715, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 2.1220556745182013, | |
| "grad_norm": 0.21201959394684514, | |
| "learning_rate": 1.626984126984127e-05, | |
| "loss": 0.2922, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 2.1241970021413277, | |
| "grad_norm": 0.1967089705488496, | |
| "learning_rate": 1.6230158730158732e-05, | |
| "loss": 0.2715, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 2.126338329764454, | |
| "grad_norm": 0.19369231393643502, | |
| "learning_rate": 1.6190476190476193e-05, | |
| "loss": 0.2784, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 2.1284796573875804, | |
| "grad_norm": 0.2040785519443548, | |
| "learning_rate": 1.6150793650793654e-05, | |
| "loss": 0.2767, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 2.1306209850107067, | |
| "grad_norm": 0.1898501436472166, | |
| "learning_rate": 1.6111111111111115e-05, | |
| "loss": 0.2678, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 2.132762312633833, | |
| "grad_norm": 0.20111950499112383, | |
| "learning_rate": 1.6071428571428572e-05, | |
| "loss": 0.2823, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 2.1349036402569594, | |
| "grad_norm": 0.1981498781288381, | |
| "learning_rate": 1.6031746031746033e-05, | |
| "loss": 0.2747, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 2.137044967880086, | |
| "grad_norm": 0.19271209341880322, | |
| "learning_rate": 1.599206349206349e-05, | |
| "loss": 0.2752, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 2.139186295503212, | |
| "grad_norm": 0.2221235505452683, | |
| "learning_rate": 1.595238095238095e-05, | |
| "loss": 0.2716, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 2.1413276231263385, | |
| "grad_norm": 0.19713198559950088, | |
| "learning_rate": 1.591269841269841e-05, | |
| "loss": 0.2812, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.143468950749465, | |
| "grad_norm": 0.19897355551674525, | |
| "learning_rate": 1.5873015873015872e-05, | |
| "loss": 0.2846, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 2.145610278372591, | |
| "grad_norm": 0.19298432452205258, | |
| "learning_rate": 1.5833333333333333e-05, | |
| "loss": 0.2682, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 2.147751605995717, | |
| "grad_norm": 0.2019146536667124, | |
| "learning_rate": 1.5793650793650794e-05, | |
| "loss": 0.2735, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 2.1498929336188435, | |
| "grad_norm": 0.20154869107354417, | |
| "learning_rate": 1.5753968253968255e-05, | |
| "loss": 0.2632, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 2.15203426124197, | |
| "grad_norm": 0.19363808622473339, | |
| "learning_rate": 1.5714285714285715e-05, | |
| "loss": 0.2874, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 2.154175588865096, | |
| "grad_norm": 0.2046720100698483, | |
| "learning_rate": 1.5674603174603176e-05, | |
| "loss": 0.2754, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 2.1563169164882225, | |
| "grad_norm": 0.1930006937085633, | |
| "learning_rate": 1.5634920634920637e-05, | |
| "loss": 0.2726, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 2.158458244111349, | |
| "grad_norm": 0.20734097354646286, | |
| "learning_rate": 1.5595238095238098e-05, | |
| "loss": 0.2879, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 2.1605995717344753, | |
| "grad_norm": 0.19538298745906257, | |
| "learning_rate": 1.5555555555555555e-05, | |
| "loss": 0.2809, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 2.1627408993576016, | |
| "grad_norm": 0.2252535619453128, | |
| "learning_rate": 1.5515873015873016e-05, | |
| "loss": 0.286, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 2.164882226980728, | |
| "grad_norm": 0.19426151913359876, | |
| "learning_rate": 1.5476190476190476e-05, | |
| "loss": 0.2806, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 2.1670235546038543, | |
| "grad_norm": 0.20018884891444158, | |
| "learning_rate": 1.5436507936507937e-05, | |
| "loss": 0.2922, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 2.1691648822269807, | |
| "grad_norm": 0.20745084715389592, | |
| "learning_rate": 1.5396825396825398e-05, | |
| "loss": 0.272, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 2.171306209850107, | |
| "grad_norm": 0.2005966941166649, | |
| "learning_rate": 1.535714285714286e-05, | |
| "loss": 0.2748, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 2.1734475374732334, | |
| "grad_norm": 0.1959554222950539, | |
| "learning_rate": 1.531746031746032e-05, | |
| "loss": 0.2725, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 2.1755888650963597, | |
| "grad_norm": 0.2024673027309239, | |
| "learning_rate": 1.527777777777778e-05, | |
| "loss": 0.2925, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 2.177730192719486, | |
| "grad_norm": 0.18968936546486206, | |
| "learning_rate": 1.5238095238095241e-05, | |
| "loss": 0.2734, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 2.1798715203426124, | |
| "grad_norm": 0.21451121125699396, | |
| "learning_rate": 1.5198412698412698e-05, | |
| "loss": 0.2729, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 2.182012847965739, | |
| "grad_norm": 0.20234427279725273, | |
| "learning_rate": 1.5158730158730159e-05, | |
| "loss": 0.2846, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 2.184154175588865, | |
| "grad_norm": 0.19307169286798295, | |
| "learning_rate": 1.5119047619047618e-05, | |
| "loss": 0.2704, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 2.1862955032119915, | |
| "grad_norm": 0.1831847011586216, | |
| "learning_rate": 1.5079365079365079e-05, | |
| "loss": 0.2682, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 2.188436830835118, | |
| "grad_norm": 0.19718660927111226, | |
| "learning_rate": 1.503968253968254e-05, | |
| "loss": 0.2665, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 2.190578158458244, | |
| "grad_norm": 0.21657711095140936, | |
| "learning_rate": 1.5e-05, | |
| "loss": 0.2793, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 2.1927194860813706, | |
| "grad_norm": 0.2085788754213796, | |
| "learning_rate": 1.4960317460317461e-05, | |
| "loss": 0.2816, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 2.194860813704497, | |
| "grad_norm": 0.20682253217524985, | |
| "learning_rate": 1.4920634920634922e-05, | |
| "loss": 0.2861, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 2.1970021413276233, | |
| "grad_norm": 0.19353447072521687, | |
| "learning_rate": 1.4880952380952381e-05, | |
| "loss": 0.2818, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 2.1991434689507496, | |
| "grad_norm": 0.2023000282481551, | |
| "learning_rate": 1.4841269841269842e-05, | |
| "loss": 0.2897, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 2.201284796573876, | |
| "grad_norm": 0.2091382321658723, | |
| "learning_rate": 1.4801587301587302e-05, | |
| "loss": 0.2803, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 2.2034261241970023, | |
| "grad_norm": 0.19040214411798756, | |
| "learning_rate": 1.4761904761904763e-05, | |
| "loss": 0.2709, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 2.2055674518201283, | |
| "grad_norm": 0.18805950537913466, | |
| "learning_rate": 1.4722222222222224e-05, | |
| "loss": 0.2641, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 2.207708779443255, | |
| "grad_norm": 0.19809642692965637, | |
| "learning_rate": 1.4682539682539683e-05, | |
| "loss": 0.2886, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 2.209850107066381, | |
| "grad_norm": 0.1912784314191019, | |
| "learning_rate": 1.4642857142857144e-05, | |
| "loss": 0.2795, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 2.2119914346895073, | |
| "grad_norm": 0.1966882118912259, | |
| "learning_rate": 1.4603174603174605e-05, | |
| "loss": 0.2837, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 2.2141327623126337, | |
| "grad_norm": 0.19981226873116584, | |
| "learning_rate": 1.4563492063492065e-05, | |
| "loss": 0.2798, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 2.21627408993576, | |
| "grad_norm": 0.19609944493947298, | |
| "learning_rate": 1.4523809523809526e-05, | |
| "loss": 0.2851, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 2.2184154175588864, | |
| "grad_norm": 0.20172541461591478, | |
| "learning_rate": 1.4484126984126987e-05, | |
| "loss": 0.2806, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 2.2205567451820127, | |
| "grad_norm": 0.2184127818693799, | |
| "learning_rate": 1.4444444444444444e-05, | |
| "loss": 0.2788, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 2.222698072805139, | |
| "grad_norm": 0.1934456256040119, | |
| "learning_rate": 1.4404761904761905e-05, | |
| "loss": 0.2684, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 2.2248394004282654, | |
| "grad_norm": 0.2082433040162603, | |
| "learning_rate": 1.4365079365079364e-05, | |
| "loss": 0.2923, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 2.226980728051392, | |
| "grad_norm": 0.22380158655105736, | |
| "learning_rate": 1.4325396825396825e-05, | |
| "loss": 0.2779, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 2.229122055674518, | |
| "grad_norm": 0.1942322520767476, | |
| "learning_rate": 1.4285714285714285e-05, | |
| "loss": 0.2759, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 2.2312633832976445, | |
| "grad_norm": 0.1974411304770536, | |
| "learning_rate": 1.4246031746031746e-05, | |
| "loss": 0.2688, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 2.233404710920771, | |
| "grad_norm": 0.19709284568411106, | |
| "learning_rate": 1.4206349206349207e-05, | |
| "loss": 0.2789, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 2.235546038543897, | |
| "grad_norm": 0.19313497813772162, | |
| "learning_rate": 1.4166666666666668e-05, | |
| "loss": 0.28, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 2.2376873661670236, | |
| "grad_norm": 0.19756395359956005, | |
| "learning_rate": 1.4126984126984127e-05, | |
| "loss": 0.287, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 2.23982869379015, | |
| "grad_norm": 0.19707117382557698, | |
| "learning_rate": 1.4087301587301587e-05, | |
| "loss": 0.2793, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 2.2419700214132763, | |
| "grad_norm": 0.2185295361546076, | |
| "learning_rate": 1.4047619047619048e-05, | |
| "loss": 0.2801, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 2.2441113490364026, | |
| "grad_norm": 0.1959782383553124, | |
| "learning_rate": 1.4007936507936509e-05, | |
| "loss": 0.2778, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 2.246252676659529, | |
| "grad_norm": 0.20220432730319396, | |
| "learning_rate": 1.396825396825397e-05, | |
| "loss": 0.2739, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 2.2483940042826553, | |
| "grad_norm": 0.203202312939082, | |
| "learning_rate": 1.392857142857143e-05, | |
| "loss": 0.2686, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 2.2505353319057817, | |
| "grad_norm": 0.19629196239471303, | |
| "learning_rate": 1.388888888888889e-05, | |
| "loss": 0.2741, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 2.252676659528908, | |
| "grad_norm": 0.20785913348172835, | |
| "learning_rate": 1.384920634920635e-05, | |
| "loss": 0.2738, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 2.2548179871520344, | |
| "grad_norm": 0.20245154360926115, | |
| "learning_rate": 1.3809523809523811e-05, | |
| "loss": 0.2944, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 2.2569593147751608, | |
| "grad_norm": 0.2096114189668644, | |
| "learning_rate": 1.3769841269841272e-05, | |
| "loss": 0.2759, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 2.259100642398287, | |
| "grad_norm": 0.24320117601366367, | |
| "learning_rate": 1.3730158730158733e-05, | |
| "loss": 0.271, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 2.2612419700214135, | |
| "grad_norm": 0.19424837338985246, | |
| "learning_rate": 1.3690476190476192e-05, | |
| "loss": 0.2793, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 2.2633832976445394, | |
| "grad_norm": 0.19235543786637513, | |
| "learning_rate": 1.365079365079365e-05, | |
| "loss": 0.2797, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 2.265524625267666, | |
| "grad_norm": 0.2014689883501026, | |
| "learning_rate": 1.3611111111111111e-05, | |
| "loss": 0.2804, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 2.267665952890792, | |
| "grad_norm": 0.19388371477246316, | |
| "learning_rate": 1.357142857142857e-05, | |
| "loss": 0.2857, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 2.2698072805139184, | |
| "grad_norm": 0.18600354706155486, | |
| "learning_rate": 1.3531746031746031e-05, | |
| "loss": 0.268, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 2.271948608137045, | |
| "grad_norm": 0.1997457077450587, | |
| "learning_rate": 1.3492063492063492e-05, | |
| "loss": 0.2744, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 2.274089935760171, | |
| "grad_norm": 0.21254455192384775, | |
| "learning_rate": 1.3452380952380953e-05, | |
| "loss": 0.281, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 2.2762312633832975, | |
| "grad_norm": 0.19277682702608626, | |
| "learning_rate": 1.3412698412698413e-05, | |
| "loss": 0.2766, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 2.278372591006424, | |
| "grad_norm": 0.19996995603399093, | |
| "learning_rate": 1.3373015873015873e-05, | |
| "loss": 0.2811, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 2.28051391862955, | |
| "grad_norm": 0.2037157346895961, | |
| "learning_rate": 1.3333333333333333e-05, | |
| "loss": 0.286, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 2.2826552462526766, | |
| "grad_norm": 0.2038456192268336, | |
| "learning_rate": 1.3293650793650794e-05, | |
| "loss": 0.2756, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 2.284796573875803, | |
| "grad_norm": 0.19897665841544188, | |
| "learning_rate": 1.3253968253968255e-05, | |
| "loss": 0.2726, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 2.2869379014989293, | |
| "grad_norm": 0.2001072986623387, | |
| "learning_rate": 1.3214285714285716e-05, | |
| "loss": 0.2795, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 2.2890792291220556, | |
| "grad_norm": 0.19517842971444538, | |
| "learning_rate": 1.3174603174603176e-05, | |
| "loss": 0.2635, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 2.291220556745182, | |
| "grad_norm": 0.2068829830909192, | |
| "learning_rate": 1.3134920634920635e-05, | |
| "loss": 0.2791, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 2.2933618843683083, | |
| "grad_norm": 0.21700568586294433, | |
| "learning_rate": 1.3095238095238096e-05, | |
| "loss": 0.3014, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 2.2955032119914347, | |
| "grad_norm": 0.19043215131446242, | |
| "learning_rate": 1.3055555555555557e-05, | |
| "loss": 0.2772, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 2.297644539614561, | |
| "grad_norm": 0.20431273180658213, | |
| "learning_rate": 1.3015873015873018e-05, | |
| "loss": 0.2738, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 2.2997858672376874, | |
| "grad_norm": 0.1971225707821823, | |
| "learning_rate": 1.2976190476190478e-05, | |
| "loss": 0.2762, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 2.3019271948608138, | |
| "grad_norm": 0.19556074151794256, | |
| "learning_rate": 1.2936507936507939e-05, | |
| "loss": 0.2572, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 2.30406852248394, | |
| "grad_norm": 0.20833957821982024, | |
| "learning_rate": 1.2896825396825398e-05, | |
| "loss": 0.2794, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 2.3062098501070665, | |
| "grad_norm": 0.2025740903256515, | |
| "learning_rate": 1.2857142857142857e-05, | |
| "loss": 0.2733, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 2.308351177730193, | |
| "grad_norm": 0.20532981633640338, | |
| "learning_rate": 1.2817460317460316e-05, | |
| "loss": 0.2801, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 2.310492505353319, | |
| "grad_norm": 0.20302693087098408, | |
| "learning_rate": 1.2777777777777777e-05, | |
| "loss": 0.2855, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 2.3126338329764455, | |
| "grad_norm": 0.19316097914518776, | |
| "learning_rate": 1.2738095238095238e-05, | |
| "loss": 0.2744, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.314775160599572, | |
| "grad_norm": 0.1890481893959948, | |
| "learning_rate": 1.2698412698412699e-05, | |
| "loss": 0.2858, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 2.3169164882226982, | |
| "grad_norm": 0.20709172274897492, | |
| "learning_rate": 1.265873015873016e-05, | |
| "loss": 0.287, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 2.3190578158458246, | |
| "grad_norm": 0.21179564472663326, | |
| "learning_rate": 1.261904761904762e-05, | |
| "loss": 0.2837, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 2.3211991434689505, | |
| "grad_norm": 0.2076511560969458, | |
| "learning_rate": 1.2579365079365079e-05, | |
| "loss": 0.2809, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 2.3233404710920773, | |
| "grad_norm": 0.2053185208412779, | |
| "learning_rate": 1.253968253968254e-05, | |
| "loss": 0.2778, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 2.325481798715203, | |
| "grad_norm": 0.19958273635866866, | |
| "learning_rate": 1.25e-05, | |
| "loss": 0.2637, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 2.3276231263383296, | |
| "grad_norm": 0.2089718737763175, | |
| "learning_rate": 1.2460317460317461e-05, | |
| "loss": 0.2871, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 2.329764453961456, | |
| "grad_norm": 0.2051818133160847, | |
| "learning_rate": 1.2420634920634922e-05, | |
| "loss": 0.2973, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 2.3319057815845823, | |
| "grad_norm": 0.6114154780985942, | |
| "learning_rate": 1.2380952380952381e-05, | |
| "loss": 0.2961, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 2.3340471092077086, | |
| "grad_norm": 0.1835556937496075, | |
| "learning_rate": 1.2341269841269842e-05, | |
| "loss": 0.269, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 2.336188436830835, | |
| "grad_norm": 0.20213777732489752, | |
| "learning_rate": 1.2301587301587301e-05, | |
| "loss": 0.2747, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 2.3383297644539613, | |
| "grad_norm": 0.21427750205781732, | |
| "learning_rate": 1.2261904761904762e-05, | |
| "loss": 0.2811, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 2.3404710920770877, | |
| "grad_norm": 0.2087757066721972, | |
| "learning_rate": 1.2222222222222222e-05, | |
| "loss": 0.2787, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 2.342612419700214, | |
| "grad_norm": 0.18757584192245175, | |
| "learning_rate": 1.2182539682539683e-05, | |
| "loss": 0.2889, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 2.3447537473233404, | |
| "grad_norm": 0.20608037222439926, | |
| "learning_rate": 1.2142857142857144e-05, | |
| "loss": 0.2789, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 2.3468950749464668, | |
| "grad_norm": 0.21935230178468215, | |
| "learning_rate": 1.2103174603174603e-05, | |
| "loss": 0.285, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 2.349036402569593, | |
| "grad_norm": 0.21133802393480924, | |
| "learning_rate": 1.2063492063492064e-05, | |
| "loss": 0.2899, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 2.3511777301927195, | |
| "grad_norm": 0.18791527322276846, | |
| "learning_rate": 1.2023809523809525e-05, | |
| "loss": 0.2742, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 2.353319057815846, | |
| "grad_norm": 0.1971862704976824, | |
| "learning_rate": 1.1984126984126985e-05, | |
| "loss": 0.2847, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 2.355460385438972, | |
| "grad_norm": 0.21189728780479716, | |
| "learning_rate": 1.1944444444444446e-05, | |
| "loss": 0.2872, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.3576017130620985, | |
| "grad_norm": 0.19227294980450937, | |
| "learning_rate": 1.1904761904761905e-05, | |
| "loss": 0.2907, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 2.359743040685225, | |
| "grad_norm": 0.20548532335329017, | |
| "learning_rate": 1.1865079365079366e-05, | |
| "loss": 0.2643, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 2.3618843683083512, | |
| "grad_norm": 0.18375693623718486, | |
| "learning_rate": 1.1825396825396825e-05, | |
| "loss": 0.2735, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 2.3640256959314776, | |
| "grad_norm": 0.21294408502746626, | |
| "learning_rate": 1.1785714285714286e-05, | |
| "loss": 0.281, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 2.366167023554604, | |
| "grad_norm": 0.19727873974409793, | |
| "learning_rate": 1.1746031746031746e-05, | |
| "loss": 0.281, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 2.3683083511777303, | |
| "grad_norm": 0.18895600615844696, | |
| "learning_rate": 1.1706349206349207e-05, | |
| "loss": 0.2852, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 2.3704496788008567, | |
| "grad_norm": 0.19703744794029157, | |
| "learning_rate": 1.1666666666666668e-05, | |
| "loss": 0.2856, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 2.372591006423983, | |
| "grad_norm": 0.20958076853632984, | |
| "learning_rate": 1.1626984126984129e-05, | |
| "loss": 0.279, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 2.3747323340471094, | |
| "grad_norm": 0.19332254165250848, | |
| "learning_rate": 1.1587301587301588e-05, | |
| "loss": 0.2894, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 2.3768736616702357, | |
| "grad_norm": 0.19661719515205164, | |
| "learning_rate": 1.1547619047619048e-05, | |
| "loss": 0.2702, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 2.3790149892933616, | |
| "grad_norm": 0.1907420302248947, | |
| "learning_rate": 1.1507936507936508e-05, | |
| "loss": 0.2771, | |
| "step": 1111 | |
| }, | |
| { | |
| "epoch": 2.3811563169164884, | |
| "grad_norm": 0.18023957300416488, | |
| "learning_rate": 1.1468253968253968e-05, | |
| "loss": 0.2717, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 2.3832976445396143, | |
| "grad_norm": 0.2072993037766396, | |
| "learning_rate": 1.1428571428571429e-05, | |
| "loss": 0.2933, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 2.385438972162741, | |
| "grad_norm": 0.1904556433015721, | |
| "learning_rate": 1.138888888888889e-05, | |
| "loss": 0.2777, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 2.387580299785867, | |
| "grad_norm": 0.190065817649398, | |
| "learning_rate": 1.1349206349206349e-05, | |
| "loss": 0.2737, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 2.3897216274089934, | |
| "grad_norm": 0.18734473596445086, | |
| "learning_rate": 1.130952380952381e-05, | |
| "loss": 0.2798, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 2.3918629550321198, | |
| "grad_norm": 0.20310161931125695, | |
| "learning_rate": 1.126984126984127e-05, | |
| "loss": 0.2862, | |
| "step": 1117 | |
| }, | |
| { | |
| "epoch": 2.394004282655246, | |
| "grad_norm": 0.19855963578116342, | |
| "learning_rate": 1.1230158730158731e-05, | |
| "loss": 0.2784, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 2.3961456102783725, | |
| "grad_norm": 0.189077523042222, | |
| "learning_rate": 1.1190476190476192e-05, | |
| "loss": 0.286, | |
| "step": 1119 | |
| }, | |
| { | |
| "epoch": 2.398286937901499, | |
| "grad_norm": 0.18894803290615364, | |
| "learning_rate": 1.1150793650793653e-05, | |
| "loss": 0.2779, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 2.400428265524625, | |
| "grad_norm": 0.2015116046355913, | |
| "learning_rate": 1.1111111111111112e-05, | |
| "loss": 0.2865, | |
| "step": 1121 | |
| }, | |
| { | |
| "epoch": 2.4025695931477515, | |
| "grad_norm": 0.19437606708029698, | |
| "learning_rate": 1.107142857142857e-05, | |
| "loss": 0.2933, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 2.404710920770878, | |
| "grad_norm": 0.18852414654829355, | |
| "learning_rate": 1.1031746031746031e-05, | |
| "loss": 0.284, | |
| "step": 1123 | |
| }, | |
| { | |
| "epoch": 2.4068522483940042, | |
| "grad_norm": 0.1879656318380588, | |
| "learning_rate": 1.0992063492063492e-05, | |
| "loss": 0.2874, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 2.4089935760171306, | |
| "grad_norm": 0.19791655089067275, | |
| "learning_rate": 1.0952380952380953e-05, | |
| "loss": 0.2786, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 2.411134903640257, | |
| "grad_norm": 0.20226850687509584, | |
| "learning_rate": 1.0912698412698414e-05, | |
| "loss": 0.2922, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 2.4132762312633833, | |
| "grad_norm": 0.18552400783172202, | |
| "learning_rate": 1.0873015873015874e-05, | |
| "loss": 0.2757, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 2.4154175588865097, | |
| "grad_norm": 0.1837825345997371, | |
| "learning_rate": 1.0833333333333334e-05, | |
| "loss": 0.2764, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 2.417558886509636, | |
| "grad_norm": 0.19528894541437605, | |
| "learning_rate": 1.0793650793650794e-05, | |
| "loss": 0.2742, | |
| "step": 1129 | |
| }, | |
| { | |
| "epoch": 2.4197002141327624, | |
| "grad_norm": 0.21464260349692135, | |
| "learning_rate": 1.0753968253968255e-05, | |
| "loss": 0.2873, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 2.4218415417558887, | |
| "grad_norm": 0.1945459121129588, | |
| "learning_rate": 1.0714285714285714e-05, | |
| "loss": 0.2821, | |
| "step": 1131 | |
| }, | |
| { | |
| "epoch": 2.423982869379015, | |
| "grad_norm": 0.18478419433751667, | |
| "learning_rate": 1.0674603174603175e-05, | |
| "loss": 0.2826, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 2.4261241970021414, | |
| "grad_norm": 0.20225910020937835, | |
| "learning_rate": 1.0634920634920636e-05, | |
| "loss": 0.2873, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 2.428265524625268, | |
| "grad_norm": 0.20292439805906615, | |
| "learning_rate": 1.0595238095238096e-05, | |
| "loss": 0.282, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 2.430406852248394, | |
| "grad_norm": 0.18473437227064216, | |
| "learning_rate": 1.0555555555555555e-05, | |
| "loss": 0.269, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 2.4325481798715205, | |
| "grad_norm": 0.211980884226004, | |
| "learning_rate": 1.0515873015873016e-05, | |
| "loss": 0.2882, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 2.434689507494647, | |
| "grad_norm": 0.22535605642238996, | |
| "learning_rate": 1.0476190476190477e-05, | |
| "loss": 0.265, | |
| "step": 1137 | |
| }, | |
| { | |
| "epoch": 2.436830835117773, | |
| "grad_norm": 0.1991542202837908, | |
| "learning_rate": 1.0436507936507938e-05, | |
| "loss": 0.2887, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 2.4389721627408996, | |
| "grad_norm": 0.2072336431629644, | |
| "learning_rate": 1.0396825396825398e-05, | |
| "loss": 0.2805, | |
| "step": 1139 | |
| }, | |
| { | |
| "epoch": 2.4411134903640255, | |
| "grad_norm": 0.20543914654797396, | |
| "learning_rate": 1.0357142857142859e-05, | |
| "loss": 0.2905, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 2.4432548179871523, | |
| "grad_norm": 0.20157112476161199, | |
| "learning_rate": 1.0317460317460318e-05, | |
| "loss": 0.2843, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 2.445396145610278, | |
| "grad_norm": 0.1916041907370502, | |
| "learning_rate": 1.0277777777777777e-05, | |
| "loss": 0.2916, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 2.4475374732334045, | |
| "grad_norm": 0.19261294432915077, | |
| "learning_rate": 1.0238095238095238e-05, | |
| "loss": 0.2766, | |
| "step": 1143 | |
| }, | |
| { | |
| "epoch": 2.449678800856531, | |
| "grad_norm": 0.21519445528802897, | |
| "learning_rate": 1.0198412698412699e-05, | |
| "loss": 0.2819, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 2.4518201284796572, | |
| "grad_norm": 0.19022925356343928, | |
| "learning_rate": 1.015873015873016e-05, | |
| "loss": 0.2633, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 2.4539614561027836, | |
| "grad_norm": 0.18842920918262623, | |
| "learning_rate": 1.011904761904762e-05, | |
| "loss": 0.2701, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 2.45610278372591, | |
| "grad_norm": 0.18644571326970072, | |
| "learning_rate": 1.007936507936508e-05, | |
| "loss": 0.2747, | |
| "step": 1147 | |
| }, | |
| { | |
| "epoch": 2.4582441113490363, | |
| "grad_norm": 0.20125355865381975, | |
| "learning_rate": 1.003968253968254e-05, | |
| "loss": 0.2627, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 2.4603854389721627, | |
| "grad_norm": 0.21999175240537394, | |
| "learning_rate": 1e-05, | |
| "loss": 0.2805, | |
| "step": 1149 | |
| }, | |
| { | |
| "epoch": 2.462526766595289, | |
| "grad_norm": 0.19787370820057412, | |
| "learning_rate": 9.96031746031746e-06, | |
| "loss": 0.2811, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.4646680942184154, | |
| "grad_norm": 0.18955007917593744, | |
| "learning_rate": 9.92063492063492e-06, | |
| "loss": 0.286, | |
| "step": 1151 | |
| }, | |
| { | |
| "epoch": 2.4668094218415417, | |
| "grad_norm": 0.20431736362786296, | |
| "learning_rate": 9.880952380952381e-06, | |
| "loss": 0.2657, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 2.468950749464668, | |
| "grad_norm": 0.19320732235445437, | |
| "learning_rate": 9.841269841269842e-06, | |
| "loss": 0.2791, | |
| "step": 1153 | |
| }, | |
| { | |
| "epoch": 2.4710920770877944, | |
| "grad_norm": 0.19183959802084546, | |
| "learning_rate": 9.801587301587301e-06, | |
| "loss": 0.2702, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 2.473233404710921, | |
| "grad_norm": 0.19805919853745266, | |
| "learning_rate": 9.761904761904762e-06, | |
| "loss": 0.2858, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 2.475374732334047, | |
| "grad_norm": 0.1907652749236508, | |
| "learning_rate": 9.722222222222223e-06, | |
| "loss": 0.294, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 2.4775160599571735, | |
| "grad_norm": 0.19770176710293932, | |
| "learning_rate": 9.682539682539683e-06, | |
| "loss": 0.2759, | |
| "step": 1157 | |
| }, | |
| { | |
| "epoch": 2.4796573875803, | |
| "grad_norm": 0.19222146084786232, | |
| "learning_rate": 9.642857142857144e-06, | |
| "loss": 0.2732, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 2.481798715203426, | |
| "grad_norm": 0.20359350613339589, | |
| "learning_rate": 9.603174603174605e-06, | |
| "loss": 0.2858, | |
| "step": 1159 | |
| }, | |
| { | |
| "epoch": 2.4839400428265526, | |
| "grad_norm": 0.19032755422213457, | |
| "learning_rate": 9.563492063492064e-06, | |
| "loss": 0.2742, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 2.486081370449679, | |
| "grad_norm": 0.19335063670418315, | |
| "learning_rate": 9.523809523809523e-06, | |
| "loss": 0.2859, | |
| "step": 1161 | |
| }, | |
| { | |
| "epoch": 2.4882226980728053, | |
| "grad_norm": 0.19897996740846366, | |
| "learning_rate": 9.484126984126984e-06, | |
| "loss": 0.2857, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 2.4903640256959316, | |
| "grad_norm": 0.1941351806336689, | |
| "learning_rate": 9.444444444444445e-06, | |
| "loss": 0.2784, | |
| "step": 1163 | |
| }, | |
| { | |
| "epoch": 2.492505353319058, | |
| "grad_norm": 0.19765019351850296, | |
| "learning_rate": 9.404761904761905e-06, | |
| "loss": 0.2761, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 2.4946466809421843, | |
| "grad_norm": 0.18516337216937107, | |
| "learning_rate": 9.365079365079366e-06, | |
| "loss": 0.2764, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 2.4967880085653107, | |
| "grad_norm": 0.18907608503624435, | |
| "learning_rate": 9.325396825396827e-06, | |
| "loss": 0.277, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 2.4989293361884366, | |
| "grad_norm": 0.1941807441287669, | |
| "learning_rate": 9.285714285714286e-06, | |
| "loss": 0.275, | |
| "step": 1167 | |
| }, | |
| { | |
| "epoch": 2.5010706638115634, | |
| "grad_norm": 0.18811473358774278, | |
| "learning_rate": 9.246031746031747e-06, | |
| "loss": 0.2825, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 2.5032119914346893, | |
| "grad_norm": 0.19866610507866173, | |
| "learning_rate": 9.206349206349207e-06, | |
| "loss": 0.2903, | |
| "step": 1169 | |
| }, | |
| { | |
| "epoch": 2.505353319057816, | |
| "grad_norm": 0.18733861692093362, | |
| "learning_rate": 9.166666666666666e-06, | |
| "loss": 0.2873, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 2.507494646680942, | |
| "grad_norm": 0.20268747557365815, | |
| "learning_rate": 9.126984126984127e-06, | |
| "loss": 0.2948, | |
| "step": 1171 | |
| }, | |
| { | |
| "epoch": 2.5096359743040684, | |
| "grad_norm": 0.1996977559423424, | |
| "learning_rate": 9.087301587301588e-06, | |
| "loss": 0.2713, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 2.5117773019271947, | |
| "grad_norm": 0.1996076333163632, | |
| "learning_rate": 9.047619047619047e-06, | |
| "loss": 0.2779, | |
| "step": 1173 | |
| }, | |
| { | |
| "epoch": 2.513918629550321, | |
| "grad_norm": 0.19011932221335692, | |
| "learning_rate": 9.007936507936508e-06, | |
| "loss": 0.2803, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 2.5160599571734474, | |
| "grad_norm": 0.18599656098112288, | |
| "learning_rate": 8.968253968253968e-06, | |
| "loss": 0.2791, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 2.518201284796574, | |
| "grad_norm": 0.19678433899725203, | |
| "learning_rate": 8.92857142857143e-06, | |
| "loss": 0.2822, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 2.5203426124197, | |
| "grad_norm": 0.1846108520466212, | |
| "learning_rate": 8.88888888888889e-06, | |
| "loss": 0.266, | |
| "step": 1177 | |
| }, | |
| { | |
| "epoch": 2.5224839400428265, | |
| "grad_norm": 0.18960292191925746, | |
| "learning_rate": 8.84920634920635e-06, | |
| "loss": 0.2662, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 2.524625267665953, | |
| "grad_norm": 0.2200193235205345, | |
| "learning_rate": 8.80952380952381e-06, | |
| "loss": 0.2854, | |
| "step": 1179 | |
| }, | |
| { | |
| "epoch": 2.526766595289079, | |
| "grad_norm": 0.18902634217465988, | |
| "learning_rate": 8.769841269841269e-06, | |
| "loss": 0.2687, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 2.5289079229122056, | |
| "grad_norm": 0.19098504833889798, | |
| "learning_rate": 8.73015873015873e-06, | |
| "loss": 0.2735, | |
| "step": 1181 | |
| }, | |
| { | |
| "epoch": 2.531049250535332, | |
| "grad_norm": 0.19175998158414528, | |
| "learning_rate": 8.69047619047619e-06, | |
| "loss": 0.2785, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 2.5331905781584583, | |
| "grad_norm": 0.20056232550718314, | |
| "learning_rate": 8.650793650793651e-06, | |
| "loss": 0.2941, | |
| "step": 1183 | |
| }, | |
| { | |
| "epoch": 2.5353319057815846, | |
| "grad_norm": 0.19066315286775598, | |
| "learning_rate": 8.611111111111112e-06, | |
| "loss": 0.2731, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 2.537473233404711, | |
| "grad_norm": 0.18331446647935762, | |
| "learning_rate": 8.571428571428573e-06, | |
| "loss": 0.2758, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 2.5396145610278373, | |
| "grad_norm": 0.184471748142892, | |
| "learning_rate": 8.531746031746032e-06, | |
| "loss": 0.2703, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 2.5417558886509637, | |
| "grad_norm": 0.18934407240554624, | |
| "learning_rate": 8.492063492063492e-06, | |
| "loss": 0.2917, | |
| "step": 1187 | |
| }, | |
| { | |
| "epoch": 2.54389721627409, | |
| "grad_norm": 0.1839570377909689, | |
| "learning_rate": 8.452380952380953e-06, | |
| "loss": 0.2733, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 2.5460385438972164, | |
| "grad_norm": 0.19505246832240355, | |
| "learning_rate": 8.412698412698414e-06, | |
| "loss": 0.2821, | |
| "step": 1189 | |
| }, | |
| { | |
| "epoch": 2.5481798715203428, | |
| "grad_norm": 0.1868170944308521, | |
| "learning_rate": 8.373015873015873e-06, | |
| "loss": 0.2756, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 2.550321199143469, | |
| "grad_norm": 0.1863338672371585, | |
| "learning_rate": 8.333333333333334e-06, | |
| "loss": 0.2779, | |
| "step": 1191 | |
| }, | |
| { | |
| "epoch": 2.552462526766595, | |
| "grad_norm": 0.18694030749510335, | |
| "learning_rate": 8.293650793650794e-06, | |
| "loss": 0.2623, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 2.554603854389722, | |
| "grad_norm": 0.18346739599692435, | |
| "learning_rate": 8.253968253968254e-06, | |
| "loss": 0.2692, | |
| "step": 1193 | |
| }, | |
| { | |
| "epoch": 2.5567451820128477, | |
| "grad_norm": 0.193938387145013, | |
| "learning_rate": 8.214285714285714e-06, | |
| "loss": 0.2762, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 2.5588865096359745, | |
| "grad_norm": 0.1968770084022562, | |
| "learning_rate": 8.174603174603175e-06, | |
| "loss": 0.2749, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 2.5610278372591004, | |
| "grad_norm": 0.2012872958431149, | |
| "learning_rate": 8.134920634920636e-06, | |
| "loss": 0.2841, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 2.5631691648822272, | |
| "grad_norm": 0.19648015971972235, | |
| "learning_rate": 8.095238095238097e-06, | |
| "loss": 0.2677, | |
| "step": 1197 | |
| }, | |
| { | |
| "epoch": 2.565310492505353, | |
| "grad_norm": 0.18810436672950626, | |
| "learning_rate": 8.055555555555557e-06, | |
| "loss": 0.2668, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 2.5674518201284795, | |
| "grad_norm": 0.19172953088721956, | |
| "learning_rate": 8.015873015873016e-06, | |
| "loss": 0.2754, | |
| "step": 1199 | |
| }, | |
| { | |
| "epoch": 2.569593147751606, | |
| "grad_norm": 0.2022573936763731, | |
| "learning_rate": 7.976190476190475e-06, | |
| "loss": 0.2878, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.571734475374732, | |
| "grad_norm": 0.20158630502809705, | |
| "learning_rate": 7.936507936507936e-06, | |
| "loss": 0.3009, | |
| "step": 1201 | |
| }, | |
| { | |
| "epoch": 2.5738758029978586, | |
| "grad_norm": 0.19444900388460162, | |
| "learning_rate": 7.896825396825397e-06, | |
| "loss": 0.2723, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 2.576017130620985, | |
| "grad_norm": 0.19393996035159938, | |
| "learning_rate": 7.857142857142858e-06, | |
| "loss": 0.2784, | |
| "step": 1203 | |
| }, | |
| { | |
| "epoch": 2.5781584582441113, | |
| "grad_norm": 0.20388456521131612, | |
| "learning_rate": 7.817460317460318e-06, | |
| "loss": 0.2972, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 2.5802997858672376, | |
| "grad_norm": 0.19046236974876576, | |
| "learning_rate": 7.777777777777777e-06, | |
| "loss": 0.2866, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 2.582441113490364, | |
| "grad_norm": 0.21198070199178753, | |
| "learning_rate": 7.738095238095238e-06, | |
| "loss": 0.2874, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 2.5845824411134903, | |
| "grad_norm": 0.20591590293289885, | |
| "learning_rate": 7.698412698412699e-06, | |
| "loss": 0.3017, | |
| "step": 1207 | |
| }, | |
| { | |
| "epoch": 2.5867237687366167, | |
| "grad_norm": 0.20766349326464448, | |
| "learning_rate": 7.65873015873016e-06, | |
| "loss": 0.2874, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 2.588865096359743, | |
| "grad_norm": 0.18365533590321947, | |
| "learning_rate": 7.6190476190476205e-06, | |
| "loss": 0.2761, | |
| "step": 1209 | |
| }, | |
| { | |
| "epoch": 2.5910064239828694, | |
| "grad_norm": 0.19032688283851512, | |
| "learning_rate": 7.5793650793650795e-06, | |
| "loss": 0.2708, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 2.5931477516059958, | |
| "grad_norm": 0.1980622035151324, | |
| "learning_rate": 7.5396825396825394e-06, | |
| "loss": 0.269, | |
| "step": 1211 | |
| }, | |
| { | |
| "epoch": 2.595289079229122, | |
| "grad_norm": 0.1983391081538549, | |
| "learning_rate": 7.5e-06, | |
| "loss": 0.2796, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 2.5974304068522485, | |
| "grad_norm": 0.18057379441303084, | |
| "learning_rate": 7.460317460317461e-06, | |
| "loss": 0.2746, | |
| "step": 1213 | |
| }, | |
| { | |
| "epoch": 2.599571734475375, | |
| "grad_norm": 0.19068010050148992, | |
| "learning_rate": 7.420634920634921e-06, | |
| "loss": 0.263, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 2.601713062098501, | |
| "grad_norm": 0.18991511897664065, | |
| "learning_rate": 7.380952380952382e-06, | |
| "loss": 0.2773, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 2.6038543897216275, | |
| "grad_norm": 0.19991447304073107, | |
| "learning_rate": 7.3412698412698415e-06, | |
| "loss": 0.2946, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 2.605995717344754, | |
| "grad_norm": 0.20077693323441242, | |
| "learning_rate": 7.301587301587302e-06, | |
| "loss": 0.2952, | |
| "step": 1217 | |
| }, | |
| { | |
| "epoch": 2.6081370449678802, | |
| "grad_norm": 0.1910164898967557, | |
| "learning_rate": 7.261904761904763e-06, | |
| "loss": 0.2871, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 2.6102783725910066, | |
| "grad_norm": 0.19760453195164532, | |
| "learning_rate": 7.222222222222222e-06, | |
| "loss": 0.276, | |
| "step": 1219 | |
| }, | |
| { | |
| "epoch": 2.612419700214133, | |
| "grad_norm": 0.19107969100185188, | |
| "learning_rate": 7.182539682539682e-06, | |
| "loss": 0.2857, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 2.614561027837259, | |
| "grad_norm": 0.18522026162274652, | |
| "learning_rate": 7.142857142857143e-06, | |
| "loss": 0.269, | |
| "step": 1221 | |
| }, | |
| { | |
| "epoch": 2.6167023554603857, | |
| "grad_norm": 0.1940614696572163, | |
| "learning_rate": 7.1031746031746035e-06, | |
| "loss": 0.2755, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 2.6188436830835116, | |
| "grad_norm": 0.19575778265780672, | |
| "learning_rate": 7.063492063492063e-06, | |
| "loss": 0.2807, | |
| "step": 1223 | |
| }, | |
| { | |
| "epoch": 2.6209850107066384, | |
| "grad_norm": 0.18786133539258287, | |
| "learning_rate": 7.023809523809524e-06, | |
| "loss": 0.2726, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 2.6231263383297643, | |
| "grad_norm": 0.18567364878354917, | |
| "learning_rate": 6.984126984126985e-06, | |
| "loss": 0.2737, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 2.6252676659528906, | |
| "grad_norm": 0.18207760252968563, | |
| "learning_rate": 6.944444444444445e-06, | |
| "loss": 0.2692, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 2.627408993576017, | |
| "grad_norm": 0.190042944624669, | |
| "learning_rate": 6.9047619047619055e-06, | |
| "loss": 0.2677, | |
| "step": 1227 | |
| }, | |
| { | |
| "epoch": 2.6295503211991433, | |
| "grad_norm": 0.18201431307145552, | |
| "learning_rate": 6.865079365079366e-06, | |
| "loss": 0.2741, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 2.6316916488222697, | |
| "grad_norm": 0.1865727217496232, | |
| "learning_rate": 6.825396825396825e-06, | |
| "loss": 0.2671, | |
| "step": 1229 | |
| }, | |
| { | |
| "epoch": 2.633832976445396, | |
| "grad_norm": 0.1801997882324314, | |
| "learning_rate": 6.785714285714285e-06, | |
| "loss": 0.2699, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 2.6359743040685224, | |
| "grad_norm": 0.19389188154337386, | |
| "learning_rate": 6.746031746031746e-06, | |
| "loss": 0.2761, | |
| "step": 1231 | |
| }, | |
| { | |
| "epoch": 2.6381156316916488, | |
| "grad_norm": 0.1876099016700767, | |
| "learning_rate": 6.706349206349207e-06, | |
| "loss": 0.2974, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 2.640256959314775, | |
| "grad_norm": 0.19121391315236033, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 0.2988, | |
| "step": 1233 | |
| }, | |
| { | |
| "epoch": 2.6423982869379015, | |
| "grad_norm": 0.19053822839952972, | |
| "learning_rate": 6.626984126984127e-06, | |
| "loss": 0.2797, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 2.644539614561028, | |
| "grad_norm": 0.18505965289729664, | |
| "learning_rate": 6.587301587301588e-06, | |
| "loss": 0.2666, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 2.646680942184154, | |
| "grad_norm": 0.1893938662916577, | |
| "learning_rate": 6.547619047619048e-06, | |
| "loss": 0.2719, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 2.6488222698072805, | |
| "grad_norm": 0.1938288347509404, | |
| "learning_rate": 6.507936507936509e-06, | |
| "loss": 0.2823, | |
| "step": 1237 | |
| }, | |
| { | |
| "epoch": 2.650963597430407, | |
| "grad_norm": 0.19032293804902783, | |
| "learning_rate": 6.4682539682539696e-06, | |
| "loss": 0.2687, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 2.6531049250535332, | |
| "grad_norm": 0.1799378115721631, | |
| "learning_rate": 6.428571428571429e-06, | |
| "loss": 0.2708, | |
| "step": 1239 | |
| }, | |
| { | |
| "epoch": 2.6552462526766596, | |
| "grad_norm": 0.19301414441267256, | |
| "learning_rate": 6.3888888888888885e-06, | |
| "loss": 0.2952, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 2.657387580299786, | |
| "grad_norm": 0.1849662307056679, | |
| "learning_rate": 6.349206349206349e-06, | |
| "loss": 0.2668, | |
| "step": 1241 | |
| }, | |
| { | |
| "epoch": 2.6595289079229123, | |
| "grad_norm": 0.18055776145465033, | |
| "learning_rate": 6.30952380952381e-06, | |
| "loss": 0.2688, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 2.6616702355460387, | |
| "grad_norm": 0.1886120175112293, | |
| "learning_rate": 6.26984126984127e-06, | |
| "loss": 0.2858, | |
| "step": 1243 | |
| }, | |
| { | |
| "epoch": 2.663811563169165, | |
| "grad_norm": 0.18653293808215643, | |
| "learning_rate": 6.230158730158731e-06, | |
| "loss": 0.2848, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 2.6659528907922914, | |
| "grad_norm": 0.1843692042836253, | |
| "learning_rate": 6.190476190476191e-06, | |
| "loss": 0.2688, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 2.6680942184154177, | |
| "grad_norm": 0.19256050930680374, | |
| "learning_rate": 6.1507936507936505e-06, | |
| "loss": 0.276, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 2.670235546038544, | |
| "grad_norm": 0.1854008621346679, | |
| "learning_rate": 6.111111111111111e-06, | |
| "loss": 0.265, | |
| "step": 1247 | |
| }, | |
| { | |
| "epoch": 2.67237687366167, | |
| "grad_norm": 0.19104484469553526, | |
| "learning_rate": 6.071428571428572e-06, | |
| "loss": 0.2758, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 2.674518201284797, | |
| "grad_norm": 0.18892651998233595, | |
| "learning_rate": 6.031746031746032e-06, | |
| "loss": 0.2929, | |
| "step": 1249 | |
| }, | |
| { | |
| "epoch": 2.6766595289079227, | |
| "grad_norm": 0.21181690679734574, | |
| "learning_rate": 5.992063492063493e-06, | |
| "loss": 0.2766, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.6788008565310495, | |
| "grad_norm": 0.2257805250104584, | |
| "learning_rate": 5.9523809523809525e-06, | |
| "loss": 0.2767, | |
| "step": 1251 | |
| }, | |
| { | |
| "epoch": 2.6809421841541754, | |
| "grad_norm": 0.18618064006808122, | |
| "learning_rate": 5.9126984126984124e-06, | |
| "loss": 0.2768, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 2.683083511777302, | |
| "grad_norm": 0.17853160960610476, | |
| "learning_rate": 5.873015873015873e-06, | |
| "loss": 0.2819, | |
| "step": 1253 | |
| }, | |
| { | |
| "epoch": 2.685224839400428, | |
| "grad_norm": 0.1853256859343023, | |
| "learning_rate": 5.833333333333334e-06, | |
| "loss": 0.285, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 2.6873661670235545, | |
| "grad_norm": 0.20073292717441923, | |
| "learning_rate": 5.793650793650794e-06, | |
| "loss": 0.2717, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 2.689507494646681, | |
| "grad_norm": 0.18795175480989068, | |
| "learning_rate": 5.753968253968254e-06, | |
| "loss": 0.2785, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 2.691648822269807, | |
| "grad_norm": 0.1776226753510265, | |
| "learning_rate": 5.7142857142857145e-06, | |
| "loss": 0.2713, | |
| "step": 1257 | |
| }, | |
| { | |
| "epoch": 2.6937901498929335, | |
| "grad_norm": 0.17299646249891704, | |
| "learning_rate": 5.674603174603174e-06, | |
| "loss": 0.2671, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 2.69593147751606, | |
| "grad_norm": 0.1740946943196013, | |
| "learning_rate": 5.634920634920635e-06, | |
| "loss": 0.2724, | |
| "step": 1259 | |
| }, | |
| { | |
| "epoch": 2.6980728051391862, | |
| "grad_norm": 0.17712448583628357, | |
| "learning_rate": 5.595238095238096e-06, | |
| "loss": 0.281, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 2.7002141327623126, | |
| "grad_norm": 0.18442023632973145, | |
| "learning_rate": 5.555555555555556e-06, | |
| "loss": 0.2691, | |
| "step": 1261 | |
| }, | |
| { | |
| "epoch": 2.702355460385439, | |
| "grad_norm": 0.1813575399048717, | |
| "learning_rate": 5.515873015873016e-06, | |
| "loss": 0.2823, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 2.7044967880085653, | |
| "grad_norm": 0.18738697864152037, | |
| "learning_rate": 5.4761904761904765e-06, | |
| "loss": 0.2791, | |
| "step": 1263 | |
| }, | |
| { | |
| "epoch": 2.7066381156316917, | |
| "grad_norm": 0.18736026207972067, | |
| "learning_rate": 5.436507936507937e-06, | |
| "loss": 0.2777, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 2.708779443254818, | |
| "grad_norm": 0.1769303559004464, | |
| "learning_rate": 5.396825396825397e-06, | |
| "loss": 0.2891, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 2.7109207708779444, | |
| "grad_norm": 0.18034406343578302, | |
| "learning_rate": 5.357142857142857e-06, | |
| "loss": 0.2749, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 2.7130620985010707, | |
| "grad_norm": 0.18273873357915627, | |
| "learning_rate": 5.317460317460318e-06, | |
| "loss": 0.2725, | |
| "step": 1267 | |
| }, | |
| { | |
| "epoch": 2.715203426124197, | |
| "grad_norm": 0.19359646334285074, | |
| "learning_rate": 5.277777777777778e-06, | |
| "loss": 0.2784, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 2.7173447537473234, | |
| "grad_norm": 0.18794519478477986, | |
| "learning_rate": 5.2380952380952384e-06, | |
| "loss": 0.275, | |
| "step": 1269 | |
| }, | |
| { | |
| "epoch": 2.71948608137045, | |
| "grad_norm": 0.18233700769386646, | |
| "learning_rate": 5.198412698412699e-06, | |
| "loss": 0.2625, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 2.721627408993576, | |
| "grad_norm": 0.1870952848168329, | |
| "learning_rate": 5.158730158730159e-06, | |
| "loss": 0.2792, | |
| "step": 1271 | |
| }, | |
| { | |
| "epoch": 2.7237687366167025, | |
| "grad_norm": 0.19371058024908325, | |
| "learning_rate": 5.119047619047619e-06, | |
| "loss": 0.2885, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 2.725910064239829, | |
| "grad_norm": 0.22178418486723533, | |
| "learning_rate": 5.07936507936508e-06, | |
| "loss": 0.2994, | |
| "step": 1273 | |
| }, | |
| { | |
| "epoch": 2.728051391862955, | |
| "grad_norm": 0.2033197772755529, | |
| "learning_rate": 5.03968253968254e-06, | |
| "loss": 0.2798, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 2.730192719486081, | |
| "grad_norm": 0.18764851342867586, | |
| "learning_rate": 5e-06, | |
| "loss": 0.2739, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 2.732334047109208, | |
| "grad_norm": 0.18851110490571485, | |
| "learning_rate": 4.96031746031746e-06, | |
| "loss": 0.2807, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 2.734475374732334, | |
| "grad_norm": 0.1860752013848117, | |
| "learning_rate": 4.920634920634921e-06, | |
| "loss": 0.2808, | |
| "step": 1277 | |
| }, | |
| { | |
| "epoch": 2.7366167023554606, | |
| "grad_norm": 0.1913864953828912, | |
| "learning_rate": 4.880952380952381e-06, | |
| "loss": 0.2857, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 2.7387580299785865, | |
| "grad_norm": 0.18956585829358527, | |
| "learning_rate": 4.841269841269842e-06, | |
| "loss": 0.2929, | |
| "step": 1279 | |
| }, | |
| { | |
| "epoch": 2.7408993576017133, | |
| "grad_norm": 0.18168087559276616, | |
| "learning_rate": 4.8015873015873025e-06, | |
| "loss": 0.2639, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 2.7430406852248392, | |
| "grad_norm": 0.18483118773286475, | |
| "learning_rate": 4.7619047619047615e-06, | |
| "loss": 0.2796, | |
| "step": 1281 | |
| }, | |
| { | |
| "epoch": 2.7451820128479656, | |
| "grad_norm": 0.18660137821522485, | |
| "learning_rate": 4.722222222222222e-06, | |
| "loss": 0.2767, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 2.747323340471092, | |
| "grad_norm": 0.1870410587872287, | |
| "learning_rate": 4.682539682539683e-06, | |
| "loss": 0.2608, | |
| "step": 1283 | |
| }, | |
| { | |
| "epoch": 2.7494646680942183, | |
| "grad_norm": 0.18934420087113804, | |
| "learning_rate": 4.642857142857143e-06, | |
| "loss": 0.2776, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 2.7516059957173447, | |
| "grad_norm": 0.1944710962243241, | |
| "learning_rate": 4.603174603174604e-06, | |
| "loss": 0.2819, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 2.753747323340471, | |
| "grad_norm": 0.18507420980354902, | |
| "learning_rate": 4.563492063492064e-06, | |
| "loss": 0.2675, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 2.7558886509635974, | |
| "grad_norm": 0.18671295615639139, | |
| "learning_rate": 4.5238095238095235e-06, | |
| "loss": 0.2919, | |
| "step": 1287 | |
| }, | |
| { | |
| "epoch": 2.7580299785867237, | |
| "grad_norm": 0.1820586621003967, | |
| "learning_rate": 4.484126984126984e-06, | |
| "loss": 0.276, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 2.76017130620985, | |
| "grad_norm": 0.18863107638734553, | |
| "learning_rate": 4.444444444444445e-06, | |
| "loss": 0.2812, | |
| "step": 1289 | |
| }, | |
| { | |
| "epoch": 2.7623126338329764, | |
| "grad_norm": 0.1886951420800492, | |
| "learning_rate": 4.404761904761905e-06, | |
| "loss": 0.2684, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 2.764453961456103, | |
| "grad_norm": 0.18624344207988158, | |
| "learning_rate": 4.365079365079365e-06, | |
| "loss": 0.28, | |
| "step": 1291 | |
| }, | |
| { | |
| "epoch": 2.766595289079229, | |
| "grad_norm": 0.18959188951160139, | |
| "learning_rate": 4.3253968253968256e-06, | |
| "loss": 0.2583, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 2.7687366167023555, | |
| "grad_norm": 0.18636470840156413, | |
| "learning_rate": 4.285714285714286e-06, | |
| "loss": 0.2816, | |
| "step": 1293 | |
| }, | |
| { | |
| "epoch": 2.770877944325482, | |
| "grad_norm": 0.18043083965620976, | |
| "learning_rate": 4.246031746031746e-06, | |
| "loss": 0.2754, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 2.773019271948608, | |
| "grad_norm": 0.18037368839844325, | |
| "learning_rate": 4.206349206349207e-06, | |
| "loss": 0.279, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 2.7751605995717346, | |
| "grad_norm": 0.1823817127914405, | |
| "learning_rate": 4.166666666666667e-06, | |
| "loss": 0.2854, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 2.777301927194861, | |
| "grad_norm": 0.18058730214933147, | |
| "learning_rate": 4.126984126984127e-06, | |
| "loss": 0.2734, | |
| "step": 1297 | |
| }, | |
| { | |
| "epoch": 2.7794432548179873, | |
| "grad_norm": 0.1879979042288386, | |
| "learning_rate": 4.0873015873015875e-06, | |
| "loss": 0.2717, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 2.7815845824411136, | |
| "grad_norm": 0.18341480124861972, | |
| "learning_rate": 4.047619047619048e-06, | |
| "loss": 0.2836, | |
| "step": 1299 | |
| }, | |
| { | |
| "epoch": 2.78372591006424, | |
| "grad_norm": 0.18324668375068373, | |
| "learning_rate": 4.007936507936508e-06, | |
| "loss": 0.286, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.7858672376873663, | |
| "grad_norm": 0.1965187754215459, | |
| "learning_rate": 3.968253968253968e-06, | |
| "loss": 0.2823, | |
| "step": 1301 | |
| }, | |
| { | |
| "epoch": 2.7880085653104922, | |
| "grad_norm": 0.18573889524253487, | |
| "learning_rate": 3.928571428571429e-06, | |
| "loss": 0.2751, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 2.790149892933619, | |
| "grad_norm": 0.19265696353956446, | |
| "learning_rate": 3.888888888888889e-06, | |
| "loss": 0.2699, | |
| "step": 1303 | |
| }, | |
| { | |
| "epoch": 2.792291220556745, | |
| "grad_norm": 0.19781851093500513, | |
| "learning_rate": 3.8492063492063495e-06, | |
| "loss": 0.2779, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 2.7944325481798717, | |
| "grad_norm": 0.18360741758477603, | |
| "learning_rate": 3.8095238095238102e-06, | |
| "loss": 0.2787, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 2.7965738758029977, | |
| "grad_norm": 0.1862707572751917, | |
| "learning_rate": 3.7698412698412697e-06, | |
| "loss": 0.2774, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 2.7987152034261245, | |
| "grad_norm": 0.17847216017441006, | |
| "learning_rate": 3.7301587301587305e-06, | |
| "loss": 0.2669, | |
| "step": 1307 | |
| }, | |
| { | |
| "epoch": 2.8008565310492504, | |
| "grad_norm": 0.1743513654767412, | |
| "learning_rate": 3.690476190476191e-06, | |
| "loss": 0.2616, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 2.8029978586723767, | |
| "grad_norm": 0.17678834999880497, | |
| "learning_rate": 3.650793650793651e-06, | |
| "loss": 0.2729, | |
| "step": 1309 | |
| }, | |
| { | |
| "epoch": 2.805139186295503, | |
| "grad_norm": 0.18147041110133913, | |
| "learning_rate": 3.611111111111111e-06, | |
| "loss": 0.265, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 2.8072805139186294, | |
| "grad_norm": 0.19719201740239473, | |
| "learning_rate": 3.5714285714285714e-06, | |
| "loss": 0.2923, | |
| "step": 1311 | |
| }, | |
| { | |
| "epoch": 2.809421841541756, | |
| "grad_norm": 0.1876418916737588, | |
| "learning_rate": 3.5317460317460317e-06, | |
| "loss": 0.2909, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 2.811563169164882, | |
| "grad_norm": 0.18222086562470918, | |
| "learning_rate": 3.4920634920634924e-06, | |
| "loss": 0.2643, | |
| "step": 1313 | |
| }, | |
| { | |
| "epoch": 2.8137044967880085, | |
| "grad_norm": 0.18368483933597352, | |
| "learning_rate": 3.4523809523809528e-06, | |
| "loss": 0.2758, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 2.815845824411135, | |
| "grad_norm": 0.1837715941113332, | |
| "learning_rate": 3.4126984126984127e-06, | |
| "loss": 0.2787, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 2.817987152034261, | |
| "grad_norm": 0.19049420307445103, | |
| "learning_rate": 3.373015873015873e-06, | |
| "loss": 0.2847, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 2.8201284796573876, | |
| "grad_norm": 0.17835342017317368, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 0.2786, | |
| "step": 1317 | |
| }, | |
| { | |
| "epoch": 2.822269807280514, | |
| "grad_norm": 0.17787960285957102, | |
| "learning_rate": 3.293650793650794e-06, | |
| "loss": 0.2727, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 2.8244111349036403, | |
| "grad_norm": 0.17862229300209337, | |
| "learning_rate": 3.2539682539682544e-06, | |
| "loss": 0.2695, | |
| "step": 1319 | |
| }, | |
| { | |
| "epoch": 2.8265524625267666, | |
| "grad_norm": 0.1901757951555972, | |
| "learning_rate": 3.2142857142857143e-06, | |
| "loss": 0.2575, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 2.828693790149893, | |
| "grad_norm": 0.18103134737351187, | |
| "learning_rate": 3.1746031746031746e-06, | |
| "loss": 0.2746, | |
| "step": 1321 | |
| }, | |
| { | |
| "epoch": 2.8308351177730193, | |
| "grad_norm": 0.18702283268180547, | |
| "learning_rate": 3.134920634920635e-06, | |
| "loss": 0.2861, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 2.8329764453961457, | |
| "grad_norm": 0.17858016179861205, | |
| "learning_rate": 3.0952380952380953e-06, | |
| "loss": 0.2767, | |
| "step": 1323 | |
| }, | |
| { | |
| "epoch": 2.835117773019272, | |
| "grad_norm": 0.18620061639621535, | |
| "learning_rate": 3.0555555555555556e-06, | |
| "loss": 0.2815, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 2.8372591006423984, | |
| "grad_norm": 0.1814788975276678, | |
| "learning_rate": 3.015873015873016e-06, | |
| "loss": 0.2767, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 2.8394004282655247, | |
| "grad_norm": 0.18778455877330474, | |
| "learning_rate": 2.9761904761904763e-06, | |
| "loss": 0.2928, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 2.841541755888651, | |
| "grad_norm": 0.17098509883295968, | |
| "learning_rate": 2.9365079365079366e-06, | |
| "loss": 0.2648, | |
| "step": 1327 | |
| }, | |
| { | |
| "epoch": 2.8436830835117775, | |
| "grad_norm": 0.17520171745909632, | |
| "learning_rate": 2.896825396825397e-06, | |
| "loss": 0.2724, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 2.8458244111349034, | |
| "grad_norm": 0.17536544537693816, | |
| "learning_rate": 2.8571428571428573e-06, | |
| "loss": 0.2783, | |
| "step": 1329 | |
| }, | |
| { | |
| "epoch": 2.84796573875803, | |
| "grad_norm": 0.17658589120185325, | |
| "learning_rate": 2.8174603174603176e-06, | |
| "loss": 0.2758, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 2.850107066381156, | |
| "grad_norm": 0.17567307855848976, | |
| "learning_rate": 2.777777777777778e-06, | |
| "loss": 0.2668, | |
| "step": 1331 | |
| }, | |
| { | |
| "epoch": 2.852248394004283, | |
| "grad_norm": 0.1806128444041324, | |
| "learning_rate": 2.7380952380952382e-06, | |
| "loss": 0.2835, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 2.854389721627409, | |
| "grad_norm": 0.18946086509767565, | |
| "learning_rate": 2.6984126984126986e-06, | |
| "loss": 0.271, | |
| "step": 1333 | |
| }, | |
| { | |
| "epoch": 2.8565310492505356, | |
| "grad_norm": 0.17379093474956162, | |
| "learning_rate": 2.658730158730159e-06, | |
| "loss": 0.2673, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 2.8586723768736615, | |
| "grad_norm": 0.18610236327347768, | |
| "learning_rate": 2.6190476190476192e-06, | |
| "loss": 0.2635, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 2.860813704496788, | |
| "grad_norm": 0.1773051015057536, | |
| "learning_rate": 2.5793650793650795e-06, | |
| "loss": 0.2814, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 2.862955032119914, | |
| "grad_norm": 0.16809841728125763, | |
| "learning_rate": 2.53968253968254e-06, | |
| "loss": 0.267, | |
| "step": 1337 | |
| }, | |
| { | |
| "epoch": 2.8650963597430406, | |
| "grad_norm": 0.1737833908046207, | |
| "learning_rate": 2.5e-06, | |
| "loss": 0.2701, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 2.867237687366167, | |
| "grad_norm": 0.1748499028349341, | |
| "learning_rate": 2.4603174603174605e-06, | |
| "loss": 0.2723, | |
| "step": 1339 | |
| }, | |
| { | |
| "epoch": 2.8693790149892933, | |
| "grad_norm": 0.182639754025226, | |
| "learning_rate": 2.420634920634921e-06, | |
| "loss": 0.2914, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 2.8715203426124196, | |
| "grad_norm": 0.17803450959486153, | |
| "learning_rate": 2.3809523809523808e-06, | |
| "loss": 0.2689, | |
| "step": 1341 | |
| }, | |
| { | |
| "epoch": 2.873661670235546, | |
| "grad_norm": 0.1812081036704342, | |
| "learning_rate": 2.3412698412698415e-06, | |
| "loss": 0.2725, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 2.8758029978586723, | |
| "grad_norm": 0.17858190828209497, | |
| "learning_rate": 2.301587301587302e-06, | |
| "loss": 0.2809, | |
| "step": 1343 | |
| }, | |
| { | |
| "epoch": 2.8779443254817987, | |
| "grad_norm": 0.17897541722080942, | |
| "learning_rate": 2.2619047619047617e-06, | |
| "loss": 0.2827, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 2.880085653104925, | |
| "grad_norm": 0.18620447467008244, | |
| "learning_rate": 2.2222222222222225e-06, | |
| "loss": 0.2739, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 2.8822269807280514, | |
| "grad_norm": 0.17922254926198172, | |
| "learning_rate": 2.1825396825396824e-06, | |
| "loss": 0.2937, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 2.8843683083511777, | |
| "grad_norm": 0.1733438564135853, | |
| "learning_rate": 2.142857142857143e-06, | |
| "loss": 0.2648, | |
| "step": 1347 | |
| }, | |
| { | |
| "epoch": 2.886509635974304, | |
| "grad_norm": 0.18332268371523366, | |
| "learning_rate": 2.1031746031746035e-06, | |
| "loss": 0.2678, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 2.8886509635974305, | |
| "grad_norm": 0.18120143996921909, | |
| "learning_rate": 2.0634920634920634e-06, | |
| "loss": 0.2703, | |
| "step": 1349 | |
| }, | |
| { | |
| "epoch": 2.890792291220557, | |
| "grad_norm": 0.17603741176248058, | |
| "learning_rate": 2.023809523809524e-06, | |
| "loss": 0.2634, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 2.892933618843683, | |
| "grad_norm": 0.18073214958209138, | |
| "learning_rate": 1.984126984126984e-06, | |
| "loss": 0.2776, | |
| "step": 1351 | |
| }, | |
| { | |
| "epoch": 2.8950749464668095, | |
| "grad_norm": 0.1765653541500017, | |
| "learning_rate": 1.9444444444444444e-06, | |
| "loss": 0.27, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 2.897216274089936, | |
| "grad_norm": 0.17852720128484673, | |
| "learning_rate": 1.9047619047619051e-06, | |
| "loss": 0.2731, | |
| "step": 1353 | |
| }, | |
| { | |
| "epoch": 2.8993576017130622, | |
| "grad_norm": 0.17920803523071938, | |
| "learning_rate": 1.8650793650793652e-06, | |
| "loss": 0.2803, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 2.9014989293361886, | |
| "grad_norm": 0.17112784325767613, | |
| "learning_rate": 1.8253968253968256e-06, | |
| "loss": 0.2753, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 2.903640256959315, | |
| "grad_norm": 0.18339378168166692, | |
| "learning_rate": 1.7857142857142857e-06, | |
| "loss": 0.273, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 2.9057815845824413, | |
| "grad_norm": 0.1984295346113488, | |
| "learning_rate": 1.7460317460317462e-06, | |
| "loss": 0.2796, | |
| "step": 1357 | |
| }, | |
| { | |
| "epoch": 2.907922912205567, | |
| "grad_norm": 0.18306750932125437, | |
| "learning_rate": 1.7063492063492063e-06, | |
| "loss": 0.2844, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 2.910064239828694, | |
| "grad_norm": 0.18197706558527013, | |
| "learning_rate": 1.6666666666666667e-06, | |
| "loss": 0.2685, | |
| "step": 1359 | |
| }, | |
| { | |
| "epoch": 2.91220556745182, | |
| "grad_norm": 0.17423685740773412, | |
| "learning_rate": 1.6269841269841272e-06, | |
| "loss": 0.2614, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 2.9143468950749467, | |
| "grad_norm": 0.17655186248410631, | |
| "learning_rate": 1.5873015873015873e-06, | |
| "loss": 0.27, | |
| "step": 1361 | |
| }, | |
| { | |
| "epoch": 2.9164882226980726, | |
| "grad_norm": 0.17672011279618857, | |
| "learning_rate": 1.5476190476190476e-06, | |
| "loss": 0.2768, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 2.9186295503211994, | |
| "grad_norm": 0.184344574950677, | |
| "learning_rate": 1.507936507936508e-06, | |
| "loss": 0.3001, | |
| "step": 1363 | |
| }, | |
| { | |
| "epoch": 2.9207708779443253, | |
| "grad_norm": 0.17385175676196593, | |
| "learning_rate": 1.4682539682539683e-06, | |
| "loss": 0.2687, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 2.9229122055674517, | |
| "grad_norm": 0.17861379692559234, | |
| "learning_rate": 1.4285714285714286e-06, | |
| "loss": 0.2719, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 2.925053533190578, | |
| "grad_norm": 0.177882396776031, | |
| "learning_rate": 1.388888888888889e-06, | |
| "loss": 0.2772, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 2.9271948608137044, | |
| "grad_norm": 0.16826587147555336, | |
| "learning_rate": 1.3492063492063493e-06, | |
| "loss": 0.2701, | |
| "step": 1367 | |
| }, | |
| { | |
| "epoch": 2.9293361884368307, | |
| "grad_norm": 0.18519355648741595, | |
| "learning_rate": 1.3095238095238096e-06, | |
| "loss": 0.2992, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 2.931477516059957, | |
| "grad_norm": 0.1782680704296424, | |
| "learning_rate": 1.26984126984127e-06, | |
| "loss": 0.2801, | |
| "step": 1369 | |
| }, | |
| { | |
| "epoch": 2.9336188436830835, | |
| "grad_norm": 0.17474930461615157, | |
| "learning_rate": 1.2301587301587303e-06, | |
| "loss": 0.2766, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 2.93576017130621, | |
| "grad_norm": 0.18245581683863532, | |
| "learning_rate": 1.1904761904761904e-06, | |
| "loss": 0.2662, | |
| "step": 1371 | |
| }, | |
| { | |
| "epoch": 2.937901498929336, | |
| "grad_norm": 0.17789694691861707, | |
| "learning_rate": 1.150793650793651e-06, | |
| "loss": 0.2885, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 2.9400428265524625, | |
| "grad_norm": 0.1687387330562206, | |
| "learning_rate": 1.1111111111111112e-06, | |
| "loss": 0.2537, | |
| "step": 1373 | |
| }, | |
| { | |
| "epoch": 2.942184154175589, | |
| "grad_norm": 0.18142938523732535, | |
| "learning_rate": 1.0714285714285716e-06, | |
| "loss": 0.2695, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 2.9443254817987152, | |
| "grad_norm": 0.16915728914832637, | |
| "learning_rate": 1.0317460317460317e-06, | |
| "loss": 0.2641, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 2.9464668094218416, | |
| "grad_norm": 0.1780681979316246, | |
| "learning_rate": 9.92063492063492e-07, | |
| "loss": 0.2693, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 2.948608137044968, | |
| "grad_norm": 0.178537434553442, | |
| "learning_rate": 9.523809523809526e-07, | |
| "loss": 0.2635, | |
| "step": 1377 | |
| }, | |
| { | |
| "epoch": 2.9507494646680943, | |
| "grad_norm": 0.17999898864703598, | |
| "learning_rate": 9.126984126984128e-07, | |
| "loss": 0.2797, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 2.9528907922912206, | |
| "grad_norm": 0.18157617617292318, | |
| "learning_rate": 8.730158730158731e-07, | |
| "loss": 0.2778, | |
| "step": 1379 | |
| }, | |
| { | |
| "epoch": 2.955032119914347, | |
| "grad_norm": 0.180109121804787, | |
| "learning_rate": 8.333333333333333e-07, | |
| "loss": 0.2822, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 2.9571734475374734, | |
| "grad_norm": 0.17585603613458994, | |
| "learning_rate": 7.936507936507937e-07, | |
| "loss": 0.2796, | |
| "step": 1381 | |
| }, | |
| { | |
| "epoch": 2.9593147751605997, | |
| "grad_norm": 0.178180131081556, | |
| "learning_rate": 7.53968253968254e-07, | |
| "loss": 0.288, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 2.961456102783726, | |
| "grad_norm": 0.174756761248787, | |
| "learning_rate": 7.142857142857143e-07, | |
| "loss": 0.2963, | |
| "step": 1383 | |
| }, | |
| { | |
| "epoch": 2.9635974304068524, | |
| "grad_norm": 0.17507554933400857, | |
| "learning_rate": 6.746031746031746e-07, | |
| "loss": 0.2743, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 2.9657387580299783, | |
| "grad_norm": 0.17237340079111105, | |
| "learning_rate": 6.34920634920635e-07, | |
| "loss": 0.2812, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 2.967880085653105, | |
| "grad_norm": 0.17800514856704938, | |
| "learning_rate": 5.952380952380952e-07, | |
| "loss": 0.2901, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 2.970021413276231, | |
| "grad_norm": 0.17518183885385935, | |
| "learning_rate": 5.555555555555556e-07, | |
| "loss": 0.2817, | |
| "step": 1387 | |
| }, | |
| { | |
| "epoch": 2.972162740899358, | |
| "grad_norm": 0.19209259654412175, | |
| "learning_rate": 5.158730158730158e-07, | |
| "loss": 0.2979, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 2.9743040685224837, | |
| "grad_norm": 0.18038864732034635, | |
| "learning_rate": 4.761904761904763e-07, | |
| "loss": 0.2758, | |
| "step": 1389 | |
| }, | |
| { | |
| "epoch": 2.9764453961456105, | |
| "grad_norm": 0.17455696050193842, | |
| "learning_rate": 4.3650793650793655e-07, | |
| "loss": 0.2783, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 2.9785867237687365, | |
| "grad_norm": 0.17453553372749056, | |
| "learning_rate": 3.9682539682539683e-07, | |
| "loss": 0.2662, | |
| "step": 1391 | |
| }, | |
| { | |
| "epoch": 2.980728051391863, | |
| "grad_norm": 0.17577598361771224, | |
| "learning_rate": 3.5714285714285716e-07, | |
| "loss": 0.2856, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 2.982869379014989, | |
| "grad_norm": 0.16869697010362938, | |
| "learning_rate": 3.174603174603175e-07, | |
| "loss": 0.2733, | |
| "step": 1393 | |
| }, | |
| { | |
| "epoch": 2.9850107066381155, | |
| "grad_norm": 0.1776277204829826, | |
| "learning_rate": 2.777777777777778e-07, | |
| "loss": 0.2734, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 2.987152034261242, | |
| "grad_norm": 0.17086305942740904, | |
| "learning_rate": 2.3809523809523814e-07, | |
| "loss": 0.2697, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 2.9892933618843682, | |
| "grad_norm": 0.18182256735600724, | |
| "learning_rate": 1.9841269841269841e-07, | |
| "loss": 0.2862, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 2.9914346895074946, | |
| "grad_norm": 0.1740762738414256, | |
| "learning_rate": 1.5873015873015874e-07, | |
| "loss": 0.2888, | |
| "step": 1397 | |
| }, | |
| { | |
| "epoch": 2.993576017130621, | |
| "grad_norm": 0.1737350102448509, | |
| "learning_rate": 1.1904761904761907e-07, | |
| "loss": 0.2777, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 2.9957173447537473, | |
| "grad_norm": 0.16946025736659603, | |
| "learning_rate": 7.936507936507937e-08, | |
| "loss": 0.2651, | |
| "step": 1399 | |
| }, | |
| { | |
| "epoch": 2.9978586723768736, | |
| "grad_norm": 0.1722572338000261, | |
| "learning_rate": 3.9682539682539686e-08, | |
| "loss": 0.261, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.17642900461088157, | |
| "learning_rate": 0.0, | |
| "loss": 0.2644, | |
| "step": 1401 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 1401, | |
| "total_flos": 1.5578375880118895e+19, | |
| "train_loss": 0.4592563231913725, | |
| "train_runtime": 43286.824, | |
| "train_samples_per_second": 0.518, | |
| "train_steps_per_second": 0.032 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1401, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.5578375880118895e+19, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |