| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.6547167551598866, | |
| "eval_steps": 500, | |
| "global_step": 8200, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0007984350672681544, | |
| "grad_norm": 3.5232925702802524, | |
| "learning_rate": 2.942208207641671e-05, | |
| "loss": 0.8424, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0015968701345363088, | |
| "grad_norm": 3.736142434929718, | |
| "learning_rate": 3.8279011316305725e-05, | |
| "loss": 0.7905, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.0023953052018044632, | |
| "grad_norm": 4.955020334614627, | |
| "learning_rate": 4.345998279318154e-05, | |
| "loss": 0.671, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.0031937402690726175, | |
| "grad_norm": 8.588687610785087, | |
| "learning_rate": 4.713594055619474e-05, | |
| "loss": 0.5332, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.003992175336340772, | |
| "grad_norm": 5.482949306795534, | |
| "learning_rate": 4.9987234912944383e-05, | |
| "loss": 0.3416, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.0047906104036089265, | |
| "grad_norm": 6.784951935057385, | |
| "learning_rate": 5.231691203307055e-05, | |
| "loss": 0.2779, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.005589045470877081, | |
| "grad_norm": 5.51385584449277, | |
| "learning_rate": 5.428662597233506e-05, | |
| "loss": 0.2096, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.006387480538145235, | |
| "grad_norm": 4.2244974879162, | |
| "learning_rate": 5.599286979608376e-05, | |
| "loss": 0.252, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.00718591560541339, | |
| "grad_norm": 3.733833668520439, | |
| "learning_rate": 5.749788350994636e-05, | |
| "loss": 0.1798, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.007984350672681544, | |
| "grad_norm": 2.261256472451689, | |
| "learning_rate": 5.884416415283342e-05, | |
| "loss": 0.1689, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.008782785739949698, | |
| "grad_norm": 1.801856803973853, | |
| "learning_rate": 6.0062023132921985e-05, | |
| "loss": 0.1418, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.009581220807217853, | |
| "grad_norm": 5.830051184887845, | |
| "learning_rate": 6.117384127295957e-05, | |
| "loss": 0.1148, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.010379655874486008, | |
| "grad_norm": 0.5938772441566059, | |
| "learning_rate": 6.219661481646723e-05, | |
| "loss": 0.0991, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.011178090941754162, | |
| "grad_norm": 3.3844685803924732, | |
| "learning_rate": 6.314355521222407e-05, | |
| "loss": 0.121, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.011976526009022317, | |
| "grad_norm": 2.6364672052008387, | |
| "learning_rate": 6.402513562970922e-05, | |
| "loss": 0.0952, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.01277496107629047, | |
| "grad_norm": 2.2940383537390376, | |
| "learning_rate": 6.484979903597278e-05, | |
| "loss": 0.1226, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.013573396143558625, | |
| "grad_norm": 2.0903566324645033, | |
| "learning_rate": 6.562445123204669e-05, | |
| "loss": 0.115, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.01437183121082678, | |
| "grad_norm": 1.5992990163675893, | |
| "learning_rate": 6.635481274983538e-05, | |
| "loss": 0.1142, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.015170266278094934, | |
| "grad_norm": 1.1091866267461175, | |
| "learning_rate": 6.704567547916425e-05, | |
| "loss": 0.0993, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.015968701345363087, | |
| "grad_norm": 2.2156336864111146, | |
| "learning_rate": 6.770109339272242e-05, | |
| "loss": 0.132, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.016767136412631244, | |
| "grad_norm": 3.251336662728112, | |
| "learning_rate": 6.832452668909989e-05, | |
| "loss": 0.0737, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.017565571479899397, | |
| "grad_norm": 2.568444919571399, | |
| "learning_rate": 6.8918952372811e-05, | |
| "loss": 0.0907, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.018364006547167553, | |
| "grad_norm": 0.7114489570730391, | |
| "learning_rate": 6.948695023346763e-05, | |
| "loss": 0.0781, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.019162441614435706, | |
| "grad_norm": 2.8086271912993794, | |
| "learning_rate": 7.003077051284858e-05, | |
| "loss": 0.115, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.01996087668170386, | |
| "grad_norm": 1.900134521715012, | |
| "learning_rate": 7.055238774947207e-05, | |
| "loss": 0.0993, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.020759311748972015, | |
| "grad_norm": 0.6776229066938406, | |
| "learning_rate": 7.105354405635625e-05, | |
| "loss": 0.0948, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.02155774681624017, | |
| "grad_norm": 1.8867241168177415, | |
| "learning_rate": 7.15357842267112e-05, | |
| "loss": 0.077, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.022356181883508325, | |
| "grad_norm": 1.2503978176846782, | |
| "learning_rate": 7.20004844521131e-05, | |
| "loss": 0.0829, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.023154616950776478, | |
| "grad_norm": 1.47247849944815, | |
| "learning_rate": 7.244887599898725e-05, | |
| "loss": 0.0887, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.023953052018044634, | |
| "grad_norm": 1.3484244136088377, | |
| "learning_rate": 7.288206486959823e-05, | |
| "loss": 0.1163, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.024751487085312787, | |
| "grad_norm": 7.942468080626284, | |
| "learning_rate": 7.330104823803253e-05, | |
| "loss": 0.1425, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.02554992215258094, | |
| "grad_norm": 4.655099563025342, | |
| "learning_rate": 7.37067282758618e-05, | |
| "loss": 0.1409, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.026348357219849097, | |
| "grad_norm": 1.3927603965707027, | |
| "learning_rate": 7.40999238496868e-05, | |
| "loss": 0.0887, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.02714679228711725, | |
| "grad_norm": 0.6712166946336496, | |
| "learning_rate": 7.448138047193572e-05, | |
| "loss": 0.0701, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.027945227354385406, | |
| "grad_norm": 0.819398704134634, | |
| "learning_rate": 7.485177880886274e-05, | |
| "loss": 0.064, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.02874366242165356, | |
| "grad_norm": 1.5850883570722307, | |
| "learning_rate": 7.52117419897244e-05, | |
| "loss": 0.1039, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.029542097488921712, | |
| "grad_norm": 3.4982586330431267, | |
| "learning_rate": 7.556184191429401e-05, | |
| "loss": 0.0512, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.03034053255618987, | |
| "grad_norm": 1.2751502486060688, | |
| "learning_rate": 7.590260471905327e-05, | |
| "loss": 0.082, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.03113896762345802, | |
| "grad_norm": 1.0821666549195599, | |
| "learning_rate": 7.623451553323206e-05, | |
| "loss": 0.0838, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.031937402690726174, | |
| "grad_norm": 0.3179937672264547, | |
| "learning_rate": 7.655802263261145e-05, | |
| "loss": 0.0682, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.03273583775799433, | |
| "grad_norm": 0.7683525367800101, | |
| "learning_rate": 7.687354108034464e-05, | |
| "loss": 0.0751, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.03353427282526249, | |
| "grad_norm": 1.2004951837928237, | |
| "learning_rate": 7.71814559289889e-05, | |
| "loss": 0.1034, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.03433270789253064, | |
| "grad_norm": 0.8513751742352591, | |
| "learning_rate": 7.748212504571692e-05, | |
| "loss": 0.1099, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.03513114295979879, | |
| "grad_norm": 0.6047209002381277, | |
| "learning_rate": 7.777588161270002e-05, | |
| "loss": 0.0657, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.03592957802706695, | |
| "grad_norm": 1.4997073770835052, | |
| "learning_rate": 7.806303634647405e-05, | |
| "loss": 0.0663, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.036728013094335106, | |
| "grad_norm": 2.450672575042776, | |
| "learning_rate": 7.834387947335665e-05, | |
| "loss": 0.0764, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.037526448161603256, | |
| "grad_norm": 1.5816060633166842, | |
| "learning_rate": 7.861868249240195e-05, | |
| "loss": 0.0894, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.03832488322887141, | |
| "grad_norm": 0.8978231524516874, | |
| "learning_rate": 7.888769975273761e-05, | |
| "loss": 0.093, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.03912331829613957, | |
| "grad_norm": 0.31880545463652393, | |
| "learning_rate": 7.91511698682534e-05, | |
| "loss": 0.0918, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.03992175336340772, | |
| "grad_norm": 0.41662701320676065, | |
| "learning_rate": 7.94093169893611e-05, | |
| "loss": 0.1022, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.040720188430675874, | |
| "grad_norm": 1.6604197089055341, | |
| "learning_rate": 7.966235194881151e-05, | |
| "loss": 0.0577, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.04151862349794403, | |
| "grad_norm": 0.5952537112068932, | |
| "learning_rate": 7.991047329624525e-05, | |
| "loss": 0.0824, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.04231705856521219, | |
| "grad_norm": 1.057400098552263, | |
| "learning_rate": 8.01538682341959e-05, | |
| "loss": 0.0635, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.04311549363248034, | |
| "grad_norm": 1.4367457504094763, | |
| "learning_rate": 8.039271346660021e-05, | |
| "loss": 0.0749, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.04391392869974849, | |
| "grad_norm": 1.4304249110893281, | |
| "learning_rate": 8.062717596944966e-05, | |
| "loss": 0.083, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.04471236376701665, | |
| "grad_norm": 0.7235154604788787, | |
| "learning_rate": 8.085741369200211e-05, | |
| "loss": 0.0688, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.0455107988342848, | |
| "grad_norm": 0.3530930527250422, | |
| "learning_rate": 8.108357619592909e-05, | |
| "loss": 0.0929, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.046309233901552956, | |
| "grad_norm": 0.9718806338973844, | |
| "learning_rate": 8.130580523887628e-05, | |
| "loss": 0.0553, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.04710766896882111, | |
| "grad_norm": 1.2335130967238364, | |
| "learning_rate": 8.15242353081395e-05, | |
| "loss": 0.1021, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.04790610403608927, | |
| "grad_norm": 0.3228008866221465, | |
| "learning_rate": 8.173899410948726e-05, | |
| "loss": 0.0791, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.04870453910335742, | |
| "grad_norm": 1.3015629710425247, | |
| "learning_rate": 8.195020301557898e-05, | |
| "loss": 0.0692, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.049502974170625574, | |
| "grad_norm": 0.6936581823866799, | |
| "learning_rate": 8.215797747792154e-05, | |
| "loss": 0.091, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.05030140923789373, | |
| "grad_norm": 2.046468793339183, | |
| "learning_rate": 8.236242740586472e-05, | |
| "loss": 0.0785, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.05109984430516188, | |
| "grad_norm": 4.620875624681255, | |
| "learning_rate": 8.256365751575082e-05, | |
| "loss": 0.1094, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.05189827937243004, | |
| "grad_norm": 1.0255647708402473, | |
| "learning_rate": 8.27617676529949e-05, | |
| "loss": 0.0535, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.05269671443969819, | |
| "grad_norm": 0.4616344444526033, | |
| "learning_rate": 8.295685308957582e-05, | |
| "loss": 0.0787, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.05349514950696634, | |
| "grad_norm": 1.3330176172712866, | |
| "learning_rate": 8.314900479915685e-05, | |
| "loss": 0.0914, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.0542935845742345, | |
| "grad_norm": 1.126229916038408, | |
| "learning_rate": 8.333830971182472e-05, | |
| "loss": 0.1071, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.055092019641502656, | |
| "grad_norm": 0.9532218500993943, | |
| "learning_rate": 8.352485095023246e-05, | |
| "loss": 0.0629, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.05589045470877081, | |
| "grad_norm": 1.2383873083152779, | |
| "learning_rate": 8.370870804875176e-05, | |
| "loss": 0.1098, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.05668888977603896, | |
| "grad_norm": 0.9810020490409863, | |
| "learning_rate": 8.388995715708101e-05, | |
| "loss": 0.0736, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.05748732484330712, | |
| "grad_norm": 0.3971929526776789, | |
| "learning_rate": 8.406867122961343e-05, | |
| "loss": 0.0773, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.058285759910575274, | |
| "grad_norm": 0.7108087299273486, | |
| "learning_rate": 8.424492020174429e-05, | |
| "loss": 0.1098, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.059084194977843424, | |
| "grad_norm": 2.2341124836718995, | |
| "learning_rate": 8.441877115418304e-05, | |
| "loss": 0.0813, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.05988263004511158, | |
| "grad_norm": 0.38550605242633584, | |
| "learning_rate": 8.45902884662369e-05, | |
| "loss": 0.0715, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.06068106511237974, | |
| "grad_norm": 0.4335701502840811, | |
| "learning_rate": 8.47595339589423e-05, | |
| "loss": 0.076, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.06147950017964789, | |
| "grad_norm": 0.4115317244701979, | |
| "learning_rate": 8.492656702884034e-05, | |
| "loss": 0.0953, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.06227793524691604, | |
| "grad_norm": 1.840713686032294, | |
| "learning_rate": 8.509144477312107e-05, | |
| "loss": 0.0764, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.0630763703141842, | |
| "grad_norm": 1.4905547592936923, | |
| "learning_rate": 8.525422210679596e-05, | |
| "loss": 0.0725, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.06387480538145235, | |
| "grad_norm": 0.5996295060118539, | |
| "learning_rate": 8.541495187250046e-05, | |
| "loss": 0.0769, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.06467324044872051, | |
| "grad_norm": 1.3404150641883488, | |
| "learning_rate": 8.557368494347602e-05, | |
| "loss": 0.0821, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.06547167551598866, | |
| "grad_norm": 0.4748718025375387, | |
| "learning_rate": 8.573047032023365e-05, | |
| "loss": 0.0874, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.06627011058325681, | |
| "grad_norm": 0.7725716996053142, | |
| "learning_rate": 8.588535522135874e-05, | |
| "loss": 0.0772, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.06706854565052497, | |
| "grad_norm": 0.822250642719596, | |
| "learning_rate": 8.603838516887792e-05, | |
| "loss": 0.0606, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.06786698071779312, | |
| "grad_norm": 0.6797537145210232, | |
| "learning_rate": 8.618960406857437e-05, | |
| "loss": 0.0587, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.06866541578506127, | |
| "grad_norm": 0.3442856654043566, | |
| "learning_rate": 8.633905428560595e-05, | |
| "loss": 0.0791, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.06946385085232944, | |
| "grad_norm": 1.9321932414949274, | |
| "learning_rate": 8.648677671575208e-05, | |
| "loss": 0.1101, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.07026228591959759, | |
| "grad_norm": 0.8091988266382711, | |
| "learning_rate": 8.663281085258905e-05, | |
| "loss": 0.0935, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.07106072098686575, | |
| "grad_norm": 0.9736053752084688, | |
| "learning_rate": 8.677719485086984e-05, | |
| "loss": 0.0892, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.0718591560541339, | |
| "grad_norm": 0.8209361814313335, | |
| "learning_rate": 8.691996558636308e-05, | |
| "loss": 0.0737, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.07265759112140205, | |
| "grad_norm": 0.4035267647165605, | |
| "learning_rate": 8.706115871238559e-05, | |
| "loss": 0.0553, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.07345602618867021, | |
| "grad_norm": 0.7616589733118284, | |
| "learning_rate": 8.720080871324567e-05, | |
| "loss": 0.0615, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.07425446125593836, | |
| "grad_norm": 3.3729080844699326, | |
| "learning_rate": 8.733894895479733e-05, | |
| "loss": 0.0819, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.07505289632320651, | |
| "grad_norm": 0.15929664401491783, | |
| "learning_rate": 8.747561173229096e-05, | |
| "loss": 0.0701, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.07585133139047467, | |
| "grad_norm": 0.884846200272443, | |
| "learning_rate": 8.761082831569194e-05, | |
| "loss": 0.0846, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.07664976645774282, | |
| "grad_norm": 0.4976508744267154, | |
| "learning_rate": 8.774462899262663e-05, | |
| "loss": 0.1038, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.07744820152501097, | |
| "grad_norm": 2.4405255315055334, | |
| "learning_rate": 8.787704310910292e-05, | |
| "loss": 0.083, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.07824663659227914, | |
| "grad_norm": 0.5928086171271278, | |
| "learning_rate": 8.800809910814243e-05, | |
| "loss": 0.0613, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.07904507165954729, | |
| "grad_norm": 0.7380085702017415, | |
| "learning_rate": 8.813782456645164e-05, | |
| "loss": 0.0741, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.07984350672681544, | |
| "grad_norm": 0.8908559197171063, | |
| "learning_rate": 8.826624622925012e-05, | |
| "loss": 0.0609, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.0806419417940836, | |
| "grad_norm": 1.3924647259072407, | |
| "learning_rate": 8.83933900433659e-05, | |
| "loss": 0.0804, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.08144037686135175, | |
| "grad_norm": 0.6744120448203766, | |
| "learning_rate": 8.851928118870054e-05, | |
| "loss": 0.0782, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.0822388119286199, | |
| "grad_norm": 1.2461058480859704, | |
| "learning_rate": 8.86439441081591e-05, | |
| "loss": 0.0626, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.08303724699588806, | |
| "grad_norm": 2.467688787000267, | |
| "learning_rate": 8.876740253613428e-05, | |
| "loss": 0.087, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.08383568206315621, | |
| "grad_norm": 0.6757524264432309, | |
| "learning_rate": 8.888967952562756e-05, | |
| "loss": 0.086, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.08463411713042437, | |
| "grad_norm": 2.165699518864385, | |
| "learning_rate": 8.901079747408493e-05, | |
| "loss": 0.0733, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.08543255219769252, | |
| "grad_norm": 0.3953210869404968, | |
| "learning_rate": 8.913077814801954e-05, | |
| "loss": 0.0962, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.08623098726496067, | |
| "grad_norm": 0.5296065235489941, | |
| "learning_rate": 8.924964270648924e-05, | |
| "loss": 0.06, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.08702942233222884, | |
| "grad_norm": 0.20038342872971074, | |
| "learning_rate": 8.936741172349198e-05, | |
| "loss": 0.0775, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.08782785739949699, | |
| "grad_norm": 0.8356649350481642, | |
| "learning_rate": 8.948410520933869e-05, | |
| "loss": 0.0868, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.08862629246676514, | |
| "grad_norm": 1.0589224823029895, | |
| "learning_rate": 8.959974263105884e-05, | |
| "loss": 0.0619, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.0894247275340333, | |
| "grad_norm": 0.7960185516540815, | |
| "learning_rate": 8.971434293189114e-05, | |
| "loss": 0.0576, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.09022316260130145, | |
| "grad_norm": 1.2994761751707133, | |
| "learning_rate": 8.982792454990772e-05, | |
| "loss": 0.0812, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.0910215976685696, | |
| "grad_norm": 0.822292570186698, | |
| "learning_rate": 8.99405054358181e-05, | |
| "loss": 0.0691, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.09182003273583776, | |
| "grad_norm": 1.241640377765173, | |
| "learning_rate": 9.005210306999533e-05, | |
| "loss": 0.0619, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.09261846780310591, | |
| "grad_norm": 0.7758797635010947, | |
| "learning_rate": 9.01627344787653e-05, | |
| "loss": 0.0922, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.09341690287037406, | |
| "grad_norm": 1.104880691096772, | |
| "learning_rate": 9.027241624999689e-05, | |
| "loss": 0.0981, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.09421533793764222, | |
| "grad_norm": 0.10918433398556385, | |
| "learning_rate": 9.038116454802852e-05, | |
| "loss": 0.0612, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.09501377300491037, | |
| "grad_norm": 0.5247380213024838, | |
| "learning_rate": 9.048899512796504e-05, | |
| "loss": 0.0687, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.09581220807217854, | |
| "grad_norm": 0.5310899070310212, | |
| "learning_rate": 9.059592334937628e-05, | |
| "loss": 0.0725, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.09661064313944669, | |
| "grad_norm": 0.13475686039340207, | |
| "learning_rate": 9.070196418942726e-05, | |
| "loss": 0.0586, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.09740907820671484, | |
| "grad_norm": 0.24722920106750101, | |
| "learning_rate": 9.080713225546801e-05, | |
| "loss": 0.0704, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.098207513273983, | |
| "grad_norm": 1.8397525090103166, | |
| "learning_rate": 9.091144179710946e-05, | |
| "loss": 0.0792, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.09900594834125115, | |
| "grad_norm": 0.5542485878904292, | |
| "learning_rate": 9.101490671781055e-05, | |
| "loss": 0.0967, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.0998043834085193, | |
| "grad_norm": 0.3583983335842228, | |
| "learning_rate": 9.111754058599977e-05, | |
| "loss": 0.0608, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.10060281847578746, | |
| "grad_norm": 1.0065124160311716, | |
| "learning_rate": 9.121935664575374e-05, | |
| "loss": 0.1035, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.10140125354305561, | |
| "grad_norm": 0.4666588580387682, | |
| "learning_rate": 9.132036782705373e-05, | |
| "loss": 0.0687, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.10219968861032376, | |
| "grad_norm": 0.3623496777731359, | |
| "learning_rate": 9.142058675563983e-05, | |
| "loss": 0.0732, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.10299812367759192, | |
| "grad_norm": 0.26488301439731154, | |
| "learning_rate": 9.152002576248177e-05, | |
| "loss": 0.1041, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.10379655874486007, | |
| "grad_norm": 0.1888961199003232, | |
| "learning_rate": 9.161869689288392e-05, | |
| "loss": 0.0577, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.10459499381212822, | |
| "grad_norm": 0.24802960871773072, | |
| "learning_rate": 9.171661191524173e-05, | |
| "loss": 0.0592, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.10539342887939639, | |
| "grad_norm": 0.8859054279177876, | |
| "learning_rate": 9.181378232946485e-05, | |
| "loss": 0.0737, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.10619186394666454, | |
| "grad_norm": 0.6342449000208032, | |
| "learning_rate": 9.191021937508261e-05, | |
| "loss": 0.07, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.10699029901393269, | |
| "grad_norm": 0.19646033159322437, | |
| "learning_rate": 9.200593403904588e-05, | |
| "loss": 0.0775, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.10778873408120085, | |
| "grad_norm": 0.23174924542751033, | |
| "learning_rate": 9.210093706323888e-05, | |
| "loss": 0.0823, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.108587169148469, | |
| "grad_norm": 0.5011075581503674, | |
| "learning_rate": 9.219523895171373e-05, | |
| "loss": 0.0671, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.10938560421573716, | |
| "grad_norm": 0.936119632385534, | |
| "learning_rate": 9.228884997766016e-05, | |
| "loss": 0.0592, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.11018403928300531, | |
| "grad_norm": 0.7797368432433808, | |
| "learning_rate": 9.238178019012147e-05, | |
| "loss": 0.0647, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.11098247435027346, | |
| "grad_norm": 0.6468052400331983, | |
| "learning_rate": 9.247403942046845e-05, | |
| "loss": 0.095, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.11178090941754162, | |
| "grad_norm": 1.5790945386597242, | |
| "learning_rate": 9.256563728864078e-05, | |
| "loss": 0.0785, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.11257934448480977, | |
| "grad_norm": 0.48706241804156847, | |
| "learning_rate": 9.265658320916678e-05, | |
| "loss": 0.074, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.11337777955207792, | |
| "grad_norm": 0.6031988051095882, | |
| "learning_rate": 9.274688639697002e-05, | |
| "loss": 0.1009, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.11417621461934609, | |
| "grad_norm": 0.5190122244621026, | |
| "learning_rate": 9.283655587297249e-05, | |
| "loss": 0.0843, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.11497464968661424, | |
| "grad_norm": 0.9512689902993662, | |
| "learning_rate": 9.292560046950244e-05, | |
| "loss": 0.0715, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.11577308475388239, | |
| "grad_norm": 0.40864222618071416, | |
| "learning_rate": 9.301402883551495e-05, | |
| "loss": 0.0755, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.11657151982115055, | |
| "grad_norm": 0.7769625588538747, | |
| "learning_rate": 9.31018494416333e-05, | |
| "loss": 0.0757, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.1173699548884187, | |
| "grad_norm": 0.3969001840652321, | |
| "learning_rate": 9.318907058501824e-05, | |
| "loss": 0.066, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.11816838995568685, | |
| "grad_norm": 1.0891536238630175, | |
| "learning_rate": 9.327570039407205e-05, | |
| "loss": 0.0685, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.11896682502295501, | |
| "grad_norm": 0.7825998513796186, | |
| "learning_rate": 9.336174683298437e-05, | |
| "loss": 0.068, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.11976526009022316, | |
| "grad_norm": 0.6119434023978193, | |
| "learning_rate": 9.344721770612593e-05, | |
| "loss": 0.0708, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.12056369515749131, | |
| "grad_norm": 0.6392505815350028, | |
| "learning_rate": 9.353212066229625e-05, | |
| "loss": 0.0696, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.12136213022475947, | |
| "grad_norm": 0.23794947831500443, | |
| "learning_rate": 9.361646319883132e-05, | |
| "loss": 0.081, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.12216056529202762, | |
| "grad_norm": 0.6909279964700517, | |
| "learning_rate": 9.370025266557636e-05, | |
| "loss": 0.0718, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.12295900035929579, | |
| "grad_norm": 1.1048882763470222, | |
| "learning_rate": 9.378349626872935e-05, | |
| "loss": 0.0788, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.12375743542656394, | |
| "grad_norm": 0.12156789791117421, | |
| "learning_rate": 9.38662010745602e-05, | |
| "loss": 0.0814, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.12455587049383209, | |
| "grad_norm": 2.026304704972604, | |
| "learning_rate": 9.39483740130101e-05, | |
| "loss": 0.0921, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.12535430556110025, | |
| "grad_norm": 2.025916307932843, | |
| "learning_rate": 9.403002188117609e-05, | |
| "loss": 0.058, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.1261527406283684, | |
| "grad_norm": 0.21178485251660298, | |
| "learning_rate": 9.411115134668499e-05, | |
| "loss": 0.0598, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.12695117569563655, | |
| "grad_norm": 1.4284181500108823, | |
| "learning_rate": 9.419176895096073e-05, | |
| "loss": 0.0814, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.1277496107629047, | |
| "grad_norm": 0.7843104603155183, | |
| "learning_rate": 9.427188111238948e-05, | |
| "loss": 0.042, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.12854804583017287, | |
| "grad_norm": 1.2008220942804122, | |
| "learning_rate": 9.435149412938599e-05, | |
| "loss": 0.0901, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.12934648089744102, | |
| "grad_norm": 0.33193358515873145, | |
| "learning_rate": 9.443061418336504e-05, | |
| "loss": 0.06, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.13014491596470917, | |
| "grad_norm": 0.5099792717245282, | |
| "learning_rate": 9.45092473416216e-05, | |
| "loss": 0.0561, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.13094335103197732, | |
| "grad_norm": 0.5997349133416637, | |
| "learning_rate": 9.458739956012268e-05, | |
| "loss": 0.0681, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.13174178609924547, | |
| "grad_norm": 0.2113301672959503, | |
| "learning_rate": 9.46650766862145e-05, | |
| "loss": 0.0752, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.13254022116651362, | |
| "grad_norm": 0.5787929964170312, | |
| "learning_rate": 9.474228446124777e-05, | |
| "loss": 0.0656, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.1333386562337818, | |
| "grad_norm": 0.8127539428562134, | |
| "learning_rate": 9.48190285231242e-05, | |
| "loss": 0.0899, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.13413709130104995, | |
| "grad_norm": 0.7650823029213837, | |
| "learning_rate": 9.489531440876694e-05, | |
| "loss": 0.0651, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.1349355263683181, | |
| "grad_norm": 0.28754919982145066, | |
| "learning_rate": 9.497114755651775e-05, | |
| "loss": 0.0847, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.13573396143558625, | |
| "grad_norm": 0.13862826523964283, | |
| "learning_rate": 9.504653330846339e-05, | |
| "loss": 0.0347, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.1365323965028544, | |
| "grad_norm": 0.5268722016235668, | |
| "learning_rate": 9.51214769126939e-05, | |
| "loss": 0.0656, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.13733083157012255, | |
| "grad_norm": 0.5178142175719089, | |
| "learning_rate": 9.519598352549497e-05, | |
| "loss": 0.0395, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.13812926663739072, | |
| "grad_norm": 0.6866453511139639, | |
| "learning_rate": 9.527005821347668e-05, | |
| "loss": 0.0714, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.13892770170465887, | |
| "grad_norm": 0.259840861988313, | |
| "learning_rate": 9.534370595564111e-05, | |
| "loss": 0.0561, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.13972613677192702, | |
| "grad_norm": 1.3308950214123239, | |
| "learning_rate": 9.541693164539043e-05, | |
| "loss": 0.0781, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.14052457183919517, | |
| "grad_norm": 0.349179761935008, | |
| "learning_rate": 9.548974009247806e-05, | |
| "loss": 0.0638, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.14132300690646332, | |
| "grad_norm": 1.4789155167397658, | |
| "learning_rate": 9.556213602490433e-05, | |
| "loss": 0.0521, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.1421214419737315, | |
| "grad_norm": 0.5314115346714045, | |
| "learning_rate": 9.563412409075885e-05, | |
| "loss": 0.0453, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.14291987704099965, | |
| "grad_norm": 0.7719157195860719, | |
| "learning_rate": 9.570570886001126e-05, | |
| "loss": 0.0576, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.1437183121082678, | |
| "grad_norm": 0.5365584658316398, | |
| "learning_rate": 9.577689482625209e-05, | |
| "loss": 0.0884, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.14451674717553595, | |
| "grad_norm": 0.40980247060477115, | |
| "learning_rate": 9.584768640838534e-05, | |
| "loss": 0.0633, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.1453151822428041, | |
| "grad_norm": 1.0230342101543866, | |
| "learning_rate": 9.59180879522746e-05, | |
| "loss": 0.072, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.14611361731007225, | |
| "grad_norm": 0.37391519429618736, | |
| "learning_rate": 9.598810373234382e-05, | |
| "loss": 0.062, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.14691205237734042, | |
| "grad_norm": 0.5546899786228485, | |
| "learning_rate": 9.605773795313468e-05, | |
| "loss": 0.0811, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.14771048744460857, | |
| "grad_norm": 0.5121481165129823, | |
| "learning_rate": 9.61269947508217e-05, | |
| "loss": 0.0709, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.14850892251187672, | |
| "grad_norm": 0.3968585153073068, | |
| "learning_rate": 9.619587819468636e-05, | |
| "loss": 0.0753, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.14930735757914487, | |
| "grad_norm": 0.830639936616994, | |
| "learning_rate": 9.626439228855197e-05, | |
| "loss": 0.0684, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.15010579264641302, | |
| "grad_norm": 0.2864308337848012, | |
| "learning_rate": 9.633254097217999e-05, | |
| "loss": 0.0595, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.15090422771368117, | |
| "grad_norm": 0.34372011057055735, | |
| "learning_rate": 9.640032812262954e-05, | |
| "loss": 0.0851, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.15170266278094935, | |
| "grad_norm": 0.7457036317344385, | |
| "learning_rate": 9.646775755558097e-05, | |
| "loss": 0.0537, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.1525010978482175, | |
| "grad_norm": 0.5523782556900674, | |
| "learning_rate": 9.653483302662452e-05, | |
| "loss": 0.0942, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.15329953291548565, | |
| "grad_norm": 0.2828028287146034, | |
| "learning_rate": 9.660155823251565e-05, | |
| "loss": 0.068, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.1540979679827538, | |
| "grad_norm": 0.7231361664577421, | |
| "learning_rate": 9.666793681239751e-05, | |
| "loss": 0.0645, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.15489640305002195, | |
| "grad_norm": 0.7844028595228837, | |
| "learning_rate": 9.673397234899194e-05, | |
| "loss": 0.0749, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.15569483811729012, | |
| "grad_norm": 1.0478767531894126, | |
| "learning_rate": 9.679966836975974e-05, | |
| "loss": 0.1157, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.15649327318455827, | |
| "grad_norm": 0.41201473500664804, | |
| "learning_rate": 9.686502834803144e-05, | |
| "loss": 0.0799, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.15729170825182642, | |
| "grad_norm": 0.47662392382424124, | |
| "learning_rate": 9.69300557041092e-05, | |
| "loss": 0.0657, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.15809014331909457, | |
| "grad_norm": 0.22078628822978255, | |
| "learning_rate": 9.699475380634067e-05, | |
| "loss": 0.057, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.15888857838636272, | |
| "grad_norm": 0.5959337410821844, | |
| "learning_rate": 9.705912597216614e-05, | |
| "loss": 0.0651, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.15968701345363087, | |
| "grad_norm": 0.7035564380493872, | |
| "learning_rate": 9.712317546913912e-05, | |
| "loss": 0.0659, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.16048544852089905, | |
| "grad_norm": 0.38778113345150145, | |
| "learning_rate": 9.718690551592169e-05, | |
| "loss": 0.0927, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.1612838835881672, | |
| "grad_norm": 0.3486784675066523, | |
| "learning_rate": 9.725031928325491e-05, | |
| "loss": 0.0716, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.16208231865543535, | |
| "grad_norm": 0.6303310068494262, | |
| "learning_rate": 9.731341989490561e-05, | |
| "loss": 0.078, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.1628807537227035, | |
| "grad_norm": 1.4877039883169711, | |
| "learning_rate": 9.737621042858955e-05, | |
| "loss": 0.0866, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.16367918878997165, | |
| "grad_norm": 0.7954321278315749, | |
| "learning_rate": 9.743869391687233e-05, | |
| "loss": 0.0798, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.1644776238572398, | |
| "grad_norm": 0.28805128141498443, | |
| "learning_rate": 9.750087334804812e-05, | |
| "loss": 0.0702, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.16527605892450797, | |
| "grad_norm": 0.6762487117705422, | |
| "learning_rate": 9.75627516669973e-05, | |
| "loss": 0.0602, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.16607449399177612, | |
| "grad_norm": 0.5536331829158825, | |
| "learning_rate": 9.76243317760233e-05, | |
| "loss": 0.0459, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.16687292905904427, | |
| "grad_norm": 0.33803981510789505, | |
| "learning_rate": 9.768561653566953e-05, | |
| "loss": 0.0667, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.16767136412631242, | |
| "grad_norm": 0.19271271530476722, | |
| "learning_rate": 9.774660876551659e-05, | |
| "loss": 0.0614, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.16846979919358057, | |
| "grad_norm": 0.8927328638984207, | |
| "learning_rate": 9.780731124496076e-05, | |
| "loss": 0.0767, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.16926823426084875, | |
| "grad_norm": 0.5298297843276258, | |
| "learning_rate": 9.786772671397395e-05, | |
| "loss": 0.0629, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.1700666693281169, | |
| "grad_norm": 0.30875418229193075, | |
| "learning_rate": 9.792785787384581e-05, | |
| "loss": 0.0334, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.17086510439538505, | |
| "grad_norm": 0.4603898414224521, | |
| "learning_rate": 9.798770738790855e-05, | |
| "loss": 0.0668, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.1716635394626532, | |
| "grad_norm": 0.17333943418800754, | |
| "learning_rate": 9.804727788224462e-05, | |
| "loss": 0.0511, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.17246197452992135, | |
| "grad_norm": 0.7779994729637698, | |
| "learning_rate": 9.810657194637825e-05, | |
| "loss": 0.0535, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.1732604095971895, | |
| "grad_norm": 0.32995926154524996, | |
| "learning_rate": 9.816559213395087e-05, | |
| "loss": 0.0908, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.17405884466445767, | |
| "grad_norm": 1.453741519995499, | |
| "learning_rate": 9.8224340963381e-05, | |
| "loss": 0.0908, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.17485727973172582, | |
| "grad_norm": 1.8378453173299512, | |
| "learning_rate": 9.82828209185091e-05, | |
| "loss": 0.1127, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.17565571479899397, | |
| "grad_norm": 0.48725273756729015, | |
| "learning_rate": 9.83410344492277e-05, | |
| "loss": 0.0645, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.17645414986626212, | |
| "grad_norm": 0.22593484396831762, | |
| "learning_rate": 9.83989839720972e-05, | |
| "loss": 0.0517, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.17725258493353027, | |
| "grad_norm": 0.33118665534159303, | |
| "learning_rate": 9.845667187094785e-05, | |
| "loss": 0.0536, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.17805102000079845, | |
| "grad_norm": 0.39066819449354656, | |
| "learning_rate": 9.851410049746818e-05, | |
| "loss": 0.0777, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.1788494550680666, | |
| "grad_norm": 0.8485083305429001, | |
| "learning_rate": 9.857127217178015e-05, | |
| "loss": 0.0697, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.17964789013533475, | |
| "grad_norm": 0.19952539466987482, | |
| "learning_rate": 9.862818918300173e-05, | |
| "loss": 0.0451, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.1804463252026029, | |
| "grad_norm": 0.691252616561084, | |
| "learning_rate": 9.868485378979675e-05, | |
| "loss": 0.0647, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.18124476026987105, | |
| "grad_norm": 0.7049940159011623, | |
| "learning_rate": 9.874126822091277e-05, | |
| "loss": 0.0845, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.1820431953371392, | |
| "grad_norm": 1.2133382620511355, | |
| "learning_rate": 9.879743467570711e-05, | |
| "loss": 0.0588, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.18284163040440737, | |
| "grad_norm": 0.35602520819498223, | |
| "learning_rate": 9.885335532466129e-05, | |
| "loss": 0.0742, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.18364006547167552, | |
| "grad_norm": 0.4158085994828218, | |
| "learning_rate": 9.890903230988434e-05, | |
| "loss": 0.0808, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.18443850053894367, | |
| "grad_norm": 0.18046190200012216, | |
| "learning_rate": 9.896446774560516e-05, | |
| "loss": 0.0503, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.18523693560621182, | |
| "grad_norm": 0.22406543693610548, | |
| "learning_rate": 9.90196637186543e-05, | |
| "loss": 0.0657, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.18603537067347997, | |
| "grad_norm": 0.4288290023830769, | |
| "learning_rate": 9.907462228893529e-05, | |
| "loss": 0.0552, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.18683380574074812, | |
| "grad_norm": 0.385521256661968, | |
| "learning_rate": 9.912934548988589e-05, | |
| "loss": 0.0399, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.1876322408080163, | |
| "grad_norm": 0.7416803203530228, | |
| "learning_rate": 9.918383532892963e-05, | |
| "loss": 0.0556, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.18843067587528445, | |
| "grad_norm": 0.40622202711719285, | |
| "learning_rate": 9.923809378791754e-05, | |
| "loss": 0.0685, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.1892291109425526, | |
| "grad_norm": 0.8021095669829111, | |
| "learning_rate": 9.92921228235608e-05, | |
| "loss": 0.0448, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.19002754600982075, | |
| "grad_norm": 0.2868854139454059, | |
| "learning_rate": 9.934592436785405e-05, | |
| "loss": 0.0476, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.1908259810770889, | |
| "grad_norm": 1.2605357558943955, | |
| "learning_rate": 9.939950032849001e-05, | |
| "loss": 0.051, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.19162441614435707, | |
| "grad_norm": 1.073063002530671, | |
| "learning_rate": 9.94528525892653e-05, | |
| "loss": 0.0759, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.19242285121162522, | |
| "grad_norm": 0.1347432671200069, | |
| "learning_rate": 9.950598301047787e-05, | |
| "loss": 0.0657, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.19322128627889337, | |
| "grad_norm": 0.48182010739489867, | |
| "learning_rate": 9.955889342931627e-05, | |
| "loss": 0.0589, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.19401972134616152, | |
| "grad_norm": 0.6296388960353804, | |
| "learning_rate": 9.961158566024085e-05, | |
| "loss": 0.0714, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.19481815641342967, | |
| "grad_norm": 0.33729264945412785, | |
| "learning_rate": 9.966406149535702e-05, | |
| "loss": 0.0479, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.19561659148069782, | |
| "grad_norm": 0.1498735604114689, | |
| "learning_rate": 9.97163227047811e-05, | |
| "loss": 0.0498, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.196415026547966, | |
| "grad_norm": 1.0199520439535377, | |
| "learning_rate": 9.976837103699849e-05, | |
| "loss": 0.1106, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.19721346161523415, | |
| "grad_norm": 0.29763329077307044, | |
| "learning_rate": 9.982020821921477e-05, | |
| "loss": 0.0692, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.1980118966825023, | |
| "grad_norm": 1.047960512760466, | |
| "learning_rate": 9.987183595769957e-05, | |
| "loss": 0.0876, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.19881033174977045, | |
| "grad_norm": 1.7136728132679582, | |
| "learning_rate": 9.992325593812358e-05, | |
| "loss": 0.1053, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.1996087668170386, | |
| "grad_norm": 0.2024671443429759, | |
| "learning_rate": 9.997446982588877e-05, | |
| "loss": 0.0713, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.20040720188430675, | |
| "grad_norm": 0.8635214144009746, | |
| "learning_rate": 9.998225613272413e-05, | |
| "loss": 0.0753, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.20120563695157492, | |
| "grad_norm": 1.1726579945772015, | |
| "learning_rate": 9.993789646453446e-05, | |
| "loss": 0.0464, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.20200407201884307, | |
| "grad_norm": 0.5156462324013295, | |
| "learning_rate": 9.989353679634476e-05, | |
| "loss": 0.0641, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.20280250708611122, | |
| "grad_norm": 0.4818857730271147, | |
| "learning_rate": 9.984917712815509e-05, | |
| "loss": 0.0774, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.20360094215337937, | |
| "grad_norm": 0.5932059075223263, | |
| "learning_rate": 9.980481745996541e-05, | |
| "loss": 0.0765, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.20439937722064752, | |
| "grad_norm": 0.5806911480243303, | |
| "learning_rate": 9.976045779177571e-05, | |
| "loss": 0.0792, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.2051978122879157, | |
| "grad_norm": 0.767705201792789, | |
| "learning_rate": 9.971609812358604e-05, | |
| "loss": 0.0666, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.20599624735518385, | |
| "grad_norm": 0.2993239339384049, | |
| "learning_rate": 9.967173845539636e-05, | |
| "loss": 0.0739, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.206794682422452, | |
| "grad_norm": 0.347192548507271, | |
| "learning_rate": 9.962737878720667e-05, | |
| "loss": 0.0832, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.20759311748972015, | |
| "grad_norm": 0.4651167628767381, | |
| "learning_rate": 9.9583019119017e-05, | |
| "loss": 0.0813, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.2083915525569883, | |
| "grad_norm": 0.5106958424622942, | |
| "learning_rate": 9.953865945082731e-05, | |
| "loss": 0.0887, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.20918998762425645, | |
| "grad_norm": 0.35739379245303143, | |
| "learning_rate": 9.949429978263763e-05, | |
| "loss": 0.0644, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.20998842269152462, | |
| "grad_norm": 1.0125482686281093, | |
| "learning_rate": 9.944994011444795e-05, | |
| "loss": 0.06, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.21078685775879277, | |
| "grad_norm": 0.3451023665574518, | |
| "learning_rate": 9.940558044625827e-05, | |
| "loss": 0.0867, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.21158529282606092, | |
| "grad_norm": 0.6950882876363752, | |
| "learning_rate": 9.936122077806858e-05, | |
| "loss": 0.0692, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.21238372789332907, | |
| "grad_norm": 0.6273149132955865, | |
| "learning_rate": 9.93168611098789e-05, | |
| "loss": 0.0653, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.21318216296059722, | |
| "grad_norm": 0.8797100572687333, | |
| "learning_rate": 9.927250144168923e-05, | |
| "loss": 0.0862, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.21398059802786537, | |
| "grad_norm": 0.3022352149028631, | |
| "learning_rate": 9.922814177349954e-05, | |
| "loss": 0.0388, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.21477903309513355, | |
| "grad_norm": 0.46577150224023545, | |
| "learning_rate": 9.918378210530985e-05, | |
| "loss": 0.0907, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.2155774681624017, | |
| "grad_norm": 0.40185970587390235, | |
| "learning_rate": 9.913942243712018e-05, | |
| "loss": 0.0664, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.21637590322966985, | |
| "grad_norm": 0.7032122014906631, | |
| "learning_rate": 9.909506276893049e-05, | |
| "loss": 0.0556, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.217174338296938, | |
| "grad_norm": 0.20054634591660722, | |
| "learning_rate": 9.905070310074082e-05, | |
| "loss": 0.053, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.21797277336420615, | |
| "grad_norm": 0.8214860525138075, | |
| "learning_rate": 9.900634343255114e-05, | |
| "loss": 0.0629, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.21877120843147432, | |
| "grad_norm": 0.4407457602483392, | |
| "learning_rate": 9.896198376436144e-05, | |
| "loss": 0.0509, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.21956964349874247, | |
| "grad_norm": 0.9525928650688722, | |
| "learning_rate": 9.891762409617177e-05, | |
| "loss": 0.0751, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.22036807856601062, | |
| "grad_norm": 0.7920710965808775, | |
| "learning_rate": 9.887326442798209e-05, | |
| "loss": 0.0753, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.22116651363327877, | |
| "grad_norm": 0.6674506450378365, | |
| "learning_rate": 9.88289047597924e-05, | |
| "loss": 0.0559, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.22196494870054692, | |
| "grad_norm": 0.4032602018987511, | |
| "learning_rate": 9.878454509160272e-05, | |
| "loss": 0.0498, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.22276338376781507, | |
| "grad_norm": 0.34518737677514616, | |
| "learning_rate": 9.874018542341304e-05, | |
| "loss": 0.0729, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.22356181883508325, | |
| "grad_norm": 0.26706028708878127, | |
| "learning_rate": 9.869582575522336e-05, | |
| "loss": 0.0691, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.2243602539023514, | |
| "grad_norm": 0.43474114414320436, | |
| "learning_rate": 9.865146608703367e-05, | |
| "loss": 0.0615, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.22515868896961955, | |
| "grad_norm": 0.5283867052692955, | |
| "learning_rate": 9.860710641884399e-05, | |
| "loss": 0.071, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.2259571240368877, | |
| "grad_norm": 0.47837272161979605, | |
| "learning_rate": 9.856274675065431e-05, | |
| "loss": 0.0693, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.22675555910415585, | |
| "grad_norm": 0.7024624015245926, | |
| "learning_rate": 9.851838708246463e-05, | |
| "loss": 0.0738, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.227553994171424, | |
| "grad_norm": 0.3111551408716249, | |
| "learning_rate": 9.847402741427494e-05, | |
| "loss": 0.0476, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.22835242923869217, | |
| "grad_norm": 0.2394474908901574, | |
| "learning_rate": 9.842966774608526e-05, | |
| "loss": 0.0574, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.22915086430596032, | |
| "grad_norm": 1.1725937115127267, | |
| "learning_rate": 9.838530807789558e-05, | |
| "loss": 0.0918, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.22994929937322847, | |
| "grad_norm": 0.3956678467142246, | |
| "learning_rate": 9.83409484097059e-05, | |
| "loss": 0.0844, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.23074773444049662, | |
| "grad_norm": 0.5616900783159234, | |
| "learning_rate": 9.829658874151621e-05, | |
| "loss": 0.0773, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.23154616950776477, | |
| "grad_norm": 0.4062469324519191, | |
| "learning_rate": 9.825222907332654e-05, | |
| "loss": 0.0413, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.23234460457503295, | |
| "grad_norm": 0.8819733220912126, | |
| "learning_rate": 9.820786940513685e-05, | |
| "loss": 0.0735, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.2331430396423011, | |
| "grad_norm": 1.006357506188907, | |
| "learning_rate": 9.816350973694717e-05, | |
| "loss": 0.0786, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.23394147470956925, | |
| "grad_norm": 0.2040002764107568, | |
| "learning_rate": 9.81191500687575e-05, | |
| "loss": 0.0543, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.2347399097768374, | |
| "grad_norm": 1.4210094000753832, | |
| "learning_rate": 9.80747904005678e-05, | |
| "loss": 0.064, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.23553834484410555, | |
| "grad_norm": 0.5962753420340007, | |
| "learning_rate": 9.803043073237813e-05, | |
| "loss": 0.0615, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.2363367799113737, | |
| "grad_norm": 0.5173259768588669, | |
| "learning_rate": 9.798607106418845e-05, | |
| "loss": 0.0762, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.23713521497864187, | |
| "grad_norm": 1.2344255844869074, | |
| "learning_rate": 9.794171139599875e-05, | |
| "loss": 0.0823, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.23793365004591002, | |
| "grad_norm": 0.7619318342778804, | |
| "learning_rate": 9.789735172780908e-05, | |
| "loss": 0.0782, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.23873208511317817, | |
| "grad_norm": 0.6206950997276741, | |
| "learning_rate": 9.78529920596194e-05, | |
| "loss": 0.0664, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.23953052018044632, | |
| "grad_norm": 0.2777974840701721, | |
| "learning_rate": 9.780863239142972e-05, | |
| "loss": 0.0639, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.24032895524771447, | |
| "grad_norm": 1.2924041445825014, | |
| "learning_rate": 9.776427272324004e-05, | |
| "loss": 0.0668, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.24112739031498262, | |
| "grad_norm": 0.5118342520849409, | |
| "learning_rate": 9.771991305505035e-05, | |
| "loss": 0.0797, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.2419258253822508, | |
| "grad_norm": 0.8633704661668934, | |
| "learning_rate": 9.767555338686067e-05, | |
| "loss": 0.0797, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.24272426044951895, | |
| "grad_norm": 0.25434876668468953, | |
| "learning_rate": 9.763119371867099e-05, | |
| "loss": 0.0679, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.2435226955167871, | |
| "grad_norm": 1.0640468101763765, | |
| "learning_rate": 9.75868340504813e-05, | |
| "loss": 0.0583, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.24432113058405525, | |
| "grad_norm": 0.5614750666753393, | |
| "learning_rate": 9.754247438229162e-05, | |
| "loss": 0.079, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.2451195656513234, | |
| "grad_norm": 0.6715198464966025, | |
| "learning_rate": 9.749811471410194e-05, | |
| "loss": 0.0683, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.24591800071859157, | |
| "grad_norm": 1.291191254475961, | |
| "learning_rate": 9.745375504591227e-05, | |
| "loss": 0.0821, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.24671643578585972, | |
| "grad_norm": 0.49187729587768836, | |
| "learning_rate": 9.740939537772257e-05, | |
| "loss": 0.064, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.24751487085312787, | |
| "grad_norm": 0.20031272028441496, | |
| "learning_rate": 9.736503570953289e-05, | |
| "loss": 0.0518, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.24831330592039602, | |
| "grad_norm": 0.7593839309467876, | |
| "learning_rate": 9.732067604134322e-05, | |
| "loss": 0.085, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 0.24911174098766417, | |
| "grad_norm": 1.5421303335254006, | |
| "learning_rate": 9.727631637315353e-05, | |
| "loss": 0.0951, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.24991017605493232, | |
| "grad_norm": 0.6102084147035887, | |
| "learning_rate": 9.723195670496386e-05, | |
| "loss": 0.0655, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 0.2507086111222005, | |
| "grad_norm": 0.44309051140710953, | |
| "learning_rate": 9.718759703677418e-05, | |
| "loss": 0.0692, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.2515070461894686, | |
| "grad_norm": 0.2800764130796373, | |
| "learning_rate": 9.714323736858448e-05, | |
| "loss": 0.0566, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.2523054812567368, | |
| "grad_norm": 0.33225924131316076, | |
| "learning_rate": 9.709887770039481e-05, | |
| "loss": 0.0555, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.253103916324005, | |
| "grad_norm": 0.28000037239354675, | |
| "learning_rate": 9.705451803220513e-05, | |
| "loss": 0.0549, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 0.2539023513912731, | |
| "grad_norm": 0.5510944129584439, | |
| "learning_rate": 9.701015836401544e-05, | |
| "loss": 0.0699, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.2547007864585413, | |
| "grad_norm": 0.32418363687489393, | |
| "learning_rate": 9.696579869582576e-05, | |
| "loss": 0.0686, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 0.2554992215258094, | |
| "grad_norm": 0.6754324134221963, | |
| "learning_rate": 9.692143902763608e-05, | |
| "loss": 0.071, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.25629765659307757, | |
| "grad_norm": 1.022738360263931, | |
| "learning_rate": 9.68770793594464e-05, | |
| "loss": 0.0691, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.25709609166034575, | |
| "grad_norm": 0.8622983849669558, | |
| "learning_rate": 9.683271969125671e-05, | |
| "loss": 0.093, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.25789452672761387, | |
| "grad_norm": 0.5050331506487474, | |
| "learning_rate": 9.678836002306703e-05, | |
| "loss": 0.0631, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 0.25869296179488205, | |
| "grad_norm": 1.0255097116123932, | |
| "learning_rate": 9.674400035487735e-05, | |
| "loss": 0.0563, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.25949139686215017, | |
| "grad_norm": 0.4240474303571155, | |
| "learning_rate": 9.669964068668767e-05, | |
| "loss": 0.0746, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.26028983192941835, | |
| "grad_norm": 0.17675298277758164, | |
| "learning_rate": 9.665528101849798e-05, | |
| "loss": 0.0673, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.26108826699668647, | |
| "grad_norm": 0.3869745350922928, | |
| "learning_rate": 9.66109213503083e-05, | |
| "loss": 0.1101, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.26188670206395465, | |
| "grad_norm": 0.15099346361631436, | |
| "learning_rate": 9.656656168211862e-05, | |
| "loss": 0.0728, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.2626851371312228, | |
| "grad_norm": 0.7320323894609118, | |
| "learning_rate": 9.652220201392894e-05, | |
| "loss": 0.0712, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 0.26348357219849095, | |
| "grad_norm": 0.13397678878907418, | |
| "learning_rate": 9.647784234573925e-05, | |
| "loss": 0.0557, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.2642820072657591, | |
| "grad_norm": 0.4705351472047174, | |
| "learning_rate": 9.643348267754958e-05, | |
| "loss": 0.0599, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 0.26508044233302724, | |
| "grad_norm": 1.24551689645886, | |
| "learning_rate": 9.638912300935989e-05, | |
| "loss": 0.0589, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 0.2658788774002954, | |
| "grad_norm": 0.24989018849005057, | |
| "learning_rate": 9.63447633411702e-05, | |
| "loss": 0.0731, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 0.2666773124675636, | |
| "grad_norm": 0.4633263939700149, | |
| "learning_rate": 9.630040367298054e-05, | |
| "loss": 0.0525, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 0.2674757475348317, | |
| "grad_norm": 0.5755597975991337, | |
| "learning_rate": 9.625604400479084e-05, | |
| "loss": 0.0742, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.2682741826020999, | |
| "grad_norm": 0.3891342346275618, | |
| "learning_rate": 9.621168433660117e-05, | |
| "loss": 0.059, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.269072617669368, | |
| "grad_norm": 0.13349742033851617, | |
| "learning_rate": 9.616732466841149e-05, | |
| "loss": 0.0548, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 0.2698710527366362, | |
| "grad_norm": 0.48026841764167816, | |
| "learning_rate": 9.612296500022179e-05, | |
| "loss": 0.0668, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 0.2706694878039044, | |
| "grad_norm": 0.7808325364960803, | |
| "learning_rate": 9.607860533203212e-05, | |
| "loss": 0.0704, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.2714679228711725, | |
| "grad_norm": 0.4864712450283722, | |
| "learning_rate": 9.603424566384244e-05, | |
| "loss": 0.0713, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.2722663579384407, | |
| "grad_norm": 0.887232498565028, | |
| "learning_rate": 9.598988599565276e-05, | |
| "loss": 0.0525, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.2730647930057088, | |
| "grad_norm": 0.8234101548616632, | |
| "learning_rate": 9.594552632746308e-05, | |
| "loss": 0.0733, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.27386322807297697, | |
| "grad_norm": 0.5022871225844846, | |
| "learning_rate": 9.590116665927339e-05, | |
| "loss": 0.0547, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.2746616631402451, | |
| "grad_norm": 2.1522421476954547, | |
| "learning_rate": 9.585680699108371e-05, | |
| "loss": 0.0668, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.27546009820751327, | |
| "grad_norm": 1.3741601077902381, | |
| "learning_rate": 9.581244732289403e-05, | |
| "loss": 0.0658, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.27625853327478145, | |
| "grad_norm": 0.6094718464750811, | |
| "learning_rate": 9.576808765470436e-05, | |
| "loss": 0.0662, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.27705696834204957, | |
| "grad_norm": 0.3305485987290091, | |
| "learning_rate": 9.572372798651466e-05, | |
| "loss": 0.0578, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 0.27785540340931775, | |
| "grad_norm": 0.6526066084303576, | |
| "learning_rate": 9.567936831832498e-05, | |
| "loss": 0.0746, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.27865383847658587, | |
| "grad_norm": 0.9842490963836676, | |
| "learning_rate": 9.563500865013531e-05, | |
| "loss": 0.0631, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 0.27945227354385405, | |
| "grad_norm": 0.8835467687651831, | |
| "learning_rate": 9.559064898194561e-05, | |
| "loss": 0.0875, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.2802507086111222, | |
| "grad_norm": 0.781469027598919, | |
| "learning_rate": 9.554628931375593e-05, | |
| "loss": 0.101, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.28104914367839035, | |
| "grad_norm": 0.8106723090645768, | |
| "learning_rate": 9.550192964556626e-05, | |
| "loss": 0.0668, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.2818475787456585, | |
| "grad_norm": 0.46004322835035094, | |
| "learning_rate": 9.545756997737657e-05, | |
| "loss": 0.0694, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 0.28264601381292664, | |
| "grad_norm": 0.5784098668603113, | |
| "learning_rate": 9.54132103091869e-05, | |
| "loss": 0.0688, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.2834444488801948, | |
| "grad_norm": 0.6629513380474472, | |
| "learning_rate": 9.536885064099722e-05, | |
| "loss": 0.0608, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.284242883947463, | |
| "grad_norm": 0.19931079062599605, | |
| "learning_rate": 9.532449097280752e-05, | |
| "loss": 0.0841, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 0.2850413190147311, | |
| "grad_norm": 0.7951033718666626, | |
| "learning_rate": 9.528013130461785e-05, | |
| "loss": 0.0527, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 0.2858397540819993, | |
| "grad_norm": 0.9565484508249654, | |
| "learning_rate": 9.523577163642817e-05, | |
| "loss": 0.0482, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 0.2866381891492674, | |
| "grad_norm": 0.8392384050951297, | |
| "learning_rate": 9.519141196823848e-05, | |
| "loss": 0.0633, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 0.2874366242165356, | |
| "grad_norm": 0.5717780924727565, | |
| "learning_rate": 9.51470523000488e-05, | |
| "loss": 0.0449, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.2882350592838037, | |
| "grad_norm": 0.4189642837284483, | |
| "learning_rate": 9.510269263185912e-05, | |
| "loss": 0.0488, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 0.2890334943510719, | |
| "grad_norm": 0.2488194601811155, | |
| "learning_rate": 9.505833296366944e-05, | |
| "loss": 0.0654, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 0.2898319294183401, | |
| "grad_norm": 0.4890100046395099, | |
| "learning_rate": 9.501397329547975e-05, | |
| "loss": 0.0531, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 0.2906303644856082, | |
| "grad_norm": 0.6529603939573656, | |
| "learning_rate": 9.496961362729007e-05, | |
| "loss": 0.0761, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 0.29142879955287637, | |
| "grad_norm": 0.1019537391373853, | |
| "learning_rate": 9.492525395910039e-05, | |
| "loss": 0.0467, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.2922272346201445, | |
| "grad_norm": 0.17618204759123188, | |
| "learning_rate": 9.48808942909107e-05, | |
| "loss": 0.0554, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.29302566968741267, | |
| "grad_norm": 0.5353417707615883, | |
| "learning_rate": 9.483653462272102e-05, | |
| "loss": 0.0586, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 0.29382410475468085, | |
| "grad_norm": 0.5391605538942996, | |
| "learning_rate": 9.479217495453134e-05, | |
| "loss": 0.057, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 0.29462253982194897, | |
| "grad_norm": 0.19771138219286866, | |
| "learning_rate": 9.474781528634167e-05, | |
| "loss": 0.0715, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 0.29542097488921715, | |
| "grad_norm": 1.491295495505135, | |
| "learning_rate": 9.470345561815198e-05, | |
| "loss": 0.0708, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.29621940995648527, | |
| "grad_norm": 0.13512836092147384, | |
| "learning_rate": 9.465909594996229e-05, | |
| "loss": 0.0581, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 0.29701784502375345, | |
| "grad_norm": 1.5095520257451078, | |
| "learning_rate": 9.461473628177262e-05, | |
| "loss": 0.0559, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.2978162800910216, | |
| "grad_norm": 1.1545077382657083, | |
| "learning_rate": 9.457037661358293e-05, | |
| "loss": 0.0673, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 0.29861471515828975, | |
| "grad_norm": 1.3565052045468842, | |
| "learning_rate": 9.452601694539325e-05, | |
| "loss": 0.0759, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 0.2994131502255579, | |
| "grad_norm": 0.2761557893990645, | |
| "learning_rate": 9.448165727720358e-05, | |
| "loss": 0.0729, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.30021158529282604, | |
| "grad_norm": 0.6113839039760615, | |
| "learning_rate": 9.443729760901388e-05, | |
| "loss": 0.0804, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 0.3010100203600942, | |
| "grad_norm": 1.1193210205165771, | |
| "learning_rate": 9.439293794082421e-05, | |
| "loss": 0.0607, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 0.30180845542736234, | |
| "grad_norm": 1.2227261887092338, | |
| "learning_rate": 9.434857827263453e-05, | |
| "loss": 0.0887, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.3026068904946305, | |
| "grad_norm": 0.6336848150580261, | |
| "learning_rate": 9.430421860444483e-05, | |
| "loss": 0.0894, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 0.3034053255618987, | |
| "grad_norm": 0.3976035265311596, | |
| "learning_rate": 9.425985893625516e-05, | |
| "loss": 0.0757, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.3042037606291668, | |
| "grad_norm": 0.3575402571045892, | |
| "learning_rate": 9.421549926806548e-05, | |
| "loss": 0.0803, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 0.305002195696435, | |
| "grad_norm": 0.5648417782171387, | |
| "learning_rate": 9.41711395998758e-05, | |
| "loss": 0.0617, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 0.3058006307637031, | |
| "grad_norm": 0.2822352199969617, | |
| "learning_rate": 9.412677993168612e-05, | |
| "loss": 0.0802, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 0.3065990658309713, | |
| "grad_norm": 0.13975805465865077, | |
| "learning_rate": 9.408242026349643e-05, | |
| "loss": 0.0652, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.3073975008982395, | |
| "grad_norm": 0.7969925852775963, | |
| "learning_rate": 9.403806059530675e-05, | |
| "loss": 0.0369, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.3081959359655076, | |
| "grad_norm": 0.35201376066581236, | |
| "learning_rate": 9.399370092711707e-05, | |
| "loss": 0.0478, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 0.30899437103277577, | |
| "grad_norm": 0.6213359726373199, | |
| "learning_rate": 9.39493412589274e-05, | |
| "loss": 0.0793, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 0.3097928061000439, | |
| "grad_norm": 0.19355341743297388, | |
| "learning_rate": 9.39049815907377e-05, | |
| "loss": 0.0663, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 0.31059124116731207, | |
| "grad_norm": 0.1777608148662464, | |
| "learning_rate": 9.386062192254802e-05, | |
| "loss": 0.066, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 0.31138967623458025, | |
| "grad_norm": 0.3915213687567011, | |
| "learning_rate": 9.381626225435835e-05, | |
| "loss": 0.0462, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.31218811130184837, | |
| "grad_norm": 0.45632356085935716, | |
| "learning_rate": 9.377190258616865e-05, | |
| "loss": 0.0543, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 0.31298654636911655, | |
| "grad_norm": 0.14412321773869619, | |
| "learning_rate": 9.372754291797899e-05, | |
| "loss": 0.0574, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 0.31378498143638467, | |
| "grad_norm": 0.2525773726678694, | |
| "learning_rate": 9.36831832497893e-05, | |
| "loss": 0.0737, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 0.31458341650365285, | |
| "grad_norm": 0.8403006704325723, | |
| "learning_rate": 9.36388235815996e-05, | |
| "loss": 0.0939, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 0.31538185157092097, | |
| "grad_norm": 0.6521623803536748, | |
| "learning_rate": 9.359446391340994e-05, | |
| "loss": 0.058, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.31618028663818915, | |
| "grad_norm": 0.3547147871976421, | |
| "learning_rate": 9.355010424522025e-05, | |
| "loss": 0.0631, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 0.3169787217054573, | |
| "grad_norm": 1.213057907328134, | |
| "learning_rate": 9.350574457703056e-05, | |
| "loss": 0.0709, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 0.31777715677272544, | |
| "grad_norm": 0.8145837560797573, | |
| "learning_rate": 9.346138490884089e-05, | |
| "loss": 0.0789, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 0.3185755918399936, | |
| "grad_norm": 0.31619564069314254, | |
| "learning_rate": 9.341702524065121e-05, | |
| "loss": 0.0592, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 0.31937402690726174, | |
| "grad_norm": 0.1999948652995334, | |
| "learning_rate": 9.337266557246152e-05, | |
| "loss": 0.0611, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.3201724619745299, | |
| "grad_norm": 0.9048703932371391, | |
| "learning_rate": 9.332830590427184e-05, | |
| "loss": 0.0626, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 0.3209708970417981, | |
| "grad_norm": 0.581399417609177, | |
| "learning_rate": 9.328394623608216e-05, | |
| "loss": 0.0752, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 0.3217693321090662, | |
| "grad_norm": 0.3120397134500295, | |
| "learning_rate": 9.323958656789248e-05, | |
| "loss": 0.0559, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 0.3225677671763344, | |
| "grad_norm": 0.40722820540078686, | |
| "learning_rate": 9.31952268997028e-05, | |
| "loss": 0.0667, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 0.3233662022436025, | |
| "grad_norm": 0.49952846455914823, | |
| "learning_rate": 9.315086723151311e-05, | |
| "loss": 0.0699, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.3241646373108707, | |
| "grad_norm": 0.5499884570539099, | |
| "learning_rate": 9.310650756332343e-05, | |
| "loss": 0.0987, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 0.3249630723781389, | |
| "grad_norm": 0.705022079278504, | |
| "learning_rate": 9.306214789513375e-05, | |
| "loss": 0.0708, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 0.325761507445407, | |
| "grad_norm": 0.4639845211871033, | |
| "learning_rate": 9.301778822694406e-05, | |
| "loss": 0.0619, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 0.32655994251267517, | |
| "grad_norm": 0.20800711837138103, | |
| "learning_rate": 9.297342855875438e-05, | |
| "loss": 0.0428, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 0.3273583775799433, | |
| "grad_norm": 0.7247882394307614, | |
| "learning_rate": 9.292906889056471e-05, | |
| "loss": 0.0664, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.32815681264721147, | |
| "grad_norm": 0.7582305969619678, | |
| "learning_rate": 9.288470922237502e-05, | |
| "loss": 0.0482, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 0.3289552477144796, | |
| "grad_norm": 0.3892556989008362, | |
| "learning_rate": 9.284034955418533e-05, | |
| "loss": 0.0709, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 0.32975368278174777, | |
| "grad_norm": 0.28872693985562875, | |
| "learning_rate": 9.279598988599566e-05, | |
| "loss": 0.0382, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 0.33055211784901595, | |
| "grad_norm": 0.3375027856146073, | |
| "learning_rate": 9.275163021780597e-05, | |
| "loss": 0.0604, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 0.33135055291628407, | |
| "grad_norm": 0.7959867830511765, | |
| "learning_rate": 9.27072705496163e-05, | |
| "loss": 0.058, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.33214898798355225, | |
| "grad_norm": 1.113370323153677, | |
| "learning_rate": 9.266291088142662e-05, | |
| "loss": 0.0657, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 0.33294742305082037, | |
| "grad_norm": 0.3894855633297203, | |
| "learning_rate": 9.261855121323692e-05, | |
| "loss": 0.0606, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 0.33374585811808855, | |
| "grad_norm": 1.6682181003177796, | |
| "learning_rate": 9.257419154504725e-05, | |
| "loss": 0.0575, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 0.3345442931853567, | |
| "grad_norm": 0.4147451777526778, | |
| "learning_rate": 9.252983187685757e-05, | |
| "loss": 0.0757, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 0.33534272825262484, | |
| "grad_norm": 1.4313508327315267, | |
| "learning_rate": 9.248547220866787e-05, | |
| "loss": 0.0816, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.336141163319893, | |
| "grad_norm": 0.8629267807236959, | |
| "learning_rate": 9.24411125404782e-05, | |
| "loss": 0.0658, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 0.33693959838716114, | |
| "grad_norm": 0.14128184424301535, | |
| "learning_rate": 9.239675287228852e-05, | |
| "loss": 0.0519, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 0.3377380334544293, | |
| "grad_norm": 0.4930129021270943, | |
| "learning_rate": 9.235239320409884e-05, | |
| "loss": 0.0599, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 0.3385364685216975, | |
| "grad_norm": 0.6045942919945482, | |
| "learning_rate": 9.230803353590915e-05, | |
| "loss": 0.078, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 0.3393349035889656, | |
| "grad_norm": 0.5299359565226784, | |
| "learning_rate": 9.226367386771947e-05, | |
| "loss": 0.07, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.3401333386562338, | |
| "grad_norm": 0.32803420167901814, | |
| "learning_rate": 9.221931419952979e-05, | |
| "loss": 0.0677, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 0.3409317737235019, | |
| "grad_norm": 0.5736143342256715, | |
| "learning_rate": 9.217495453134011e-05, | |
| "loss": 0.0813, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 0.3417302087907701, | |
| "grad_norm": 0.5006873224659751, | |
| "learning_rate": 9.213059486315044e-05, | |
| "loss": 0.0919, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 0.3425286438580382, | |
| "grad_norm": 0.7189402066951537, | |
| "learning_rate": 9.208623519496074e-05, | |
| "loss": 0.0664, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 0.3433270789253064, | |
| "grad_norm": 1.1042243525293143, | |
| "learning_rate": 9.204187552677106e-05, | |
| "loss": 0.0837, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.34412551399257457, | |
| "grad_norm": 1.3047043218797034, | |
| "learning_rate": 9.199751585858139e-05, | |
| "loss": 0.0508, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 0.3449239490598427, | |
| "grad_norm": 0.7076312109495163, | |
| "learning_rate": 9.19531561903917e-05, | |
| "loss": 0.0574, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 0.34572238412711087, | |
| "grad_norm": 0.20680949689309577, | |
| "learning_rate": 9.190879652220202e-05, | |
| "loss": 0.0525, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 0.346520819194379, | |
| "grad_norm": 0.3035802107484262, | |
| "learning_rate": 9.186443685401234e-05, | |
| "loss": 0.0657, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 0.34731925426164717, | |
| "grad_norm": 0.7911695899519386, | |
| "learning_rate": 9.182007718582265e-05, | |
| "loss": 0.0849, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.34811768932891535, | |
| "grad_norm": 0.5895064283440106, | |
| "learning_rate": 9.177571751763298e-05, | |
| "loss": 0.0761, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 0.34891612439618347, | |
| "grad_norm": 0.35415376481925265, | |
| "learning_rate": 9.17313578494433e-05, | |
| "loss": 0.0713, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 0.34971455946345165, | |
| "grad_norm": 0.5419823106587008, | |
| "learning_rate": 9.168699818125361e-05, | |
| "loss": 0.0669, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 0.35051299453071977, | |
| "grad_norm": 0.8830483510160156, | |
| "learning_rate": 9.164263851306393e-05, | |
| "loss": 0.0735, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 0.35131142959798795, | |
| "grad_norm": 1.049664144394219, | |
| "learning_rate": 9.159827884487425e-05, | |
| "loss": 0.0653, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.3521098646652561, | |
| "grad_norm": 0.23432670361744762, | |
| "learning_rate": 9.155391917668456e-05, | |
| "loss": 0.0365, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 0.35290829973252424, | |
| "grad_norm": 0.6699954512353271, | |
| "learning_rate": 9.150955950849488e-05, | |
| "loss": 0.0573, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 0.3537067347997924, | |
| "grad_norm": 0.40988692483619393, | |
| "learning_rate": 9.14651998403052e-05, | |
| "loss": 0.0598, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 0.35450516986706054, | |
| "grad_norm": 0.30987019795651377, | |
| "learning_rate": 9.142084017211552e-05, | |
| "loss": 0.0583, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 0.3553036049343287, | |
| "grad_norm": 1.5473769973813984, | |
| "learning_rate": 9.137648050392583e-05, | |
| "loss": 0.088, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.3561020400015969, | |
| "grad_norm": 1.0985488042237361, | |
| "learning_rate": 9.133212083573615e-05, | |
| "loss": 0.0808, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 0.356900475068865, | |
| "grad_norm": 0.47391395596589303, | |
| "learning_rate": 9.128776116754647e-05, | |
| "loss": 0.0598, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 0.3576989101361332, | |
| "grad_norm": 0.16818095830793228, | |
| "learning_rate": 9.124340149935679e-05, | |
| "loss": 0.0713, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 0.3584973452034013, | |
| "grad_norm": 0.6284876149603, | |
| "learning_rate": 9.11990418311671e-05, | |
| "loss": 0.0791, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 0.3592957802706695, | |
| "grad_norm": 1.1532137182508528, | |
| "learning_rate": 9.115468216297742e-05, | |
| "loss": 0.0788, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.3600942153379376, | |
| "grad_norm": 0.38319530479006925, | |
| "learning_rate": 9.111032249478775e-05, | |
| "loss": 0.0706, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 0.3608926504052058, | |
| "grad_norm": 0.5929092448145604, | |
| "learning_rate": 9.106596282659806e-05, | |
| "loss": 0.0541, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 0.36169108547247397, | |
| "grad_norm": 0.40753898344676914, | |
| "learning_rate": 9.102160315840837e-05, | |
| "loss": 0.0711, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 0.3624895205397421, | |
| "grad_norm": 0.9834740305156284, | |
| "learning_rate": 9.09772434902187e-05, | |
| "loss": 0.075, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 0.36328795560701027, | |
| "grad_norm": 0.525576434370597, | |
| "learning_rate": 9.093288382202901e-05, | |
| "loss": 0.0544, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.3640863906742784, | |
| "grad_norm": 0.865321793226859, | |
| "learning_rate": 9.088852415383934e-05, | |
| "loss": 0.0629, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 0.36488482574154657, | |
| "grad_norm": 0.6423714230291824, | |
| "learning_rate": 9.084416448564966e-05, | |
| "loss": 0.0642, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 0.36568326080881475, | |
| "grad_norm": 0.5137380114454171, | |
| "learning_rate": 9.079980481745996e-05, | |
| "loss": 0.084, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 0.36648169587608287, | |
| "grad_norm": 0.19927820633025783, | |
| "learning_rate": 9.075544514927029e-05, | |
| "loss": 0.0903, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 0.36728013094335105, | |
| "grad_norm": 1.058178203153698, | |
| "learning_rate": 9.071108548108061e-05, | |
| "loss": 0.0831, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.36807856601061917, | |
| "grad_norm": 1.249280563738633, | |
| "learning_rate": 9.066672581289093e-05, | |
| "loss": 0.068, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 0.36887700107788735, | |
| "grad_norm": 0.5807254981900888, | |
| "learning_rate": 9.062236614470124e-05, | |
| "loss": 0.0591, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 0.3696754361451555, | |
| "grad_norm": 0.5441909496135758, | |
| "learning_rate": 9.057800647651156e-05, | |
| "loss": 0.0581, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 0.37047387121242364, | |
| "grad_norm": 1.6412220260958208, | |
| "learning_rate": 9.053364680832188e-05, | |
| "loss": 0.0759, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 0.3712723062796918, | |
| "grad_norm": 0.7095159700544676, | |
| "learning_rate": 9.04892871401322e-05, | |
| "loss": 0.0473, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.37207074134695994, | |
| "grad_norm": 0.260058630317952, | |
| "learning_rate": 9.044492747194251e-05, | |
| "loss": 0.0461, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 0.3728691764142281, | |
| "grad_norm": 0.22182297181489585, | |
| "learning_rate": 9.040056780375283e-05, | |
| "loss": 0.061, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 0.37366761148149624, | |
| "grad_norm": 1.0224917304159058, | |
| "learning_rate": 9.035620813556315e-05, | |
| "loss": 0.0552, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 0.3744660465487644, | |
| "grad_norm": 0.28268273613408273, | |
| "learning_rate": 9.031184846737348e-05, | |
| "loss": 0.0496, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 0.3752644816160326, | |
| "grad_norm": 0.7027656925839558, | |
| "learning_rate": 9.026748879918378e-05, | |
| "loss": 0.0476, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.3760629166833007, | |
| "grad_norm": 0.3836861736397752, | |
| "learning_rate": 9.02231291309941e-05, | |
| "loss": 0.0652, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 0.3768613517505689, | |
| "grad_norm": 0.3397625493924417, | |
| "learning_rate": 9.017876946280443e-05, | |
| "loss": 0.0834, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 0.377659786817837, | |
| "grad_norm": 0.48750058843343874, | |
| "learning_rate": 9.013440979461473e-05, | |
| "loss": 0.0574, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 0.3784582218851052, | |
| "grad_norm": 0.5010802768596067, | |
| "learning_rate": 9.009005012642506e-05, | |
| "loss": 0.0623, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 0.37925665695237337, | |
| "grad_norm": 1.3902600676959112, | |
| "learning_rate": 9.004569045823538e-05, | |
| "loss": 0.0747, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.3800550920196415, | |
| "grad_norm": 0.6234122056900067, | |
| "learning_rate": 9.000133079004569e-05, | |
| "loss": 0.0478, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 0.38085352708690967, | |
| "grad_norm": 0.5685712290921905, | |
| "learning_rate": 8.995697112185602e-05, | |
| "loss": 0.0887, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 0.3816519621541778, | |
| "grad_norm": 0.565347440171693, | |
| "learning_rate": 8.991261145366633e-05, | |
| "loss": 0.0684, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 0.38245039722144597, | |
| "grad_norm": 0.5939386619028115, | |
| "learning_rate": 8.986825178547665e-05, | |
| "loss": 0.0656, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 0.38324883228871415, | |
| "grad_norm": 0.35600920379795853, | |
| "learning_rate": 8.982389211728697e-05, | |
| "loss": 0.0667, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.38404726735598227, | |
| "grad_norm": 0.3450273603243629, | |
| "learning_rate": 8.977953244909729e-05, | |
| "loss": 0.0768, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 0.38484570242325045, | |
| "grad_norm": 0.7355536879981746, | |
| "learning_rate": 8.97351727809076e-05, | |
| "loss": 0.0784, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 0.38564413749051857, | |
| "grad_norm": 0.6358098164509965, | |
| "learning_rate": 8.969081311271792e-05, | |
| "loss": 0.0482, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 0.38644257255778675, | |
| "grad_norm": 0.8836154598539316, | |
| "learning_rate": 8.964645344452824e-05, | |
| "loss": 0.0668, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 0.38724100762505487, | |
| "grad_norm": 0.24586284011234355, | |
| "learning_rate": 8.960209377633856e-05, | |
| "loss": 0.0444, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.38803944269232304, | |
| "grad_norm": 0.31228441399739965, | |
| "learning_rate": 8.955773410814887e-05, | |
| "loss": 0.0546, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 0.3888378777595912, | |
| "grad_norm": 1.3382422056050238, | |
| "learning_rate": 8.951337443995919e-05, | |
| "loss": 0.0637, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 0.38963631282685934, | |
| "grad_norm": 0.444796329308194, | |
| "learning_rate": 8.946901477176951e-05, | |
| "loss": 0.0543, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 0.3904347478941275, | |
| "grad_norm": 0.23308267264899266, | |
| "learning_rate": 8.942465510357983e-05, | |
| "loss": 0.067, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 0.39123318296139564, | |
| "grad_norm": 0.6401991484412825, | |
| "learning_rate": 8.938029543539014e-05, | |
| "loss": 0.0521, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.3920316180286638, | |
| "grad_norm": 0.22834355519756833, | |
| "learning_rate": 8.933593576720046e-05, | |
| "loss": 0.0431, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 0.392830053095932, | |
| "grad_norm": 0.7232285060113931, | |
| "learning_rate": 8.929157609901079e-05, | |
| "loss": 0.0547, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 0.3936284881632001, | |
| "grad_norm": 0.456986877784416, | |
| "learning_rate": 8.92472164308211e-05, | |
| "loss": 0.0662, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 0.3944269232304683, | |
| "grad_norm": 0.25476686741206894, | |
| "learning_rate": 8.920285676263141e-05, | |
| "loss": 0.0557, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 0.3952253582977364, | |
| "grad_norm": 0.4027583008470791, | |
| "learning_rate": 8.915849709444174e-05, | |
| "loss": 0.0559, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.3960237933650046, | |
| "grad_norm": 0.14934067044491264, | |
| "learning_rate": 8.911413742625205e-05, | |
| "loss": 0.0656, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 0.39682222843227277, | |
| "grad_norm": 0.14225234528381334, | |
| "learning_rate": 8.906977775806238e-05, | |
| "loss": 0.0504, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 0.3976206634995409, | |
| "grad_norm": 1.1907087935137126, | |
| "learning_rate": 8.90254180898727e-05, | |
| "loss": 0.071, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 0.39841909856680907, | |
| "grad_norm": 0.27933025425637376, | |
| "learning_rate": 8.8981058421683e-05, | |
| "loss": 0.0566, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 0.3992175336340772, | |
| "grad_norm": 0.6083068308403123, | |
| "learning_rate": 8.893669875349333e-05, | |
| "loss": 0.0662, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.40001596870134537, | |
| "grad_norm": 1.1884762791366723, | |
| "learning_rate": 8.889233908530365e-05, | |
| "loss": 0.0676, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 0.4008144037686135, | |
| "grad_norm": 1.0558648065537648, | |
| "learning_rate": 8.884797941711396e-05, | |
| "loss": 0.0705, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 0.40161283883588167, | |
| "grad_norm": 0.4289983200286532, | |
| "learning_rate": 8.880361974892428e-05, | |
| "loss": 0.0727, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 0.40241127390314985, | |
| "grad_norm": 0.4911531992853839, | |
| "learning_rate": 8.87592600807346e-05, | |
| "loss": 0.0714, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 0.40320970897041797, | |
| "grad_norm": 0.31150842781342475, | |
| "learning_rate": 8.871490041254492e-05, | |
| "loss": 0.0528, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 0.40400814403768615, | |
| "grad_norm": 0.6489978032345287, | |
| "learning_rate": 8.867054074435523e-05, | |
| "loss": 0.0751, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 0.40480657910495427, | |
| "grad_norm": 0.419948904458347, | |
| "learning_rate": 8.862618107616557e-05, | |
| "loss": 0.062, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 0.40560501417222244, | |
| "grad_norm": 0.2183663052059662, | |
| "learning_rate": 8.858182140797587e-05, | |
| "loss": 0.0638, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 0.4064034492394906, | |
| "grad_norm": 0.26154611890494367, | |
| "learning_rate": 8.853746173978619e-05, | |
| "loss": 0.0483, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 0.40720188430675874, | |
| "grad_norm": 1.4954231531687736, | |
| "learning_rate": 8.849310207159652e-05, | |
| "loss": 0.0943, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.4080003193740269, | |
| "grad_norm": 0.3045976586748024, | |
| "learning_rate": 8.844874240340682e-05, | |
| "loss": 0.0679, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 0.40879875444129504, | |
| "grad_norm": 0.36479206890797944, | |
| "learning_rate": 8.840438273521714e-05, | |
| "loss": 0.0614, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 0.4095971895085632, | |
| "grad_norm": 0.2676201553843575, | |
| "learning_rate": 8.836002306702747e-05, | |
| "loss": 0.0412, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 0.4103956245758314, | |
| "grad_norm": 1.0386926051609444, | |
| "learning_rate": 8.831566339883777e-05, | |
| "loss": 0.0748, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 0.4111940596430995, | |
| "grad_norm": 0.3438745636536693, | |
| "learning_rate": 8.82713037306481e-05, | |
| "loss": 0.0603, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 0.4119924947103677, | |
| "grad_norm": 0.6260398748757086, | |
| "learning_rate": 8.822694406245842e-05, | |
| "loss": 0.0845, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 0.4127909297776358, | |
| "grad_norm": 0.5703119341755367, | |
| "learning_rate": 8.818258439426873e-05, | |
| "loss": 0.073, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 0.413589364844904, | |
| "grad_norm": 0.6937887146171914, | |
| "learning_rate": 8.813822472607906e-05, | |
| "loss": 0.0643, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 0.4143877999121721, | |
| "grad_norm": 1.1824674908984751, | |
| "learning_rate": 8.809386505788937e-05, | |
| "loss": 0.0754, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 0.4151862349794403, | |
| "grad_norm": 1.7614692575489068, | |
| "learning_rate": 8.804950538969969e-05, | |
| "loss": 0.1003, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.41598467004670847, | |
| "grad_norm": 0.3060089486060604, | |
| "learning_rate": 8.800514572151001e-05, | |
| "loss": 0.0715, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 0.4167831051139766, | |
| "grad_norm": 0.3910737594616741, | |
| "learning_rate": 8.796078605332033e-05, | |
| "loss": 0.0508, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 0.41758154018124477, | |
| "grad_norm": 0.19993085932726584, | |
| "learning_rate": 8.791642638513064e-05, | |
| "loss": 0.0573, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 0.4183799752485129, | |
| "grad_norm": 0.27516676488467984, | |
| "learning_rate": 8.787206671694096e-05, | |
| "loss": 0.0653, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 0.41917841031578107, | |
| "grad_norm": 0.6167919795211267, | |
| "learning_rate": 8.782770704875128e-05, | |
| "loss": 0.0942, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.41997684538304925, | |
| "grad_norm": 0.27936432847287795, | |
| "learning_rate": 8.77833473805616e-05, | |
| "loss": 0.0738, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 0.42077528045031737, | |
| "grad_norm": 0.7588252383320675, | |
| "learning_rate": 8.773898771237191e-05, | |
| "loss": 0.0523, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 0.42157371551758555, | |
| "grad_norm": 0.06949164789983199, | |
| "learning_rate": 8.769462804418223e-05, | |
| "loss": 0.0795, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 0.42237215058485367, | |
| "grad_norm": 0.3957542748855769, | |
| "learning_rate": 8.765026837599255e-05, | |
| "loss": 0.0865, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 0.42317058565212184, | |
| "grad_norm": 0.280739201904563, | |
| "learning_rate": 8.760590870780288e-05, | |
| "loss": 0.0508, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.42396902071939, | |
| "grad_norm": 0.5673662143218914, | |
| "learning_rate": 8.756154903961318e-05, | |
| "loss": 0.0744, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 0.42476745578665814, | |
| "grad_norm": 0.47405142279811713, | |
| "learning_rate": 8.75171893714235e-05, | |
| "loss": 0.0616, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 0.4255658908539263, | |
| "grad_norm": 0.1281058538132511, | |
| "learning_rate": 8.747282970323383e-05, | |
| "loss": 0.0434, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 0.42636432592119444, | |
| "grad_norm": 0.4979701542404763, | |
| "learning_rate": 8.742847003504413e-05, | |
| "loss": 0.0572, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 0.4271627609884626, | |
| "grad_norm": 1.4594895339218368, | |
| "learning_rate": 8.738411036685445e-05, | |
| "loss": 0.077, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 0.42796119605573074, | |
| "grad_norm": 0.41948570674673674, | |
| "learning_rate": 8.733975069866478e-05, | |
| "loss": 0.0654, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 0.4287596311229989, | |
| "grad_norm": 0.7352581247356831, | |
| "learning_rate": 8.729539103047509e-05, | |
| "loss": 0.0584, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 0.4295580661902671, | |
| "grad_norm": 0.23926559831019342, | |
| "learning_rate": 8.725103136228542e-05, | |
| "loss": 0.0412, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 0.4303565012575352, | |
| "grad_norm": 0.7753469657671935, | |
| "learning_rate": 8.720667169409573e-05, | |
| "loss": 0.0737, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 0.4311549363248034, | |
| "grad_norm": 0.6734499022356067, | |
| "learning_rate": 8.716231202590605e-05, | |
| "loss": 0.0749, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.4319533713920715, | |
| "grad_norm": 0.8067819775253303, | |
| "learning_rate": 8.711795235771637e-05, | |
| "loss": 0.0724, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 0.4327518064593397, | |
| "grad_norm": 0.3922789027943089, | |
| "learning_rate": 8.707359268952669e-05, | |
| "loss": 0.0714, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 0.43355024152660787, | |
| "grad_norm": 0.2643907438829241, | |
| "learning_rate": 8.7029233021337e-05, | |
| "loss": 0.0727, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 0.434348676593876, | |
| "grad_norm": 0.22125726238970103, | |
| "learning_rate": 8.698487335314732e-05, | |
| "loss": 0.0731, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 0.43514711166114417, | |
| "grad_norm": 0.5787553670066385, | |
| "learning_rate": 8.694051368495764e-05, | |
| "loss": 0.0699, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 0.4359455467284123, | |
| "grad_norm": 0.9740759075600869, | |
| "learning_rate": 8.689615401676796e-05, | |
| "loss": 0.0586, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 0.43674398179568047, | |
| "grad_norm": 0.5602442417248852, | |
| "learning_rate": 8.685179434857827e-05, | |
| "loss": 0.0517, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 0.43754241686294865, | |
| "grad_norm": 0.678676901103541, | |
| "learning_rate": 8.68074346803886e-05, | |
| "loss": 0.0557, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 0.43834085193021677, | |
| "grad_norm": 1.3188535290570824, | |
| "learning_rate": 8.676307501219891e-05, | |
| "loss": 0.0699, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 0.43913928699748495, | |
| "grad_norm": 1.2435485858474957, | |
| "learning_rate": 8.671871534400923e-05, | |
| "loss": 0.0907, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.43993772206475307, | |
| "grad_norm": 0.5531882667257373, | |
| "learning_rate": 8.667435567581956e-05, | |
| "loss": 0.0534, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 0.44073615713202124, | |
| "grad_norm": 0.209508564195029, | |
| "learning_rate": 8.662999600762986e-05, | |
| "loss": 0.0593, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 0.44153459219928937, | |
| "grad_norm": 0.40802476016406686, | |
| "learning_rate": 8.658563633944019e-05, | |
| "loss": 0.0497, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 0.44233302726655754, | |
| "grad_norm": 0.5003442540085888, | |
| "learning_rate": 8.654127667125051e-05, | |
| "loss": 0.0528, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 0.4431314623338257, | |
| "grad_norm": 0.5347815858537798, | |
| "learning_rate": 8.649691700306081e-05, | |
| "loss": 0.0484, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 0.44392989740109384, | |
| "grad_norm": 0.21195410850548047, | |
| "learning_rate": 8.645255733487114e-05, | |
| "loss": 0.0411, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 0.444728332468362, | |
| "grad_norm": 0.33320854722031934, | |
| "learning_rate": 8.640819766668146e-05, | |
| "loss": 0.0441, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 0.44552676753563014, | |
| "grad_norm": 0.5977688428709171, | |
| "learning_rate": 8.636383799849177e-05, | |
| "loss": 0.0711, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 0.4463252026028983, | |
| "grad_norm": 0.8369268611940605, | |
| "learning_rate": 8.63194783303021e-05, | |
| "loss": 0.0525, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 0.4471236376701665, | |
| "grad_norm": 0.3805775222554737, | |
| "learning_rate": 8.627511866211241e-05, | |
| "loss": 0.0602, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.4479220727374346, | |
| "grad_norm": 0.8957144762619743, | |
| "learning_rate": 8.623075899392273e-05, | |
| "loss": 0.0532, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 0.4487205078047028, | |
| "grad_norm": 0.4652900799065469, | |
| "learning_rate": 8.618639932573305e-05, | |
| "loss": 0.0717, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 0.4495189428719709, | |
| "grad_norm": 0.37729025821964557, | |
| "learning_rate": 8.614203965754337e-05, | |
| "loss": 0.0688, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 0.4503173779392391, | |
| "grad_norm": 1.040481843266623, | |
| "learning_rate": 8.609767998935368e-05, | |
| "loss": 0.0445, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 0.45111581300650727, | |
| "grad_norm": 0.3330126604993848, | |
| "learning_rate": 8.6053320321164e-05, | |
| "loss": 0.065, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 0.4519142480737754, | |
| "grad_norm": 0.543884899784255, | |
| "learning_rate": 8.600896065297432e-05, | |
| "loss": 0.0601, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 0.45271268314104357, | |
| "grad_norm": 0.2653124950369882, | |
| "learning_rate": 8.596460098478463e-05, | |
| "loss": 0.0545, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 0.4535111182083117, | |
| "grad_norm": 0.6739101914855006, | |
| "learning_rate": 8.592024131659495e-05, | |
| "loss": 0.0401, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 0.45430955327557987, | |
| "grad_norm": 0.2050892659042918, | |
| "learning_rate": 8.587588164840527e-05, | |
| "loss": 0.0647, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 0.455107988342848, | |
| "grad_norm": 0.5184564770421916, | |
| "learning_rate": 8.583152198021559e-05, | |
| "loss": 0.0566, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.45590642341011617, | |
| "grad_norm": 0.4895769280806872, | |
| "learning_rate": 8.578716231202592e-05, | |
| "loss": 0.0434, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 0.45670485847738435, | |
| "grad_norm": 0.14379874304204618, | |
| "learning_rate": 8.574280264383622e-05, | |
| "loss": 0.0379, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 0.45750329354465247, | |
| "grad_norm": 0.5671708658830928, | |
| "learning_rate": 8.569844297564654e-05, | |
| "loss": 0.0436, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 0.45830172861192064, | |
| "grad_norm": 0.7415929057111613, | |
| "learning_rate": 8.565408330745687e-05, | |
| "loss": 0.0713, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 0.45910016367918877, | |
| "grad_norm": 0.8550707763593818, | |
| "learning_rate": 8.560972363926719e-05, | |
| "loss": 0.0788, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 0.45989859874645694, | |
| "grad_norm": 0.723475303117586, | |
| "learning_rate": 8.55653639710775e-05, | |
| "loss": 0.0496, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 0.4606970338137251, | |
| "grad_norm": 0.10177492691267885, | |
| "learning_rate": 8.552100430288782e-05, | |
| "loss": 0.0523, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 0.46149546888099324, | |
| "grad_norm": 0.5119863271676398, | |
| "learning_rate": 8.547664463469814e-05, | |
| "loss": 0.0842, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 0.4622939039482614, | |
| "grad_norm": 0.8375508714525854, | |
| "learning_rate": 8.543228496650846e-05, | |
| "loss": 0.0751, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 0.46309233901552954, | |
| "grad_norm": 0.3543710763686105, | |
| "learning_rate": 8.538792529831877e-05, | |
| "loss": 0.0626, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.4638907740827977, | |
| "grad_norm": 0.21516304688172944, | |
| "learning_rate": 8.534356563012909e-05, | |
| "loss": 0.0556, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 0.4646892091500659, | |
| "grad_norm": 0.5369649304258546, | |
| "learning_rate": 8.529920596193941e-05, | |
| "loss": 0.0924, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 0.465487644217334, | |
| "grad_norm": 0.16686767153061297, | |
| "learning_rate": 8.525484629374973e-05, | |
| "loss": 0.0498, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 0.4662860792846022, | |
| "grad_norm": 0.5692944993426537, | |
| "learning_rate": 8.521048662556004e-05, | |
| "loss": 0.0618, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 0.4670845143518703, | |
| "grad_norm": 0.2500282256324541, | |
| "learning_rate": 8.516612695737036e-05, | |
| "loss": 0.0572, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 0.4678829494191385, | |
| "grad_norm": 1.0243874711555592, | |
| "learning_rate": 8.512176728918068e-05, | |
| "loss": 0.0697, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 0.4686813844864066, | |
| "grad_norm": 1.1440126889078868, | |
| "learning_rate": 8.5077407620991e-05, | |
| "loss": 0.0522, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 0.4694798195536748, | |
| "grad_norm": 0.6464834791986863, | |
| "learning_rate": 8.503304795280131e-05, | |
| "loss": 0.0495, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 0.47027825462094297, | |
| "grad_norm": 0.2987467279017007, | |
| "learning_rate": 8.498868828461164e-05, | |
| "loss": 0.051, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 0.4710766896882111, | |
| "grad_norm": 0.5446392106542469, | |
| "learning_rate": 8.494432861642195e-05, | |
| "loss": 0.07, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.47187512475547927, | |
| "grad_norm": 0.3319188507450066, | |
| "learning_rate": 8.489996894823227e-05, | |
| "loss": 0.0618, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 0.4726735598227474, | |
| "grad_norm": 0.17413701444289367, | |
| "learning_rate": 8.48556092800426e-05, | |
| "loss": 0.0615, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 0.47347199489001557, | |
| "grad_norm": 0.4660774684817729, | |
| "learning_rate": 8.48112496118529e-05, | |
| "loss": 0.0473, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 0.47427042995728375, | |
| "grad_norm": 0.31409912418756264, | |
| "learning_rate": 8.476688994366323e-05, | |
| "loss": 0.0352, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 0.47506886502455187, | |
| "grad_norm": 0.38023200269157886, | |
| "learning_rate": 8.472253027547355e-05, | |
| "loss": 0.0747, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 0.47586730009182004, | |
| "grad_norm": 1.0388339923063081, | |
| "learning_rate": 8.467817060728385e-05, | |
| "loss": 0.0711, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 0.47666573515908817, | |
| "grad_norm": 0.4483516898214409, | |
| "learning_rate": 8.463381093909418e-05, | |
| "loss": 0.0828, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 0.47746417022635634, | |
| "grad_norm": 1.308927326688913, | |
| "learning_rate": 8.45894512709045e-05, | |
| "loss": 0.0692, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 0.4782626052936245, | |
| "grad_norm": 0.8389767702919784, | |
| "learning_rate": 8.454509160271482e-05, | |
| "loss": 0.0501, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 0.47906104036089264, | |
| "grad_norm": 0.34836370344580164, | |
| "learning_rate": 8.450073193452514e-05, | |
| "loss": 0.0586, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.4798594754281608, | |
| "grad_norm": 0.4295239557610677, | |
| "learning_rate": 8.445637226633545e-05, | |
| "loss": 0.0675, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 0.48065791049542894, | |
| "grad_norm": 0.35036280574185724, | |
| "learning_rate": 8.441201259814577e-05, | |
| "loss": 0.0624, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 0.4814563455626971, | |
| "grad_norm": 0.44702820482053074, | |
| "learning_rate": 8.436765292995609e-05, | |
| "loss": 0.0694, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 0.48225478062996524, | |
| "grad_norm": 0.6531156283955792, | |
| "learning_rate": 8.43232932617664e-05, | |
| "loss": 0.0765, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 0.4830532156972334, | |
| "grad_norm": 0.4330465340758785, | |
| "learning_rate": 8.427893359357672e-05, | |
| "loss": 0.0667, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 0.4838516507645016, | |
| "grad_norm": 0.806120927975259, | |
| "learning_rate": 8.423457392538704e-05, | |
| "loss": 0.0624, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 0.4846500858317697, | |
| "grad_norm": 1.1472878212957334, | |
| "learning_rate": 8.419021425719736e-05, | |
| "loss": 0.0524, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 0.4854485208990379, | |
| "grad_norm": 1.0107019337484981, | |
| "learning_rate": 8.414585458900767e-05, | |
| "loss": 0.0919, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 0.486246955966306, | |
| "grad_norm": 0.43287809560013385, | |
| "learning_rate": 8.410149492081799e-05, | |
| "loss": 0.0825, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 0.4870453910335742, | |
| "grad_norm": 0.751653585498906, | |
| "learning_rate": 8.405713525262832e-05, | |
| "loss": 0.064, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.48784382610084237, | |
| "grad_norm": 1.279229861370186, | |
| "learning_rate": 8.401277558443863e-05, | |
| "loss": 0.0678, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 0.4886422611681105, | |
| "grad_norm": 0.7924618650401917, | |
| "learning_rate": 8.396841591624896e-05, | |
| "loss": 0.0619, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 0.48944069623537867, | |
| "grad_norm": 1.7361339434442595, | |
| "learning_rate": 8.392405624805928e-05, | |
| "loss": 0.0816, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 0.4902391313026468, | |
| "grad_norm": 0.7861976029319782, | |
| "learning_rate": 8.387969657986958e-05, | |
| "loss": 0.0939, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 0.49103756636991497, | |
| "grad_norm": 0.7619765245860369, | |
| "learning_rate": 8.383533691167991e-05, | |
| "loss": 0.0813, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 0.49183600143718315, | |
| "grad_norm": 0.5114292307220892, | |
| "learning_rate": 8.379097724349023e-05, | |
| "loss": 0.0616, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 0.49263443650445127, | |
| "grad_norm": 0.1391537940982425, | |
| "learning_rate": 8.374661757530054e-05, | |
| "loss": 0.0427, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 0.49343287157171944, | |
| "grad_norm": 0.6944831169186162, | |
| "learning_rate": 8.370225790711086e-05, | |
| "loss": 0.0767, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 0.49423130663898757, | |
| "grad_norm": 0.3894128639134844, | |
| "learning_rate": 8.365789823892118e-05, | |
| "loss": 0.0585, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 0.49502974170625574, | |
| "grad_norm": 0.9576661297036134, | |
| "learning_rate": 8.36135385707315e-05, | |
| "loss": 0.0897, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.49582817677352387, | |
| "grad_norm": 0.45017824313200255, | |
| "learning_rate": 8.356917890254181e-05, | |
| "loss": 0.0551, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 0.49662661184079204, | |
| "grad_norm": 0.578217231479692, | |
| "learning_rate": 8.352481923435213e-05, | |
| "loss": 0.0675, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 0.4974250469080602, | |
| "grad_norm": 0.8903703995378134, | |
| "learning_rate": 8.348045956616245e-05, | |
| "loss": 0.0682, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 0.49822348197532834, | |
| "grad_norm": 0.14260077092570403, | |
| "learning_rate": 8.343609989797277e-05, | |
| "loss": 0.0621, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 0.4990219170425965, | |
| "grad_norm": 0.8075310070857461, | |
| "learning_rate": 8.339174022978308e-05, | |
| "loss": 0.061, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 0.49982035210986464, | |
| "grad_norm": 0.7242461553616462, | |
| "learning_rate": 8.33473805615934e-05, | |
| "loss": 0.0927, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 0.5006187871771328, | |
| "grad_norm": 0.2005813623355205, | |
| "learning_rate": 8.330302089340372e-05, | |
| "loss": 0.0779, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 0.501417222244401, | |
| "grad_norm": 0.22869071139284775, | |
| "learning_rate": 8.325866122521404e-05, | |
| "loss": 0.0329, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 0.5022156573116692, | |
| "grad_norm": 2.3083924183194178, | |
| "learning_rate": 8.321430155702435e-05, | |
| "loss": 0.0619, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 0.5030140923789372, | |
| "grad_norm": 0.7434249385683861, | |
| "learning_rate": 8.316994188883468e-05, | |
| "loss": 0.0703, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.5038125274462054, | |
| "grad_norm": 0.9138191544723026, | |
| "learning_rate": 8.312558222064499e-05, | |
| "loss": 0.0577, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 0.5046109625134736, | |
| "grad_norm": 0.25653158436649787, | |
| "learning_rate": 8.30812225524553e-05, | |
| "loss": 0.0478, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 0.5054093975807418, | |
| "grad_norm": 0.5620053416520807, | |
| "learning_rate": 8.303686288426564e-05, | |
| "loss": 0.0481, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 0.50620783264801, | |
| "grad_norm": 0.1276357272302078, | |
| "learning_rate": 8.299250321607594e-05, | |
| "loss": 0.0479, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 0.507006267715278, | |
| "grad_norm": 0.60544258687383, | |
| "learning_rate": 8.294814354788627e-05, | |
| "loss": 0.0738, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 0.5078047027825462, | |
| "grad_norm": 0.4988768974466748, | |
| "learning_rate": 8.290378387969659e-05, | |
| "loss": 0.0602, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 0.5086031378498144, | |
| "grad_norm": 0.8380837716515054, | |
| "learning_rate": 8.285942421150689e-05, | |
| "loss": 0.0515, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 0.5094015729170825, | |
| "grad_norm": 0.2649314561466298, | |
| "learning_rate": 8.281506454331722e-05, | |
| "loss": 0.0526, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 0.5102000079843507, | |
| "grad_norm": 0.16309396798317682, | |
| "learning_rate": 8.277070487512754e-05, | |
| "loss": 0.0555, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 0.5109984430516188, | |
| "grad_norm": 1.2443648203406286, | |
| "learning_rate": 8.272634520693786e-05, | |
| "loss": 0.0613, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.511796878118887, | |
| "grad_norm": 0.6419294388683323, | |
| "learning_rate": 8.268198553874818e-05, | |
| "loss": 0.0568, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 0.5125953131861551, | |
| "grad_norm": 0.29889506341617506, | |
| "learning_rate": 8.263762587055849e-05, | |
| "loss": 0.0582, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 0.5133937482534233, | |
| "grad_norm": 0.6487256851752289, | |
| "learning_rate": 8.259326620236881e-05, | |
| "loss": 0.0562, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 0.5141921833206915, | |
| "grad_norm": 0.4604599082658974, | |
| "learning_rate": 8.254890653417913e-05, | |
| "loss": 0.0535, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 0.5149906183879596, | |
| "grad_norm": 0.5571572469431415, | |
| "learning_rate": 8.250454686598946e-05, | |
| "loss": 0.0595, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 0.5157890534552277, | |
| "grad_norm": 0.16286352259235828, | |
| "learning_rate": 8.246018719779976e-05, | |
| "loss": 0.0645, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 0.5165874885224959, | |
| "grad_norm": 0.2423915931929422, | |
| "learning_rate": 8.241582752961008e-05, | |
| "loss": 0.04, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 0.5173859235897641, | |
| "grad_norm": 0.12424351376419095, | |
| "learning_rate": 8.237146786142041e-05, | |
| "loss": 0.0663, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 0.5181843586570322, | |
| "grad_norm": 0.72651821446849, | |
| "learning_rate": 8.232710819323071e-05, | |
| "loss": 0.0522, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 0.5189827937243003, | |
| "grad_norm": 0.7950904269617449, | |
| "learning_rate": 8.228274852504103e-05, | |
| "loss": 0.0492, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.5197812287915685, | |
| "grad_norm": 0.5747491220221507, | |
| "learning_rate": 8.223838885685136e-05, | |
| "loss": 0.0844, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 0.5205796638588367, | |
| "grad_norm": 0.2782512901079484, | |
| "learning_rate": 8.219402918866167e-05, | |
| "loss": 0.0475, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 0.5213780989261049, | |
| "grad_norm": 0.2648292458710825, | |
| "learning_rate": 8.2149669520472e-05, | |
| "loss": 0.0544, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 0.5221765339933729, | |
| "grad_norm": 0.19105815570303936, | |
| "learning_rate": 8.210530985228231e-05, | |
| "loss": 0.0653, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 0.5229749690606411, | |
| "grad_norm": 0.46407076378758166, | |
| "learning_rate": 8.206095018409262e-05, | |
| "loss": 0.0819, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 0.5237734041279093, | |
| "grad_norm": 0.12564468042648594, | |
| "learning_rate": 8.201659051590295e-05, | |
| "loss": 0.0684, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 0.5245718391951775, | |
| "grad_norm": 0.62639241054009, | |
| "learning_rate": 8.197223084771327e-05, | |
| "loss": 0.0632, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 0.5253702742624456, | |
| "grad_norm": 0.3230944239837902, | |
| "learning_rate": 8.192787117952358e-05, | |
| "loss": 0.0477, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 0.5261687093297137, | |
| "grad_norm": 0.9896576099526085, | |
| "learning_rate": 8.18835115113339e-05, | |
| "loss": 0.0764, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 0.5269671443969819, | |
| "grad_norm": 0.28160344356170797, | |
| "learning_rate": 8.183915184314422e-05, | |
| "loss": 0.0542, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.5277655794642501, | |
| "grad_norm": 1.683063453242496, | |
| "learning_rate": 8.179479217495454e-05, | |
| "loss": 0.0467, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 0.5285640145315182, | |
| "grad_norm": 0.17755025572391545, | |
| "learning_rate": 8.175043250676485e-05, | |
| "loss": 0.0566, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 0.5293624495987864, | |
| "grad_norm": 0.45573443555969606, | |
| "learning_rate": 8.170607283857517e-05, | |
| "loss": 0.0677, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 0.5301608846660545, | |
| "grad_norm": 0.1575084916438133, | |
| "learning_rate": 8.166171317038549e-05, | |
| "loss": 0.0575, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 0.5309593197333227, | |
| "grad_norm": 1.2877144807985812, | |
| "learning_rate": 8.16173535021958e-05, | |
| "loss": 0.0678, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 0.5317577548005908, | |
| "grad_norm": 0.18522169090671942, | |
| "learning_rate": 8.157299383400612e-05, | |
| "loss": 0.0604, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 0.532556189867859, | |
| "grad_norm": 0.7266885221486851, | |
| "learning_rate": 8.152863416581644e-05, | |
| "loss": 0.0681, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 0.5333546249351272, | |
| "grad_norm": 0.12652671512749664, | |
| "learning_rate": 8.148427449762677e-05, | |
| "loss": 0.0538, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 0.5341530600023953, | |
| "grad_norm": 0.9739164827650703, | |
| "learning_rate": 8.143991482943708e-05, | |
| "loss": 0.0836, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 0.5349514950696634, | |
| "grad_norm": 0.17376810669729617, | |
| "learning_rate": 8.139555516124739e-05, | |
| "loss": 0.0889, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.5357499301369316, | |
| "grad_norm": 0.6641373869163394, | |
| "learning_rate": 8.135119549305772e-05, | |
| "loss": 0.0764, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 0.5365483652041998, | |
| "grad_norm": 0.16815120710505263, | |
| "learning_rate": 8.130683582486803e-05, | |
| "loss": 0.0672, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 0.537346800271468, | |
| "grad_norm": 0.5796326559620748, | |
| "learning_rate": 8.126247615667834e-05, | |
| "loss": 0.078, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 0.538145235338736, | |
| "grad_norm": 0.1412822636172128, | |
| "learning_rate": 8.121811648848868e-05, | |
| "loss": 0.0652, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 0.5389436704060042, | |
| "grad_norm": 0.18003108883859614, | |
| "learning_rate": 8.117375682029898e-05, | |
| "loss": 0.0508, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 0.5397421054732724, | |
| "grad_norm": 0.36894859982760714, | |
| "learning_rate": 8.112939715210931e-05, | |
| "loss": 0.0612, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 0.5405405405405406, | |
| "grad_norm": 0.2407377212282452, | |
| "learning_rate": 8.108503748391963e-05, | |
| "loss": 0.0435, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 0.5413389756078087, | |
| "grad_norm": 1.352276758332171, | |
| "learning_rate": 8.104067781572993e-05, | |
| "loss": 0.0893, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 0.5421374106750768, | |
| "grad_norm": 0.6181733852818506, | |
| "learning_rate": 8.099631814754026e-05, | |
| "loss": 0.0631, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 0.542935845742345, | |
| "grad_norm": 0.510119508066543, | |
| "learning_rate": 8.095195847935058e-05, | |
| "loss": 0.065, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.5437342808096132, | |
| "grad_norm": 0.33556736143265536, | |
| "learning_rate": 8.09075988111609e-05, | |
| "loss": 0.052, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 0.5445327158768813, | |
| "grad_norm": 0.23362913712809788, | |
| "learning_rate": 8.086323914297121e-05, | |
| "loss": 0.06, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 0.5453311509441494, | |
| "grad_norm": 1.0108831673519008, | |
| "learning_rate": 8.081887947478153e-05, | |
| "loss": 0.1047, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 0.5461295860114176, | |
| "grad_norm": 0.9061380750994878, | |
| "learning_rate": 8.077451980659185e-05, | |
| "loss": 0.0683, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 0.5469280210786858, | |
| "grad_norm": 0.42524090606390924, | |
| "learning_rate": 8.073016013840217e-05, | |
| "loss": 0.0577, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 0.5477264561459539, | |
| "grad_norm": 0.9112083691474904, | |
| "learning_rate": 8.06858004702125e-05, | |
| "loss": 0.0628, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 0.5485248912132221, | |
| "grad_norm": 0.343851767869507, | |
| "learning_rate": 8.06414408020228e-05, | |
| "loss": 0.0411, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 0.5493233262804902, | |
| "grad_norm": 0.26288157493297026, | |
| "learning_rate": 8.059708113383312e-05, | |
| "loss": 0.0884, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 0.5501217613477584, | |
| "grad_norm": 0.672374580318619, | |
| "learning_rate": 8.055272146564345e-05, | |
| "loss": 0.0575, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 0.5509201964150265, | |
| "grad_norm": 0.25631627581337185, | |
| "learning_rate": 8.050836179745375e-05, | |
| "loss": 0.0367, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.5517186314822947, | |
| "grad_norm": 0.459897810225924, | |
| "learning_rate": 8.046400212926408e-05, | |
| "loss": 0.0782, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 0.5525170665495629, | |
| "grad_norm": 0.5999621078607194, | |
| "learning_rate": 8.04196424610744e-05, | |
| "loss": 0.075, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 0.553315501616831, | |
| "grad_norm": 0.23982515518970804, | |
| "learning_rate": 8.03752827928847e-05, | |
| "loss": 0.0424, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 0.5541139366840991, | |
| "grad_norm": 0.23837663059539815, | |
| "learning_rate": 8.033092312469504e-05, | |
| "loss": 0.0547, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 0.5549123717513673, | |
| "grad_norm": 0.46337228232386396, | |
| "learning_rate": 8.028656345650535e-05, | |
| "loss": 0.0671, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 0.5557108068186355, | |
| "grad_norm": 2.0716745224894217, | |
| "learning_rate": 8.024220378831566e-05, | |
| "loss": 0.0961, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 0.5565092418859037, | |
| "grad_norm": 0.38258068674015955, | |
| "learning_rate": 8.019784412012599e-05, | |
| "loss": 0.0641, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 0.5573076769531717, | |
| "grad_norm": 0.6422500771717725, | |
| "learning_rate": 8.01534844519363e-05, | |
| "loss": 0.0682, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 0.5581061120204399, | |
| "grad_norm": 0.6046415591096082, | |
| "learning_rate": 8.010912478374662e-05, | |
| "loss": 0.0972, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 0.5589045470877081, | |
| "grad_norm": 0.8616981377908823, | |
| "learning_rate": 8.006476511555694e-05, | |
| "loss": 0.0746, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.5597029821549763, | |
| "grad_norm": 0.24097579377088618, | |
| "learning_rate": 8.002040544736726e-05, | |
| "loss": 0.0751, | |
| "step": 7010 | |
| }, | |
| { | |
| "epoch": 0.5605014172222444, | |
| "grad_norm": 0.6522706315596682, | |
| "learning_rate": 7.997604577917758e-05, | |
| "loss": 0.0471, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 0.5612998522895125, | |
| "grad_norm": 1.2638941405843032, | |
| "learning_rate": 7.99316861109879e-05, | |
| "loss": 0.0571, | |
| "step": 7030 | |
| }, | |
| { | |
| "epoch": 0.5620982873567807, | |
| "grad_norm": 0.1313943832208323, | |
| "learning_rate": 7.988732644279821e-05, | |
| "loss": 0.0374, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 0.5628967224240489, | |
| "grad_norm": 0.6033488591473235, | |
| "learning_rate": 7.984296677460853e-05, | |
| "loss": 0.0527, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 0.563695157491317, | |
| "grad_norm": 0.19916184688760954, | |
| "learning_rate": 7.979860710641885e-05, | |
| "loss": 0.0578, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 0.5644935925585852, | |
| "grad_norm": 0.5973224327114647, | |
| "learning_rate": 7.975424743822916e-05, | |
| "loss": 0.066, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 0.5652920276258533, | |
| "grad_norm": 0.4957434283307288, | |
| "learning_rate": 7.970988777003948e-05, | |
| "loss": 0.0496, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 0.5660904626931215, | |
| "grad_norm": 0.7014221037795486, | |
| "learning_rate": 7.966552810184981e-05, | |
| "loss": 0.0536, | |
| "step": 7090 | |
| }, | |
| { | |
| "epoch": 0.5668888977603896, | |
| "grad_norm": 0.2942948589315806, | |
| "learning_rate": 7.962116843366012e-05, | |
| "loss": 0.0631, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.5676873328276578, | |
| "grad_norm": 0.6122650378671805, | |
| "learning_rate": 7.957680876547043e-05, | |
| "loss": 0.0758, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 0.568485767894926, | |
| "grad_norm": 0.06698143407689137, | |
| "learning_rate": 7.953244909728076e-05, | |
| "loss": 0.0487, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 0.5692842029621941, | |
| "grad_norm": 0.49897005838474545, | |
| "learning_rate": 7.948808942909107e-05, | |
| "loss": 0.0745, | |
| "step": 7130 | |
| }, | |
| { | |
| "epoch": 0.5700826380294622, | |
| "grad_norm": 0.12126764090029286, | |
| "learning_rate": 7.94437297609014e-05, | |
| "loss": 0.0491, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 0.5708810730967304, | |
| "grad_norm": 0.6765414621536705, | |
| "learning_rate": 7.939937009271172e-05, | |
| "loss": 0.0707, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 0.5716795081639986, | |
| "grad_norm": 0.7488705487173876, | |
| "learning_rate": 7.935501042452202e-05, | |
| "loss": 0.0707, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 0.5724779432312668, | |
| "grad_norm": 0.4396172085640616, | |
| "learning_rate": 7.931065075633235e-05, | |
| "loss": 0.0566, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 0.5732763782985348, | |
| "grad_norm": 0.8264001543643991, | |
| "learning_rate": 7.926629108814267e-05, | |
| "loss": 0.0741, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 0.574074813365803, | |
| "grad_norm": 0.34922891662042377, | |
| "learning_rate": 7.922193141995297e-05, | |
| "loss": 0.0605, | |
| "step": 7190 | |
| }, | |
| { | |
| "epoch": 0.5748732484330712, | |
| "grad_norm": 1.2243612635366266, | |
| "learning_rate": 7.91775717517633e-05, | |
| "loss": 0.093, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.5756716835003394, | |
| "grad_norm": 0.7752867030859307, | |
| "learning_rate": 7.913321208357362e-05, | |
| "loss": 0.0735, | |
| "step": 7210 | |
| }, | |
| { | |
| "epoch": 0.5764701185676074, | |
| "grad_norm": 0.260444765912474, | |
| "learning_rate": 7.908885241538394e-05, | |
| "loss": 0.0618, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 0.5772685536348756, | |
| "grad_norm": 0.20684347358549432, | |
| "learning_rate": 7.904449274719425e-05, | |
| "loss": 0.0528, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 0.5780669887021438, | |
| "grad_norm": 0.5262456398761236, | |
| "learning_rate": 7.900013307900457e-05, | |
| "loss": 0.0702, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 0.578865423769412, | |
| "grad_norm": 0.4831605388391065, | |
| "learning_rate": 7.895577341081489e-05, | |
| "loss": 0.0573, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 0.5796638588366801, | |
| "grad_norm": 1.350413669611844, | |
| "learning_rate": 7.89114137426252e-05, | |
| "loss": 0.0811, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 0.5804622939039482, | |
| "grad_norm": 0.17202686615065826, | |
| "learning_rate": 7.886705407443554e-05, | |
| "loss": 0.0662, | |
| "step": 7270 | |
| }, | |
| { | |
| "epoch": 0.5812607289712164, | |
| "grad_norm": 0.3215578422459197, | |
| "learning_rate": 7.882269440624584e-05, | |
| "loss": 0.0511, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 0.5820591640384846, | |
| "grad_norm": 0.4914632124251946, | |
| "learning_rate": 7.877833473805616e-05, | |
| "loss": 0.0976, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 0.5828575991057527, | |
| "grad_norm": 0.6502099273735706, | |
| "learning_rate": 7.873397506986649e-05, | |
| "loss": 0.0552, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.5836560341730209, | |
| "grad_norm": 0.4750500443154491, | |
| "learning_rate": 7.86896154016768e-05, | |
| "loss": 0.0637, | |
| "step": 7310 | |
| }, | |
| { | |
| "epoch": 0.584454469240289, | |
| "grad_norm": 0.2973395613219485, | |
| "learning_rate": 7.864525573348712e-05, | |
| "loss": 0.0625, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 0.5852529043075572, | |
| "grad_norm": 0.27458996393587615, | |
| "learning_rate": 7.860089606529744e-05, | |
| "loss": 0.0637, | |
| "step": 7330 | |
| }, | |
| { | |
| "epoch": 0.5860513393748253, | |
| "grad_norm": 0.8209883237591991, | |
| "learning_rate": 7.855653639710775e-05, | |
| "loss": 0.0745, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 0.5868497744420935, | |
| "grad_norm": 0.9682766674817845, | |
| "learning_rate": 7.851217672891808e-05, | |
| "loss": 0.0688, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 0.5876482095093617, | |
| "grad_norm": 0.2958448984803725, | |
| "learning_rate": 7.84678170607284e-05, | |
| "loss": 0.0908, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 0.5884466445766298, | |
| "grad_norm": 0.935616415881285, | |
| "learning_rate": 7.842345739253871e-05, | |
| "loss": 0.0432, | |
| "step": 7370 | |
| }, | |
| { | |
| "epoch": 0.5892450796438979, | |
| "grad_norm": 0.42683926949066886, | |
| "learning_rate": 7.837909772434903e-05, | |
| "loss": 0.0724, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 0.5900435147111661, | |
| "grad_norm": 0.14183843181607633, | |
| "learning_rate": 7.833473805615935e-05, | |
| "loss": 0.0511, | |
| "step": 7390 | |
| }, | |
| { | |
| "epoch": 0.5908419497784343, | |
| "grad_norm": 0.5699310686505371, | |
| "learning_rate": 7.829037838796966e-05, | |
| "loss": 0.0871, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.5916403848457025, | |
| "grad_norm": 0.23883922920971742, | |
| "learning_rate": 7.824601871977998e-05, | |
| "loss": 0.0579, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 0.5924388199129705, | |
| "grad_norm": 0.6123424382050415, | |
| "learning_rate": 7.82016590515903e-05, | |
| "loss": 0.0864, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 0.5932372549802387, | |
| "grad_norm": 0.39849019500035515, | |
| "learning_rate": 7.815729938340062e-05, | |
| "loss": 0.0506, | |
| "step": 7430 | |
| }, | |
| { | |
| "epoch": 0.5940356900475069, | |
| "grad_norm": 0.4663347129336559, | |
| "learning_rate": 7.811293971521093e-05, | |
| "loss": 0.0574, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 0.5948341251147751, | |
| "grad_norm": 0.7471883648747045, | |
| "learning_rate": 7.806858004702125e-05, | |
| "loss": 0.0522, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 0.5956325601820432, | |
| "grad_norm": 0.651546517935869, | |
| "learning_rate": 7.802422037883157e-05, | |
| "loss": 0.062, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 0.5964309952493113, | |
| "grad_norm": 0.2860666478175177, | |
| "learning_rate": 7.797986071064189e-05, | |
| "loss": 0.0718, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 0.5972294303165795, | |
| "grad_norm": 0.6037593345648162, | |
| "learning_rate": 7.79355010424522e-05, | |
| "loss": 0.0519, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 0.5980278653838477, | |
| "grad_norm": 0.3900292909701525, | |
| "learning_rate": 7.789114137426252e-05, | |
| "loss": 0.0514, | |
| "step": 7490 | |
| }, | |
| { | |
| "epoch": 0.5988263004511158, | |
| "grad_norm": 0.2854211108781873, | |
| "learning_rate": 7.784678170607285e-05, | |
| "loss": 0.0544, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.599624735518384, | |
| "grad_norm": 0.8380500971235885, | |
| "learning_rate": 7.780242203788315e-05, | |
| "loss": 0.0511, | |
| "step": 7510 | |
| }, | |
| { | |
| "epoch": 0.6004231705856521, | |
| "grad_norm": 0.7573018775305534, | |
| "learning_rate": 7.775806236969347e-05, | |
| "loss": 0.0876, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 0.6012216056529203, | |
| "grad_norm": 0.26249122345970394, | |
| "learning_rate": 7.77137027015038e-05, | |
| "loss": 0.0785, | |
| "step": 7530 | |
| }, | |
| { | |
| "epoch": 0.6020200407201884, | |
| "grad_norm": 0.8173089098567144, | |
| "learning_rate": 7.766934303331411e-05, | |
| "loss": 0.0628, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 0.6028184757874566, | |
| "grad_norm": 0.4551514955773799, | |
| "learning_rate": 7.762498336512444e-05, | |
| "loss": 0.0564, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 0.6036169108547247, | |
| "grad_norm": 0.7281675170313439, | |
| "learning_rate": 7.758062369693476e-05, | |
| "loss": 0.0558, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 0.6044153459219929, | |
| "grad_norm": 0.729468984681738, | |
| "learning_rate": 7.753626402874506e-05, | |
| "loss": 0.0673, | |
| "step": 7570 | |
| }, | |
| { | |
| "epoch": 0.605213780989261, | |
| "grad_norm": 1.175985724771846, | |
| "learning_rate": 7.749190436055539e-05, | |
| "loss": 0.0915, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 0.6060122160565292, | |
| "grad_norm": 0.2252551084028221, | |
| "learning_rate": 7.744754469236571e-05, | |
| "loss": 0.059, | |
| "step": 7590 | |
| }, | |
| { | |
| "epoch": 0.6068106511237974, | |
| "grad_norm": 0.6300017871560232, | |
| "learning_rate": 7.740318502417602e-05, | |
| "loss": 0.0605, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.6076090861910655, | |
| "grad_norm": 0.4962012860136617, | |
| "learning_rate": 7.735882535598634e-05, | |
| "loss": 0.0664, | |
| "step": 7610 | |
| }, | |
| { | |
| "epoch": 0.6084075212583336, | |
| "grad_norm": 0.36523306801124467, | |
| "learning_rate": 7.731446568779666e-05, | |
| "loss": 0.0507, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 0.6092059563256018, | |
| "grad_norm": 0.7522576930476144, | |
| "learning_rate": 7.727010601960698e-05, | |
| "loss": 0.0506, | |
| "step": 7630 | |
| }, | |
| { | |
| "epoch": 0.61000439139287, | |
| "grad_norm": 0.25074975014272083, | |
| "learning_rate": 7.72257463514173e-05, | |
| "loss": 0.0395, | |
| "step": 7640 | |
| }, | |
| { | |
| "epoch": 0.6108028264601382, | |
| "grad_norm": 0.5220421408073481, | |
| "learning_rate": 7.718138668322761e-05, | |
| "loss": 0.0738, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 0.6116012615274062, | |
| "grad_norm": 0.44417669645567104, | |
| "learning_rate": 7.713702701503793e-05, | |
| "loss": 0.0601, | |
| "step": 7660 | |
| }, | |
| { | |
| "epoch": 0.6123996965946744, | |
| "grad_norm": 0.8061882748059278, | |
| "learning_rate": 7.709266734684825e-05, | |
| "loss": 0.0491, | |
| "step": 7670 | |
| }, | |
| { | |
| "epoch": 0.6131981316619426, | |
| "grad_norm": 2.5447832388331877, | |
| "learning_rate": 7.704830767865858e-05, | |
| "loss": 0.0565, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 0.6139965667292108, | |
| "grad_norm": 0.3301717640321505, | |
| "learning_rate": 7.700394801046888e-05, | |
| "loss": 0.0553, | |
| "step": 7690 | |
| }, | |
| { | |
| "epoch": 0.614795001796479, | |
| "grad_norm": 0.49796370625063435, | |
| "learning_rate": 7.69595883422792e-05, | |
| "loss": 0.0561, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.615593436863747, | |
| "grad_norm": 1.755807427152239, | |
| "learning_rate": 7.691522867408953e-05, | |
| "loss": 0.0687, | |
| "step": 7710 | |
| }, | |
| { | |
| "epoch": 0.6163918719310152, | |
| "grad_norm": 1.160415695120886, | |
| "learning_rate": 7.687086900589983e-05, | |
| "loss": 0.0556, | |
| "step": 7720 | |
| }, | |
| { | |
| "epoch": 0.6171903069982834, | |
| "grad_norm": 1.0085260825243685, | |
| "learning_rate": 7.682650933771016e-05, | |
| "loss": 0.069, | |
| "step": 7730 | |
| }, | |
| { | |
| "epoch": 0.6179887420655515, | |
| "grad_norm": 0.1306292147862706, | |
| "learning_rate": 7.678214966952048e-05, | |
| "loss": 0.055, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 0.6187871771328197, | |
| "grad_norm": 0.6485284615482432, | |
| "learning_rate": 7.673779000133079e-05, | |
| "loss": 0.0442, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 0.6195856122000878, | |
| "grad_norm": 0.4228073123981237, | |
| "learning_rate": 7.669343033314112e-05, | |
| "loss": 0.0764, | |
| "step": 7760 | |
| }, | |
| { | |
| "epoch": 0.620384047267356, | |
| "grad_norm": 0.23326275111725533, | |
| "learning_rate": 7.664907066495143e-05, | |
| "loss": 0.0464, | |
| "step": 7770 | |
| }, | |
| { | |
| "epoch": 0.6211824823346241, | |
| "grad_norm": 0.9561233426879092, | |
| "learning_rate": 7.660471099676175e-05, | |
| "loss": 0.0638, | |
| "step": 7780 | |
| }, | |
| { | |
| "epoch": 0.6219809174018923, | |
| "grad_norm": 0.6384667660627328, | |
| "learning_rate": 7.656035132857207e-05, | |
| "loss": 0.07, | |
| "step": 7790 | |
| }, | |
| { | |
| "epoch": 0.6227793524691605, | |
| "grad_norm": 0.8416825374497995, | |
| "learning_rate": 7.651599166038239e-05, | |
| "loss": 0.077, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.6235777875364286, | |
| "grad_norm": 1.0456515597273508, | |
| "learning_rate": 7.64716319921927e-05, | |
| "loss": 0.0667, | |
| "step": 7810 | |
| }, | |
| { | |
| "epoch": 0.6243762226036967, | |
| "grad_norm": 0.43237442544902527, | |
| "learning_rate": 7.642727232400302e-05, | |
| "loss": 0.0422, | |
| "step": 7820 | |
| }, | |
| { | |
| "epoch": 0.6251746576709649, | |
| "grad_norm": 0.5474360097858476, | |
| "learning_rate": 7.638291265581334e-05, | |
| "loss": 0.0567, | |
| "step": 7830 | |
| }, | |
| { | |
| "epoch": 0.6259730927382331, | |
| "grad_norm": 0.21149515091500953, | |
| "learning_rate": 7.633855298762366e-05, | |
| "loss": 0.0647, | |
| "step": 7840 | |
| }, | |
| { | |
| "epoch": 0.6267715278055013, | |
| "grad_norm": 0.381989832210318, | |
| "learning_rate": 7.629419331943397e-05, | |
| "loss": 0.0513, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 0.6275699628727693, | |
| "grad_norm": 0.5773680693611083, | |
| "learning_rate": 7.624983365124429e-05, | |
| "loss": 0.0638, | |
| "step": 7860 | |
| }, | |
| { | |
| "epoch": 0.6283683979400375, | |
| "grad_norm": 0.27172771417711034, | |
| "learning_rate": 7.620547398305461e-05, | |
| "loss": 0.0463, | |
| "step": 7870 | |
| }, | |
| { | |
| "epoch": 0.6291668330073057, | |
| "grad_norm": 1.2529735246413634, | |
| "learning_rate": 7.616111431486492e-05, | |
| "loss": 0.0711, | |
| "step": 7880 | |
| }, | |
| { | |
| "epoch": 0.6299652680745739, | |
| "grad_norm": 0.520377783745768, | |
| "learning_rate": 7.611675464667524e-05, | |
| "loss": 0.0638, | |
| "step": 7890 | |
| }, | |
| { | |
| "epoch": 0.6307637031418419, | |
| "grad_norm": 1.748282147554803, | |
| "learning_rate": 7.607239497848556e-05, | |
| "loss": 0.0604, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.6315621382091101, | |
| "grad_norm": 0.26800510354228496, | |
| "learning_rate": 7.602803531029589e-05, | |
| "loss": 0.0437, | |
| "step": 7910 | |
| }, | |
| { | |
| "epoch": 0.6323605732763783, | |
| "grad_norm": 1.4528358850229193, | |
| "learning_rate": 7.59836756421062e-05, | |
| "loss": 0.0592, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 0.6331590083436465, | |
| "grad_norm": 0.1173707058797108, | |
| "learning_rate": 7.593931597391651e-05, | |
| "loss": 0.0449, | |
| "step": 7930 | |
| }, | |
| { | |
| "epoch": 0.6339574434109146, | |
| "grad_norm": 0.218979297067727, | |
| "learning_rate": 7.589495630572684e-05, | |
| "loss": 0.0649, | |
| "step": 7940 | |
| }, | |
| { | |
| "epoch": 0.6347558784781827, | |
| "grad_norm": 0.18828056943739424, | |
| "learning_rate": 7.585059663753715e-05, | |
| "loss": 0.0496, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 0.6355543135454509, | |
| "grad_norm": 0.36105964997831, | |
| "learning_rate": 7.580623696934748e-05, | |
| "loss": 0.0779, | |
| "step": 7960 | |
| }, | |
| { | |
| "epoch": 0.6363527486127191, | |
| "grad_norm": 0.3860519463421298, | |
| "learning_rate": 7.57618773011578e-05, | |
| "loss": 0.0616, | |
| "step": 7970 | |
| }, | |
| { | |
| "epoch": 0.6371511836799872, | |
| "grad_norm": 0.37066091618679997, | |
| "learning_rate": 7.57175176329681e-05, | |
| "loss": 0.0694, | |
| "step": 7980 | |
| }, | |
| { | |
| "epoch": 0.6379496187472554, | |
| "grad_norm": 0.8566487456631784, | |
| "learning_rate": 7.567315796477843e-05, | |
| "loss": 0.0749, | |
| "step": 7990 | |
| }, | |
| { | |
| "epoch": 0.6387480538145235, | |
| "grad_norm": 0.4122089947136881, | |
| "learning_rate": 7.562879829658875e-05, | |
| "loss": 0.0558, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.6395464888817917, | |
| "grad_norm": 1.1227851605078236, | |
| "learning_rate": 7.558443862839906e-05, | |
| "loss": 0.0581, | |
| "step": 8010 | |
| }, | |
| { | |
| "epoch": 0.6403449239490598, | |
| "grad_norm": 0.4289686099198654, | |
| "learning_rate": 7.554007896020938e-05, | |
| "loss": 0.0576, | |
| "step": 8020 | |
| }, | |
| { | |
| "epoch": 0.641143359016328, | |
| "grad_norm": 0.6918225500592088, | |
| "learning_rate": 7.54957192920197e-05, | |
| "loss": 0.0783, | |
| "step": 8030 | |
| }, | |
| { | |
| "epoch": 0.6419417940835962, | |
| "grad_norm": 0.7921222085446677, | |
| "learning_rate": 7.545135962383002e-05, | |
| "loss": 0.0659, | |
| "step": 8040 | |
| }, | |
| { | |
| "epoch": 0.6427402291508643, | |
| "grad_norm": 0.555757166135947, | |
| "learning_rate": 7.540699995564033e-05, | |
| "loss": 0.0552, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 0.6435386642181324, | |
| "grad_norm": 0.6164912342935124, | |
| "learning_rate": 7.536264028745066e-05, | |
| "loss": 0.0488, | |
| "step": 8060 | |
| }, | |
| { | |
| "epoch": 0.6443370992854006, | |
| "grad_norm": 1.2094497855234292, | |
| "learning_rate": 7.531828061926097e-05, | |
| "loss": 0.0727, | |
| "step": 8070 | |
| }, | |
| { | |
| "epoch": 0.6451355343526688, | |
| "grad_norm": 0.44264474930118525, | |
| "learning_rate": 7.527392095107129e-05, | |
| "loss": 0.0725, | |
| "step": 8080 | |
| }, | |
| { | |
| "epoch": 0.645933969419937, | |
| "grad_norm": 0.8852677852993542, | |
| "learning_rate": 7.522956128288162e-05, | |
| "loss": 0.0706, | |
| "step": 8090 | |
| }, | |
| { | |
| "epoch": 0.646732404487205, | |
| "grad_norm": 0.4185552606938776, | |
| "learning_rate": 7.518520161469192e-05, | |
| "loss": 0.0615, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.6475308395544732, | |
| "grad_norm": 0.9889620142615653, | |
| "learning_rate": 7.514084194650224e-05, | |
| "loss": 0.066, | |
| "step": 8110 | |
| }, | |
| { | |
| "epoch": 0.6483292746217414, | |
| "grad_norm": 0.5444520967465741, | |
| "learning_rate": 7.509648227831257e-05, | |
| "loss": 0.0726, | |
| "step": 8120 | |
| }, | |
| { | |
| "epoch": 0.6491277096890096, | |
| "grad_norm": 0.6543439122201649, | |
| "learning_rate": 7.505212261012287e-05, | |
| "loss": 0.0709, | |
| "step": 8130 | |
| }, | |
| { | |
| "epoch": 0.6499261447562777, | |
| "grad_norm": 0.2836388415205917, | |
| "learning_rate": 7.50077629419332e-05, | |
| "loss": 0.0552, | |
| "step": 8140 | |
| }, | |
| { | |
| "epoch": 0.6507245798235458, | |
| "grad_norm": 0.6375614613426243, | |
| "learning_rate": 7.496340327374352e-05, | |
| "loss": 0.0694, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 0.651523014890814, | |
| "grad_norm": 1.1844406778857912, | |
| "learning_rate": 7.491904360555383e-05, | |
| "loss": 0.0612, | |
| "step": 8160 | |
| }, | |
| { | |
| "epoch": 0.6523214499580822, | |
| "grad_norm": 0.49923423023373387, | |
| "learning_rate": 7.487468393736416e-05, | |
| "loss": 0.0611, | |
| "step": 8170 | |
| }, | |
| { | |
| "epoch": 0.6531198850253503, | |
| "grad_norm": 0.28409047103533674, | |
| "learning_rate": 7.483032426917447e-05, | |
| "loss": 0.051, | |
| "step": 8180 | |
| }, | |
| { | |
| "epoch": 0.6539183200926185, | |
| "grad_norm": 0.2772068614340186, | |
| "learning_rate": 7.478596460098479e-05, | |
| "loss": 0.066, | |
| "step": 8190 | |
| }, | |
| { | |
| "epoch": 0.6547167551598866, | |
| "grad_norm": 0.15989122587835364, | |
| "learning_rate": 7.474160493279511e-05, | |
| "loss": 0.0413, | |
| "step": 8200 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 25048, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 200, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |