LLaVA-Tri-VQARAD / trainer_state.json
yunfeixie's picture
Add files using upload-large-folder tool
9e21459 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 15.0,
"eval_steps": 500,
"global_step": 360,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04,
"learning_rate": 1.8181818181818183e-06,
"loss": 8.9259,
"step": 1
},
{
"epoch": 0.08,
"learning_rate": 3.6363636363636366e-06,
"loss": 8.9458,
"step": 2
},
{
"epoch": 0.12,
"learning_rate": 5.4545454545454545e-06,
"loss": 8.1374,
"step": 3
},
{
"epoch": 0.17,
"learning_rate": 7.272727272727273e-06,
"loss": 5.8659,
"step": 4
},
{
"epoch": 0.21,
"learning_rate": 9.090909090909091e-06,
"loss": 2.6947,
"step": 5
},
{
"epoch": 0.25,
"learning_rate": 1.0909090909090909e-05,
"loss": 1.8902,
"step": 6
},
{
"epoch": 0.29,
"learning_rate": 1.2727272727272728e-05,
"loss": 1.907,
"step": 7
},
{
"epoch": 0.33,
"learning_rate": 1.4545454545454546e-05,
"loss": 1.6722,
"step": 8
},
{
"epoch": 0.38,
"learning_rate": 1.6363636363636366e-05,
"loss": 1.6213,
"step": 9
},
{
"epoch": 0.42,
"learning_rate": 1.8181818181818182e-05,
"loss": 1.5757,
"step": 10
},
{
"epoch": 0.46,
"learning_rate": 2e-05,
"loss": 1.3937,
"step": 11
},
{
"epoch": 0.5,
"learning_rate": 1.9999594849888083e-05,
"loss": 0.9765,
"step": 12
},
{
"epoch": 0.54,
"learning_rate": 1.999837943238166e-05,
"loss": 1.2983,
"step": 13
},
{
"epoch": 0.58,
"learning_rate": 1.9996353845966033e-05,
"loss": 1.0346,
"step": 14
},
{
"epoch": 0.62,
"learning_rate": 1.9993518254774517e-05,
"loss": 1.0499,
"step": 15
},
{
"epoch": 0.67,
"learning_rate": 1.998987288857513e-05,
"loss": 1.071,
"step": 16
},
{
"epoch": 0.71,
"learning_rate": 1.9985418042751975e-05,
"loss": 1.2056,
"step": 17
},
{
"epoch": 0.75,
"learning_rate": 1.998015407828131e-05,
"loss": 0.8992,
"step": 18
},
{
"epoch": 0.79,
"learning_rate": 1.9974081421702296e-05,
"loss": 1.1998,
"step": 19
},
{
"epoch": 0.83,
"learning_rate": 1.9967200565082426e-05,
"loss": 0.8691,
"step": 20
},
{
"epoch": 0.88,
"learning_rate": 1.9959512065977673e-05,
"loss": 0.8735,
"step": 21
},
{
"epoch": 0.92,
"learning_rate": 1.9951016547387286e-05,
"loss": 0.7922,
"step": 22
},
{
"epoch": 0.96,
"learning_rate": 1.9941714697703333e-05,
"loss": 0.8494,
"step": 23
},
{
"epoch": 1.0,
"learning_rate": 1.993160727065489e-05,
"loss": 0.8835,
"step": 24
},
{
"epoch": 1.04,
"learning_rate": 1.9920695085247012e-05,
"loss": 0.5345,
"step": 25
},
{
"epoch": 1.08,
"learning_rate": 1.9908979025694312e-05,
"loss": 0.4796,
"step": 26
},
{
"epoch": 1.12,
"learning_rate": 1.989646004134937e-05,
"loss": 0.5371,
"step": 27
},
{
"epoch": 1.17,
"learning_rate": 1.9883139146625763e-05,
"loss": 0.4286,
"step": 28
},
{
"epoch": 1.21,
"learning_rate": 1.9869017420915888e-05,
"loss": 0.5679,
"step": 29
},
{
"epoch": 1.25,
"learning_rate": 1.9854096008503495e-05,
"loss": 0.5667,
"step": 30
},
{
"epoch": 1.29,
"learning_rate": 1.9838376118470965e-05,
"loss": 0.4878,
"step": 31
},
{
"epoch": 1.33,
"learning_rate": 1.9821859024601345e-05,
"loss": 0.6535,
"step": 32
},
{
"epoch": 1.38,
"learning_rate": 1.9804546065275116e-05,
"loss": 0.566,
"step": 33
},
{
"epoch": 1.42,
"learning_rate": 1.978643864336176e-05,
"loss": 0.4362,
"step": 34
},
{
"epoch": 1.46,
"learning_rate": 1.9767538226106078e-05,
"loss": 0.6098,
"step": 35
},
{
"epoch": 1.5,
"learning_rate": 1.9747846345009306e-05,
"loss": 0.3936,
"step": 36
},
{
"epoch": 1.54,
"learning_rate": 1.9727364595705012e-05,
"loss": 0.5268,
"step": 37
},
{
"epoch": 1.58,
"learning_rate": 1.9706094637829797e-05,
"loss": 0.5022,
"step": 38
},
{
"epoch": 1.62,
"learning_rate": 1.9684038194888827e-05,
"loss": 0.486,
"step": 39
},
{
"epoch": 1.67,
"learning_rate": 1.9661197054116165e-05,
"loss": 0.5562,
"step": 40
},
{
"epoch": 1.71,
"learning_rate": 1.963757306632996e-05,
"loss": 0.3986,
"step": 41
},
{
"epoch": 1.75,
"learning_rate": 1.9613168145782468e-05,
"loss": 0.4909,
"step": 42
},
{
"epoch": 1.79,
"learning_rate": 1.958798427000495e-05,
"loss": 0.4447,
"step": 43
},
{
"epoch": 1.83,
"learning_rate": 1.956202347964743e-05,
"loss": 0.4243,
"step": 44
},
{
"epoch": 1.88,
"learning_rate": 1.9535287878313315e-05,
"loss": 0.4518,
"step": 45
},
{
"epoch": 1.92,
"learning_rate": 1.9507779632388997e-05,
"loss": 0.5424,
"step": 46
},
{
"epoch": 1.96,
"learning_rate": 1.947950097086825e-05,
"loss": 0.472,
"step": 47
},
{
"epoch": 2.0,
"learning_rate": 1.945045418517165e-05,
"loss": 0.4739,
"step": 48
},
{
"epoch": 2.04,
"learning_rate": 1.9420641628960897e-05,
"loss": 0.3188,
"step": 49
},
{
"epoch": 2.08,
"learning_rate": 1.9390065717948084e-05,
"loss": 0.3718,
"step": 50
},
{
"epoch": 2.12,
"learning_rate": 1.9358728929699966e-05,
"loss": 0.2013,
"step": 51
},
{
"epoch": 2.17,
"learning_rate": 1.9326633803437197e-05,
"loss": 0.2719,
"step": 52
},
{
"epoch": 2.21,
"learning_rate": 1.929378293982857e-05,
"loss": 0.2863,
"step": 53
},
{
"epoch": 2.25,
"learning_rate": 1.926017900078031e-05,
"loss": 0.2934,
"step": 54
},
{
"epoch": 2.29,
"learning_rate": 1.922582470922034e-05,
"loss": 0.2446,
"step": 55
},
{
"epoch": 2.33,
"learning_rate": 1.9190722848877683e-05,
"loss": 0.2367,
"step": 56
},
{
"epoch": 2.38,
"learning_rate": 1.9154876264056863e-05,
"loss": 0.2255,
"step": 57
},
{
"epoch": 2.42,
"learning_rate": 1.911828785940745e-05,
"loss": 0.2965,
"step": 58
},
{
"epoch": 2.46,
"learning_rate": 1.908096059968869e-05,
"loss": 0.2464,
"step": 59
},
{
"epoch": 2.5,
"learning_rate": 1.904289750952928e-05,
"loss": 0.2446,
"step": 60
},
{
"epoch": 2.54,
"learning_rate": 1.900410167318226e-05,
"loss": 0.2948,
"step": 61
},
{
"epoch": 2.58,
"learning_rate": 1.8964576234275123e-05,
"loss": 0.2947,
"step": 62
},
{
"epoch": 2.62,
"learning_rate": 1.8924324395555066e-05,
"loss": 0.2137,
"step": 63
},
{
"epoch": 2.67,
"learning_rate": 1.8883349418629487e-05,
"loss": 0.2963,
"step": 64
},
{
"epoch": 2.71,
"learning_rate": 1.8841654623701673e-05,
"loss": 0.319,
"step": 65
},
{
"epoch": 2.75,
"learning_rate": 1.8799243389301796e-05,
"loss": 0.2211,
"step": 66
},
{
"epoch": 2.79,
"learning_rate": 1.8756119152013134e-05,
"loss": 0.2289,
"step": 67
},
{
"epoch": 2.83,
"learning_rate": 1.8712285406193585e-05,
"loss": 0.2706,
"step": 68
},
{
"epoch": 2.88,
"learning_rate": 1.866774570369257e-05,
"loss": 0.2301,
"step": 69
},
{
"epoch": 2.92,
"learning_rate": 1.8622503653563173e-05,
"loss": 0.2522,
"step": 70
},
{
"epoch": 2.96,
"learning_rate": 1.8576562921769727e-05,
"loss": 0.2784,
"step": 71
},
{
"epoch": 3.0,
"learning_rate": 1.8529927230890757e-05,
"loss": 0.3307,
"step": 72
},
{
"epoch": 3.04,
"learning_rate": 1.8482600359817344e-05,
"loss": 0.1112,
"step": 73
},
{
"epoch": 3.08,
"learning_rate": 1.843458614344691e-05,
"loss": 0.1366,
"step": 74
},
{
"epoch": 3.12,
"learning_rate": 1.8385888472372474e-05,
"loss": 0.1771,
"step": 75
},
{
"epoch": 3.17,
"learning_rate": 1.833651129256742e-05,
"loss": 0.1372,
"step": 76
},
{
"epoch": 3.21,
"learning_rate": 1.828645860506573e-05,
"loss": 0.1824,
"step": 77
},
{
"epoch": 3.25,
"learning_rate": 1.8235734465637794e-05,
"loss": 0.1933,
"step": 78
},
{
"epoch": 3.29,
"learning_rate": 1.8184342984461766e-05,
"loss": 0.2282,
"step": 79
},
{
"epoch": 3.33,
"learning_rate": 1.8132288325790518e-05,
"loss": 0.1289,
"step": 80
},
{
"epoch": 3.38,
"learning_rate": 1.8079574707614202e-05,
"loss": 0.1726,
"step": 81
},
{
"epoch": 3.42,
"learning_rate": 1.802620640131848e-05,
"loss": 0.1802,
"step": 82
},
{
"epoch": 3.46,
"learning_rate": 1.797218773133841e-05,
"loss": 0.1267,
"step": 83
},
{
"epoch": 3.5,
"learning_rate": 1.7917523074808024e-05,
"loss": 0.1633,
"step": 84
},
{
"epoch": 3.54,
"learning_rate": 1.786221686120567e-05,
"loss": 0.1657,
"step": 85
},
{
"epoch": 3.58,
"learning_rate": 1.7806273571995066e-05,
"loss": 0.1596,
"step": 86
},
{
"epoch": 3.62,
"learning_rate": 1.7749697740262197e-05,
"loss": 0.1457,
"step": 87
},
{
"epoch": 3.67,
"learning_rate": 1.769249395034797e-05,
"loss": 0.1333,
"step": 88
},
{
"epoch": 3.71,
"learning_rate": 1.7634666837476765e-05,
"loss": 0.2186,
"step": 89
},
{
"epoch": 3.75,
"learning_rate": 1.757622108738083e-05,
"loss": 0.1399,
"step": 90
},
{
"epoch": 3.79,
"learning_rate": 1.7517161435920606e-05,
"loss": 0.1465,
"step": 91
},
{
"epoch": 3.83,
"learning_rate": 1.7457492668700967e-05,
"loss": 0.1402,
"step": 92
},
{
"epoch": 3.88,
"learning_rate": 1.7397219620683465e-05,
"loss": 0.2108,
"step": 93
},
{
"epoch": 3.92,
"learning_rate": 1.7336347175794523e-05,
"loss": 0.1197,
"step": 94
},
{
"epoch": 3.96,
"learning_rate": 1.7274880266529716e-05,
"loss": 0.1889,
"step": 95
},
{
"epoch": 4.0,
"learning_rate": 1.721282387355408e-05,
"loss": 0.1857,
"step": 96
},
{
"epoch": 4.04,
"learning_rate": 1.715018302529852e-05,
"loss": 0.0949,
"step": 97
},
{
"epoch": 4.08,
"learning_rate": 1.7086962797552376e-05,
"loss": 0.102,
"step": 98
},
{
"epoch": 4.12,
"learning_rate": 1.7023168313052118e-05,
"loss": 0.07,
"step": 99
},
{
"epoch": 4.17,
"learning_rate": 1.6958804741066254e-05,
"loss": 0.0904,
"step": 100
},
{
"epoch": 4.21,
"learning_rate": 1.689387729697646e-05,
"loss": 0.1163,
"step": 101
},
{
"epoch": 4.25,
"learning_rate": 1.6828391241854983e-05,
"loss": 0.0992,
"step": 102
},
{
"epoch": 4.29,
"learning_rate": 1.6762351882038342e-05,
"loss": 0.1115,
"step": 103
},
{
"epoch": 4.33,
"learning_rate": 1.669576456869733e-05,
"loss": 0.0796,
"step": 104
},
{
"epoch": 4.38,
"learning_rate": 1.6628634697403447e-05,
"loss": 0.0835,
"step": 105
},
{
"epoch": 4.42,
"learning_rate": 1.6560967707691663e-05,
"loss": 0.1216,
"step": 106
},
{
"epoch": 4.46,
"learning_rate": 1.649276908261967e-05,
"loss": 0.0637,
"step": 107
},
{
"epoch": 4.5,
"learning_rate": 1.642404434832358e-05,
"loss": 0.138,
"step": 108
},
{
"epoch": 4.54,
"learning_rate": 1.635479907357016e-05,
"loss": 0.1041,
"step": 109
},
{
"epoch": 4.58,
"learning_rate": 1.6285038869305565e-05,
"loss": 0.0871,
"step": 110
},
{
"epoch": 4.62,
"learning_rate": 1.621476938820071e-05,
"loss": 0.0819,
"step": 111
},
{
"epoch": 4.67,
"learning_rate": 1.6143996324193227e-05,
"loss": 0.034,
"step": 112
},
{
"epoch": 4.71,
"learning_rate": 1.6072725412026066e-05,
"loss": 0.0366,
"step": 113
},
{
"epoch": 4.75,
"learning_rate": 1.6000962426782844e-05,
"loss": 0.0967,
"step": 114
},
{
"epoch": 4.79,
"learning_rate": 1.592871318341986e-05,
"loss": 0.1434,
"step": 115
},
{
"epoch": 4.83,
"learning_rate": 1.585598353629492e-05,
"loss": 0.1013,
"step": 116
},
{
"epoch": 4.88,
"learning_rate": 1.5782779378692957e-05,
"loss": 0.1169,
"step": 117
},
{
"epoch": 4.92,
"learning_rate": 1.57091066423485e-05,
"loss": 0.0837,
"step": 118
},
{
"epoch": 4.96,
"learning_rate": 1.5634971296965027e-05,
"loss": 0.1023,
"step": 119
},
{
"epoch": 5.0,
"learning_rate": 1.5560379349731234e-05,
"loss": 0.1044,
"step": 120
},
{
"epoch": 5.04,
"learning_rate": 1.5485336844834274e-05,
"loss": 0.0449,
"step": 121
},
{
"epoch": 5.08,
"learning_rate": 1.5409849862969994e-05,
"loss": 0.0338,
"step": 122
},
{
"epoch": 5.12,
"learning_rate": 1.5333924520850227e-05,
"loss": 0.0379,
"step": 123
},
{
"epoch": 5.17,
"learning_rate": 1.5257566970707147e-05,
"loss": 0.0553,
"step": 124
},
{
"epoch": 5.21,
"learning_rate": 1.5180783399794749e-05,
"loss": 0.0408,
"step": 125
},
{
"epoch": 5.25,
"learning_rate": 1.5103580029887504e-05,
"loss": 0.0688,
"step": 126
},
{
"epoch": 5.29,
"learning_rate": 1.5025963116776203e-05,
"loss": 0.0781,
"step": 127
},
{
"epoch": 5.33,
"learning_rate": 1.4947938949761054e-05,
"loss": 0.0799,
"step": 128
},
{
"epoch": 5.38,
"learning_rate": 1.4869513851142051e-05,
"loss": 0.0328,
"step": 129
},
{
"epoch": 5.42,
"learning_rate": 1.4790694175706698e-05,
"loss": 0.0869,
"step": 130
},
{
"epoch": 5.46,
"learning_rate": 1.4711486310215053e-05,
"loss": 0.0185,
"step": 131
},
{
"epoch": 5.5,
"learning_rate": 1.4631896672882235e-05,
"loss": 0.0751,
"step": 132
},
{
"epoch": 5.54,
"learning_rate": 1.4551931712858334e-05,
"loss": 0.0842,
"step": 133
},
{
"epoch": 5.58,
"learning_rate": 1.4471597909705858e-05,
"loss": 0.0718,
"step": 134
},
{
"epoch": 5.62,
"learning_rate": 1.4390901772874668e-05,
"loss": 0.0854,
"step": 135
},
{
"epoch": 5.67,
"learning_rate": 1.4309849841174538e-05,
"loss": 0.0508,
"step": 136
},
{
"epoch": 5.71,
"learning_rate": 1.422844868224531e-05,
"loss": 0.0722,
"step": 137
},
{
"epoch": 5.75,
"learning_rate": 1.4146704892024714e-05,
"loss": 0.0577,
"step": 138
},
{
"epoch": 5.79,
"learning_rate": 1.40646250942139e-05,
"loss": 0.0654,
"step": 139
},
{
"epoch": 5.83,
"learning_rate": 1.3982215939740726e-05,
"loss": 0.0452,
"step": 140
},
{
"epoch": 5.88,
"learning_rate": 1.3899484106220816e-05,
"loss": 0.0549,
"step": 141
},
{
"epoch": 5.92,
"learning_rate": 1.3816436297416496e-05,
"loss": 0.0385,
"step": 142
},
{
"epoch": 5.96,
"learning_rate": 1.3733079242693572e-05,
"loss": 0.0729,
"step": 143
},
{
"epoch": 6.0,
"learning_rate": 1.3649419696476057e-05,
"loss": 0.041,
"step": 144
},
{
"epoch": 6.04,
"learning_rate": 1.356546443769885e-05,
"loss": 0.02,
"step": 145
},
{
"epoch": 6.08,
"learning_rate": 1.3481220269258449e-05,
"loss": 0.0611,
"step": 146
},
{
"epoch": 6.12,
"learning_rate": 1.3396694017461708e-05,
"loss": 0.0374,
"step": 147
},
{
"epoch": 6.17,
"learning_rate": 1.3311892531472705e-05,
"loss": 0.0294,
"step": 148
},
{
"epoch": 6.21,
"learning_rate": 1.3226822682757745e-05,
"loss": 0.0483,
"step": 149
},
{
"epoch": 6.25,
"learning_rate": 1.3141491364528576e-05,
"loss": 0.0242,
"step": 150
},
{
"epoch": 6.29,
"learning_rate": 1.3055905491183822e-05,
"loss": 0.0307,
"step": 151
},
{
"epoch": 6.33,
"learning_rate": 1.2970071997748712e-05,
"loss": 0.0529,
"step": 152
},
{
"epoch": 6.38,
"learning_rate": 1.288399783931315e-05,
"loss": 0.0542,
"step": 153
},
{
"epoch": 6.42,
"learning_rate": 1.2797689990468113e-05,
"loss": 0.0297,
"step": 154
},
{
"epoch": 6.46,
"learning_rate": 1.2711155444740529e-05,
"loss": 0.0292,
"step": 155
},
{
"epoch": 6.5,
"learning_rate": 1.2624401214026574e-05,
"loss": 0.0176,
"step": 156
},
{
"epoch": 6.54,
"learning_rate": 1.2537434328023501e-05,
"loss": 0.0154,
"step": 157
},
{
"epoch": 6.58,
"learning_rate": 1.2450261833660033e-05,
"loss": 0.02,
"step": 158
},
{
"epoch": 6.62,
"learning_rate": 1.2362890794525342e-05,
"loss": 0.0589,
"step": 159
},
{
"epoch": 6.67,
"learning_rate": 1.2275328290296677e-05,
"loss": 0.0283,
"step": 160
},
{
"epoch": 6.71,
"learning_rate": 1.2187581416165721e-05,
"loss": 0.0307,
"step": 161
},
{
"epoch": 6.75,
"learning_rate": 1.2099657282263651e-05,
"loss": 0.0356,
"step": 162
},
{
"epoch": 6.79,
"learning_rate": 1.2011563013084996e-05,
"loss": 0.0466,
"step": 163
},
{
"epoch": 6.83,
"learning_rate": 1.1923305746910372e-05,
"loss": 0.0354,
"step": 164
},
{
"epoch": 6.88,
"learning_rate": 1.1834892635228024e-05,
"loss": 0.0432,
"step": 165
},
{
"epoch": 6.92,
"learning_rate": 1.1746330842154371e-05,
"loss": 0.0324,
"step": 166
},
{
"epoch": 6.96,
"learning_rate": 1.1657627543853491e-05,
"loss": 0.0406,
"step": 167
},
{
"epoch": 7.0,
"learning_rate": 1.156878992795563e-05,
"loss": 0.0394,
"step": 168
},
{
"epoch": 7.04,
"learning_rate": 1.1479825192974791e-05,
"loss": 0.0234,
"step": 169
},
{
"epoch": 7.08,
"learning_rate": 1.1390740547725443e-05,
"loss": 0.0135,
"step": 170
},
{
"epoch": 7.12,
"learning_rate": 1.1301543210738383e-05,
"loss": 0.0438,
"step": 171
},
{
"epoch": 7.17,
"learning_rate": 1.1212240409675825e-05,
"loss": 0.015,
"step": 172
},
{
"epoch": 7.21,
"learning_rate": 1.1122839380745738e-05,
"loss": 0.0083,
"step": 173
},
{
"epoch": 7.25,
"learning_rate": 1.1033347368115494e-05,
"loss": 0.0212,
"step": 174
},
{
"epoch": 7.29,
"learning_rate": 1.0943771623324884e-05,
"loss": 0.0245,
"step": 175
},
{
"epoch": 7.33,
"learning_rate": 1.085411940469851e-05,
"loss": 0.0083,
"step": 176
},
{
"epoch": 7.38,
"learning_rate": 1.0764397976757658e-05,
"loss": 0.0187,
"step": 177
},
{
"epoch": 7.42,
"learning_rate": 1.0674614609631634e-05,
"loss": 0.0212,
"step": 178
},
{
"epoch": 7.46,
"learning_rate": 1.0584776578468698e-05,
"loss": 0.022,
"step": 179
},
{
"epoch": 7.5,
"learning_rate": 1.0494891162846515e-05,
"loss": 0.0374,
"step": 180
},
{
"epoch": 7.54,
"learning_rate": 1.040496564618233e-05,
"loss": 0.014,
"step": 181
},
{
"epoch": 7.58,
"learning_rate": 1.0315007315142772e-05,
"loss": 0.0146,
"step": 182
},
{
"epoch": 7.62,
"learning_rate": 1.0225023459053416e-05,
"loss": 0.0226,
"step": 183
},
{
"epoch": 7.67,
"learning_rate": 1.0135021369308138e-05,
"loss": 0.0176,
"step": 184
},
{
"epoch": 7.71,
"learning_rate": 1.004500833877828e-05,
"loss": 0.0101,
"step": 185
},
{
"epoch": 7.75,
"learning_rate": 9.954991661221724e-06,
"loss": 0.0093,
"step": 186
},
{
"epoch": 7.79,
"learning_rate": 9.864978630691865e-06,
"loss": 0.0228,
"step": 187
},
{
"epoch": 7.83,
"learning_rate": 9.774976540946589e-06,
"loss": 0.0194,
"step": 188
},
{
"epoch": 7.88,
"learning_rate": 9.684992684857232e-06,
"loss": 0.0445,
"step": 189
},
{
"epoch": 7.92,
"learning_rate": 9.595034353817673e-06,
"loss": 0.0078,
"step": 190
},
{
"epoch": 7.96,
"learning_rate": 9.505108837153489e-06,
"loss": 0.0227,
"step": 191
},
{
"epoch": 8.0,
"learning_rate": 9.415223421531308e-06,
"loss": 0.0188,
"step": 192
},
{
"epoch": 8.04,
"learning_rate": 9.325385390368367e-06,
"loss": 0.0114,
"step": 193
},
{
"epoch": 8.08,
"learning_rate": 9.23560202324235e-06,
"loss": 0.0084,
"step": 194
},
{
"epoch": 8.12,
"learning_rate": 9.145880595301495e-06,
"loss": 0.0092,
"step": 195
},
{
"epoch": 8.17,
"learning_rate": 9.056228376675118e-06,
"loss": 0.0158,
"step": 196
},
{
"epoch": 8.21,
"learning_rate": 8.966652631884506e-06,
"loss": 0.0045,
"step": 197
},
{
"epoch": 8.25,
"learning_rate": 8.877160619254264e-06,
"loss": 0.0148,
"step": 198
},
{
"epoch": 8.29,
"learning_rate": 8.787759590324177e-06,
"loss": 0.0076,
"step": 199
},
{
"epoch": 8.33,
"learning_rate": 8.698456789261617e-06,
"loss": 0.0086,
"step": 200
},
{
"epoch": 8.38,
"learning_rate": 8.609259452274559e-06,
"loss": 0.0121,
"step": 201
},
{
"epoch": 8.42,
"learning_rate": 8.52017480702521e-06,
"loss": 0.01,
"step": 202
},
{
"epoch": 8.46,
"learning_rate": 8.431210072044371e-06,
"loss": 0.0094,
"step": 203
},
{
"epoch": 8.5,
"learning_rate": 8.342372456146512e-06,
"loss": 0.0036,
"step": 204
},
{
"epoch": 8.54,
"learning_rate": 8.253669157845632e-06,
"loss": 0.0123,
"step": 205
},
{
"epoch": 8.58,
"learning_rate": 8.165107364771979e-06,
"loss": 0.0262,
"step": 206
},
{
"epoch": 8.62,
"learning_rate": 8.076694253089632e-06,
"loss": 0.0409,
"step": 207
},
{
"epoch": 8.67,
"learning_rate": 7.988436986915005e-06,
"loss": 0.0219,
"step": 208
},
{
"epoch": 8.71,
"learning_rate": 7.900342717736354e-06,
"loss": 0.0114,
"step": 209
},
{
"epoch": 8.75,
"learning_rate": 7.812418583834282e-06,
"loss": 0.0184,
"step": 210
},
{
"epoch": 8.79,
"learning_rate": 7.724671709703328e-06,
"loss": 0.0151,
"step": 211
},
{
"epoch": 8.83,
"learning_rate": 7.637109205474665e-06,
"loss": 0.0371,
"step": 212
},
{
"epoch": 8.88,
"learning_rate": 7.5497381663399716e-06,
"loss": 0.0111,
"step": 213
},
{
"epoch": 8.92,
"learning_rate": 7.462565671976504e-06,
"loss": 0.0236,
"step": 214
},
{
"epoch": 8.96,
"learning_rate": 7.375598785973429e-06,
"loss": 0.0131,
"step": 215
},
{
"epoch": 9.0,
"learning_rate": 7.288844555259471e-06,
"loss": 0.0104,
"step": 216
},
{
"epoch": 9.04,
"learning_rate": 7.202310009531886e-06,
"loss": 0.0021,
"step": 217
},
{
"epoch": 9.08,
"learning_rate": 7.116002160686851e-06,
"loss": 0.0202,
"step": 218
},
{
"epoch": 9.12,
"learning_rate": 7.0299280022512875e-06,
"loss": 0.0089,
"step": 219
},
{
"epoch": 9.17,
"learning_rate": 6.944094508816182e-06,
"loss": 0.0027,
"step": 220
},
{
"epoch": 9.21,
"learning_rate": 6.858508635471428e-06,
"loss": 0.0009,
"step": 221
},
{
"epoch": 9.25,
"learning_rate": 6.773177317242257e-06,
"loss": 0.0061,
"step": 222
},
{
"epoch": 9.29,
"learning_rate": 6.688107468527297e-06,
"loss": 0.0023,
"step": 223
},
{
"epoch": 9.33,
"learning_rate": 6.603305982538295e-06,
"loss": 0.0144,
"step": 224
},
{
"epoch": 9.38,
"learning_rate": 6.518779730741555e-06,
"loss": 0.0145,
"step": 225
},
{
"epoch": 9.42,
"learning_rate": 6.434535562301153e-06,
"loss": 0.01,
"step": 226
},
{
"epoch": 9.46,
"learning_rate": 6.350580303523947e-06,
"loss": 0.007,
"step": 227
},
{
"epoch": 9.5,
"learning_rate": 6.266920757306429e-06,
"loss": 0.0065,
"step": 228
},
{
"epoch": 9.54,
"learning_rate": 6.183563702583506e-06,
"loss": 0.0218,
"step": 229
},
{
"epoch": 9.58,
"learning_rate": 6.100515893779188e-06,
"loss": 0.0089,
"step": 230
},
{
"epoch": 9.62,
"learning_rate": 6.01778406025928e-06,
"loss": 0.0153,
"step": 231
},
{
"epoch": 9.67,
"learning_rate": 5.935374905786102e-06,
"loss": 0.0106,
"step": 232
},
{
"epoch": 9.71,
"learning_rate": 5.8532951079752895e-06,
"loss": 0.0111,
"step": 233
},
{
"epoch": 9.75,
"learning_rate": 5.771551317754691e-06,
"loss": 0.0289,
"step": 234
},
{
"epoch": 9.79,
"learning_rate": 5.690150158825462e-06,
"loss": 0.0136,
"step": 235
},
{
"epoch": 9.83,
"learning_rate": 5.609098227125334e-06,
"loss": 0.0087,
"step": 236
},
{
"epoch": 9.88,
"learning_rate": 5.528402090294142e-06,
"loss": 0.0147,
"step": 237
},
{
"epoch": 9.92,
"learning_rate": 5.448068287141663e-06,
"loss": 0.0122,
"step": 238
},
{
"epoch": 9.96,
"learning_rate": 5.368103327117768e-06,
"loss": 0.0181,
"step": 239
},
{
"epoch": 10.0,
"learning_rate": 5.288513689784951e-06,
"loss": 0.0142,
"step": 240
},
{
"epoch": 10.04,
"learning_rate": 5.209305824293307e-06,
"loss": 0.0038,
"step": 241
},
{
"epoch": 10.08,
"learning_rate": 5.130486148857952e-06,
"loss": 0.0144,
"step": 242
},
{
"epoch": 10.12,
"learning_rate": 5.05206105023895e-06,
"loss": 0.0034,
"step": 243
},
{
"epoch": 10.17,
"learning_rate": 4.974036883223798e-06,
"loss": 0.0013,
"step": 244
},
{
"epoch": 10.21,
"learning_rate": 4.896419970112499e-06,
"loss": 0.0083,
"step": 245
},
{
"epoch": 10.25,
"learning_rate": 4.819216600205254e-06,
"loss": 0.0194,
"step": 246
},
{
"epoch": 10.29,
"learning_rate": 4.742433029292856e-06,
"loss": 0.0019,
"step": 247
},
{
"epoch": 10.33,
"learning_rate": 4.6660754791497755e-06,
"loss": 0.0074,
"step": 248
},
{
"epoch": 10.38,
"learning_rate": 4.590150137030009e-06,
"loss": 0.0013,
"step": 249
},
{
"epoch": 10.42,
"learning_rate": 4.514663155165731e-06,
"loss": 0.0098,
"step": 250
},
{
"epoch": 10.46,
"learning_rate": 4.439620650268771e-06,
"loss": 0.0016,
"step": 251
},
{
"epoch": 10.5,
"learning_rate": 4.365028703034976e-06,
"loss": 0.0037,
"step": 252
},
{
"epoch": 10.54,
"learning_rate": 4.290893357651502e-06,
"loss": 0.0056,
"step": 253
},
{
"epoch": 10.58,
"learning_rate": 4.217220621307043e-06,
"loss": 0.0148,
"step": 254
},
{
"epoch": 10.62,
"learning_rate": 4.144016463705081e-06,
"loss": 0.0028,
"step": 255
},
{
"epoch": 10.67,
"learning_rate": 4.071286816580142e-06,
"loss": 0.0033,
"step": 256
},
{
"epoch": 10.71,
"learning_rate": 3.999037573217157e-06,
"loss": 0.0117,
"step": 257
},
{
"epoch": 10.75,
"learning_rate": 3.927274587973935e-06,
"loss": 0.0108,
"step": 258
},
{
"epoch": 10.79,
"learning_rate": 3.856003675806777e-06,
"loss": 0.0096,
"step": 259
},
{
"epoch": 10.83,
"learning_rate": 3.78523061179929e-06,
"loss": 0.0225,
"step": 260
},
{
"epoch": 10.88,
"learning_rate": 3.7149611306944356e-06,
"loss": 0.0192,
"step": 261
},
{
"epoch": 10.92,
"learning_rate": 3.645200926429844e-06,
"loss": 0.0096,
"step": 262
},
{
"epoch": 10.96,
"learning_rate": 3.5759556516764205e-06,
"loss": 0.0045,
"step": 263
},
{
"epoch": 11.0,
"learning_rate": 3.507230917380332e-06,
"loss": 0.0265,
"step": 264
},
{
"epoch": 11.04,
"learning_rate": 3.4390322923083385e-06,
"loss": 0.0033,
"step": 265
},
{
"epoch": 11.08,
"learning_rate": 3.3713653025965544e-06,
"loss": 0.0111,
"step": 266
},
{
"epoch": 11.12,
"learning_rate": 3.3042354313026702e-06,
"loss": 0.0106,
"step": 267
},
{
"epoch": 11.17,
"learning_rate": 3.237648117961665e-06,
"loss": 0.0172,
"step": 268
},
{
"epoch": 11.21,
"learning_rate": 3.1716087581450193e-06,
"loss": 0.0027,
"step": 269
},
{
"epoch": 11.25,
"learning_rate": 3.1061227030235442e-06,
"loss": 0.0074,
"step": 270
},
{
"epoch": 11.29,
"learning_rate": 3.041195258933749e-06,
"loss": 0.0037,
"step": 271
},
{
"epoch": 11.33,
"learning_rate": 2.976831686947884e-06,
"loss": 0.0006,
"step": 272
},
{
"epoch": 11.38,
"learning_rate": 2.913037202447625e-06,
"loss": 0.0053,
"step": 273
},
{
"epoch": 11.42,
"learning_rate": 2.8498169747014824e-06,
"loss": 0.0049,
"step": 274
},
{
"epoch": 11.46,
"learning_rate": 2.787176126445923e-06,
"loss": 0.0098,
"step": 275
},
{
"epoch": 11.5,
"learning_rate": 2.725119733470284e-06,
"loss": 0.0064,
"step": 276
},
{
"epoch": 11.54,
"learning_rate": 2.663652824205476e-06,
"loss": 0.0051,
"step": 277
},
{
"epoch": 11.58,
"learning_rate": 2.6027803793165353e-06,
"loss": 0.007,
"step": 278
},
{
"epoch": 11.62,
"learning_rate": 2.5425073312990334e-06,
"loss": 0.0121,
"step": 279
},
{
"epoch": 11.67,
"learning_rate": 2.4828385640793974e-06,
"loss": 0.0055,
"step": 280
},
{
"epoch": 11.71,
"learning_rate": 2.4237789126191715e-06,
"loss": 0.0035,
"step": 281
},
{
"epoch": 11.75,
"learning_rate": 2.3653331625232367e-06,
"loss": 0.004,
"step": 282
},
{
"epoch": 11.79,
"learning_rate": 2.307506049652031e-06,
"loss": 0.0114,
"step": 283
},
{
"epoch": 11.83,
"learning_rate": 2.250302259737803e-06,
"loss": 0.0004,
"step": 284
},
{
"epoch": 11.88,
"learning_rate": 2.1937264280049365e-06,
"loss": 0.0247,
"step": 285
},
{
"epoch": 11.92,
"learning_rate": 2.137783138794335e-06,
"loss": 0.0128,
"step": 286
},
{
"epoch": 11.96,
"learning_rate": 2.0824769251919775e-06,
"loss": 0.0015,
"step": 287
},
{
"epoch": 12.0,
"learning_rate": 2.027812268661592e-06,
"loss": 0.0182,
"step": 288
},
{
"epoch": 12.04,
"learning_rate": 1.9737935986815205e-06,
"loss": 0.004,
"step": 289
},
{
"epoch": 12.08,
"learning_rate": 1.9204252923858003e-06,
"loss": 0.0028,
"step": 290
},
{
"epoch": 12.12,
"learning_rate": 1.8677116742094858e-06,
"loss": 0.0056,
"step": 291
},
{
"epoch": 12.17,
"learning_rate": 1.8156570155382357e-06,
"loss": 0.0048,
"step": 292
},
{
"epoch": 12.21,
"learning_rate": 1.764265534362205e-06,
"loss": 0.0125,
"step": 293
},
{
"epoch": 12.25,
"learning_rate": 1.7135413949342706e-06,
"loss": 0.0017,
"step": 294
},
{
"epoch": 12.29,
"learning_rate": 1.6634887074325844e-06,
"loss": 0.0135,
"step": 295
},
{
"epoch": 12.33,
"learning_rate": 1.6141115276275298e-06,
"loss": 0.0093,
"step": 296
},
{
"epoch": 12.38,
"learning_rate": 1.565413856553095e-06,
"loss": 0.0095,
"step": 297
},
{
"epoch": 12.42,
"learning_rate": 1.5173996401826563e-06,
"loss": 0.0082,
"step": 298
},
{
"epoch": 12.46,
"learning_rate": 1.470072769109242e-06,
"loss": 0.0089,
"step": 299
},
{
"epoch": 12.5,
"learning_rate": 1.4234370782302742e-06,
"loss": 0.0023,
"step": 300
},
{
"epoch": 12.54,
"learning_rate": 1.3774963464368295e-06,
"loss": 0.0118,
"step": 301
},
{
"epoch": 12.58,
"learning_rate": 1.3322542963074314e-06,
"loss": 0.0053,
"step": 302
},
{
"epoch": 12.62,
"learning_rate": 1.287714593806415e-06,
"loss": 0.0128,
"step": 303
},
{
"epoch": 12.67,
"learning_rate": 1.2438808479868715e-06,
"loss": 0.0042,
"step": 304
},
{
"epoch": 12.71,
"learning_rate": 1.200756610698205e-06,
"loss": 0.0132,
"step": 305
},
{
"epoch": 12.75,
"learning_rate": 1.1583453762983289e-06,
"loss": 0.0093,
"step": 306
},
{
"epoch": 12.79,
"learning_rate": 1.1166505813705187e-06,
"loss": 0.0095,
"step": 307
},
{
"epoch": 12.83,
"learning_rate": 1.0756756044449358e-06,
"loss": 0.0043,
"step": 308
},
{
"epoch": 12.88,
"learning_rate": 1.035423765724879e-06,
"loss": 0.0129,
"step": 309
},
{
"epoch": 12.92,
"learning_rate": 9.958983268177425e-07,
"loss": 0.0118,
"step": 310
},
{
"epoch": 12.96,
"learning_rate": 9.571024904707238e-07,
"loss": 0.0036,
"step": 311
},
{
"epoch": 13.0,
"learning_rate": 9.190394003113123e-07,
"loss": 0.0034,
"step": 312
},
{
"epoch": 13.04,
"learning_rate": 8.817121405925543e-07,
"loss": 0.0017,
"step": 313
},
{
"epoch": 13.08,
"learning_rate": 8.451237359431397e-07,
"loss": 0.0046,
"step": 314
},
{
"epoch": 13.12,
"learning_rate": 8.092771511223185e-07,
"loss": 0.0003,
"step": 315
},
{
"epoch": 13.17,
"learning_rate": 7.741752907796584e-07,
"loss": 0.0048,
"step": 316
},
{
"epoch": 13.21,
"learning_rate": 7.398209992196914e-07,
"loss": 0.0085,
"step": 317
},
{
"epoch": 13.25,
"learning_rate": 7.062170601714302e-07,
"loss": 0.0036,
"step": 318
},
{
"epoch": 13.29,
"learning_rate": 6.73366196562808e-07,
"loss": 0.0027,
"step": 319
},
{
"epoch": 13.33,
"learning_rate": 6.412710703000368e-07,
"loss": 0.01,
"step": 320
},
{
"epoch": 13.38,
"learning_rate": 6.099342820519183e-07,
"loss": 0.0037,
"step": 321
},
{
"epoch": 13.42,
"learning_rate": 5.79358371039106e-07,
"loss": 0.0068,
"step": 322
},
{
"epoch": 13.46,
"learning_rate": 5.495458148283505e-07,
"loss": 0.0019,
"step": 323
},
{
"epoch": 13.5,
"learning_rate": 5.204990291317535e-07,
"loss": 0.0031,
"step": 324
},
{
"epoch": 13.54,
"learning_rate": 4.92220367611006e-07,
"loss": 0.0131,
"step": 325
},
{
"epoch": 13.58,
"learning_rate": 4.647121216866857e-07,
"loss": 0.0162,
"step": 326
},
{
"epoch": 13.62,
"learning_rate": 4.3797652035257544e-07,
"loss": 0.0021,
"step": 327
},
{
"epoch": 13.67,
"learning_rate": 4.1201572999505e-07,
"loss": 0.0101,
"step": 328
},
{
"epoch": 13.71,
"learning_rate": 3.8683185421753313e-07,
"loss": 0.0137,
"step": 329
},
{
"epoch": 13.75,
"learning_rate": 3.6242693367004365e-07,
"loss": 0.0034,
"step": 330
},
{
"epoch": 13.79,
"learning_rate": 3.38802945883836e-07,
"loss": 0.0039,
"step": 331
},
{
"epoch": 13.83,
"learning_rate": 3.1596180511117235e-07,
"loss": 0.0086,
"step": 332
},
{
"epoch": 13.88,
"learning_rate": 2.939053621702015e-07,
"loss": 0.0113,
"step": 333
},
{
"epoch": 13.92,
"learning_rate": 2.7263540429498747e-07,
"loss": 0.0192,
"step": 334
},
{
"epoch": 13.96,
"learning_rate": 2.5215365499069446e-07,
"loss": 0.006,
"step": 335
},
{
"epoch": 14.0,
"learning_rate": 2.3246177389392388e-07,
"loss": 0.0024,
"step": 336
},
{
"epoch": 14.04,
"learning_rate": 2.1356135663824328e-07,
"loss": 0.0003,
"step": 337
},
{
"epoch": 14.08,
"learning_rate": 1.9545393472488738e-07,
"loss": 0.0011,
"step": 338
},
{
"epoch": 14.12,
"learning_rate": 1.7814097539865626e-07,
"loss": 0.0018,
"step": 339
},
{
"epoch": 14.17,
"learning_rate": 1.6162388152903498e-07,
"loss": 0.0058,
"step": 340
},
{
"epoch": 14.21,
"learning_rate": 1.4590399149650769e-07,
"loss": 0.0052,
"step": 341
},
{
"epoch": 14.25,
"learning_rate": 1.309825790841146e-07,
"loss": 0.006,
"step": 342
},
{
"epoch": 14.29,
"learning_rate": 1.1686085337423991e-07,
"loss": 0.0049,
"step": 343
},
{
"epoch": 14.33,
"learning_rate": 1.0353995865063138e-07,
"loss": 0.004,
"step": 344
},
{
"epoch": 14.38,
"learning_rate": 9.10209743056889e-08,
"loss": 0.009,
"step": 345
},
{
"epoch": 14.42,
"learning_rate": 7.930491475299229e-08,
"loss": 0.0017,
"step": 346
},
{
"epoch": 14.46,
"learning_rate": 6.839272934511143e-08,
"loss": 0.0021,
"step": 347
},
{
"epoch": 14.5,
"learning_rate": 5.828530229667228e-08,
"loss": 0.0064,
"step": 348
},
{
"epoch": 14.54,
"learning_rate": 4.898345261271531e-08,
"loss": 0.0092,
"step": 349
},
{
"epoch": 14.58,
"learning_rate": 4.0487934022328533e-08,
"loss": 0.0002,
"step": 350
},
{
"epoch": 14.62,
"learning_rate": 3.27994349175742e-08,
"loss": 0.0116,
"step": 351
},
{
"epoch": 14.67,
"learning_rate": 2.591857829770672e-08,
"loss": 0.0022,
"step": 352
},
{
"epoch": 14.71,
"learning_rate": 1.984592171869082e-08,
"loss": 0.0142,
"step": 353
},
{
"epoch": 14.75,
"learning_rate": 1.4581957248026579e-08,
"loss": 0.0068,
"step": 354
},
{
"epoch": 14.79,
"learning_rate": 1.0127111424872437e-08,
"loss": 0.0046,
"step": 355
},
{
"epoch": 14.83,
"learning_rate": 6.481745225485059e-09,
"loss": 0.0054,
"step": 356
},
{
"epoch": 14.88,
"learning_rate": 3.6461540339682855e-09,
"loss": 0.0138,
"step": 357
},
{
"epoch": 14.92,
"learning_rate": 1.6205676183411733e-09,
"loss": 0.0125,
"step": 358
},
{
"epoch": 14.96,
"learning_rate": 4.0515011191621933e-10,
"loss": 0.0022,
"step": 359
},
{
"epoch": 15.0,
"learning_rate": 0.0,
"loss": 0.0143,
"step": 360
},
{
"epoch": 15.0,
"step": 360,
"total_flos": 1.427028711613399e+17,
"train_loss": 0.004024597113311756,
"train_runtime": 2809.5847,
"train_samples_per_second": 16.358,
"train_steps_per_second": 0.128
}
],
"logging_steps": 1.0,
"max_steps": 360,
"num_input_tokens_seen": 0,
"num_train_epochs": 15,
"save_steps": 100,
"total_flos": 1.427028711613399e+17,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}