{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 312,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03205128205128205,
      "grad_norm": 17.482923590326937,
      "learning_rate": 1.25e-07,
      "logits/chosen": -2.0185546875,
      "logits/rejected": -1.881250023841858,
      "logps/chosen": -235.671875,
      "logps/rejected": -302.26873779296875,
      "loss": 0.38,
      "rewards/accuracies": 0.8187500238418579,
      "rewards/chosen": 1.7926757335662842,
      "rewards/margins": 3.30548095703125,
      "rewards/rejected": -1.5131103992462158,
      "step": 5
    },
    {
      "epoch": 0.0641025641025641,
      "grad_norm": 10.731624225914716,
      "learning_rate": 2.8125e-07,
      "logits/chosen": -2.0074219703674316,
      "logits/rejected": -1.779687523841858,
      "logps/chosen": -220.52969360351562,
      "logps/rejected": -415.6499938964844,
      "loss": 0.4128,
      "rewards/accuracies": 0.7875000238418579,
      "rewards/chosen": 1.885888695716858,
      "rewards/margins": 3.5706787109375,
      "rewards/rejected": -1.6814696788787842,
      "step": 10
    },
    {
      "epoch": 0.09615384615384616,
      "grad_norm": 12.984171783098084,
      "learning_rate": 4.375e-07,
      "logits/chosen": -2.097851514816284,
      "logits/rejected": -1.916015625,
      "logps/chosen": -212.4031219482422,
      "logps/rejected": -295.92657470703125,
      "loss": 0.4187,
      "rewards/accuracies": 0.78125,
      "rewards/chosen": 1.880639672279358,
      "rewards/margins": 3.52978515625,
      "rewards/rejected": -1.648584008216858,
      "step": 15
    },
    {
      "epoch": 0.1282051282051282,
      "grad_norm": 39.351112208545295,
      "learning_rate": 4.949324324324325e-07,
      "logits/chosen": -2.023632764816284,
      "logits/rejected": -1.8396484851837158,
      "logps/chosen": -228.0656280517578,
      "logps/rejected": -375.046875,
      "loss": 0.5019,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": 1.77685546875,
      "rewards/margins": 3.207226514816284,
      "rewards/rejected": -1.429071068763733,
      "step": 20
    },
    {
      "epoch": 0.16025641025641027,
      "grad_norm": 8.14663574686874,
      "learning_rate": 4.864864864864865e-07,
      "logits/chosen": -1.9912109375,
      "logits/rejected": -1.8039062023162842,
      "logps/chosen": -231.5593719482422,
      "logps/rejected": -515.8937377929688,
      "loss": 0.4089,
      "rewards/accuracies": 0.8062499761581421,
      "rewards/chosen": 1.800073266029358,
      "rewards/margins": 3.571337938308716,
      "rewards/rejected": -1.771520972251892,
      "step": 25
    },
    {
      "epoch": 0.19230769230769232,
      "grad_norm": 16.42433782374998,
      "learning_rate": 4.780405405405405e-07,
      "logits/chosen": -1.983789086341858,
      "logits/rejected": -1.7492187023162842,
      "logps/chosen": -250.9375,
      "logps/rejected": -518.8781127929688,
      "loss": 0.3187,
      "rewards/accuracies": 0.8687499761581421,
      "rewards/chosen": 1.736535668373108,
      "rewards/margins": 3.2618165016174316,
      "rewards/rejected": -1.525964379310608,
      "step": 30
    },
    {
      "epoch": 0.22435897435897437,
      "grad_norm": 16.266393432601383,
      "learning_rate": 4.695945945945946e-07,
      "logits/chosen": -2.0166015625,
      "logits/rejected": -1.91015625,
      "logps/chosen": -209.7781219482422,
      "logps/rejected": -289.6625061035156,
      "loss": 0.3953,
      "rewards/accuracies": 0.8125,
      "rewards/chosen": 2.050332546234131,
      "rewards/margins": 3.6033051013946533,
      "rewards/rejected": -1.550323486328125,
      "step": 35
    },
    {
      "epoch": 0.2564102564102564,
      "grad_norm": 25.97441760074817,
      "learning_rate": 4.611486486486486e-07,
      "logits/chosen": -1.963476538658142,
      "logits/rejected": -1.8292968273162842,
      "logps/chosen": -283.6156311035156,
      "logps/rejected": -328.79766845703125,
      "loss": 0.4126,
      "rewards/accuracies": 0.8125,
      "rewards/chosen": 1.7740600109100342,
      "rewards/margins": 3.731884717941284,
      "rewards/rejected": -1.9563720226287842,
      "step": 40
    },
    {
      "epoch": 0.28846153846153844,
      "grad_norm": 34.9734844548034,
      "learning_rate": 4.5270270270270264e-07,
      "logits/chosen": -2.0054688453674316,
      "logits/rejected": -1.7921874523162842,
      "logps/chosen": -274.5687561035156,
      "logps/rejected": -334.1656188964844,
      "loss": 0.377,
      "rewards/accuracies": 0.8187500238418579,
      "rewards/chosen": 2.108358860015869,
      "rewards/margins": 4.065283298492432,
      "rewards/rejected": -1.953271508216858,
      "step": 45
    },
    {
      "epoch": 0.32051282051282054,
      "grad_norm": 11.913243260486984,
      "learning_rate": 4.442567567567567e-07,
      "logits/chosen": -2.0777344703674316,
      "logits/rejected": -1.947656273841858,
      "logps/chosen": -225.84219360351562,
      "logps/rejected": -250.46249389648438,
      "loss": 0.3373,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": 2.101611375808716,
      "rewards/margins": 3.5291504859924316,
      "rewards/rejected": -1.426367163658142,
      "step": 50
    },
    {
      "epoch": 0.3525641025641026,
      "grad_norm": 13.672519743198338,
      "learning_rate": 4.3581081081081076e-07,
      "logits/chosen": -2.114453077316284,
      "logits/rejected": -1.878320336341858,
      "logps/chosen": -331.421875,
      "logps/rejected": -381.27264404296875,
      "loss": 0.3941,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": 1.777099609375,
      "rewards/margins": 3.0150146484375,
      "rewards/rejected": -1.236975073814392,
      "step": 55
    },
    {
      "epoch": 0.38461538461538464,
      "grad_norm": 11.373635937771688,
      "learning_rate": 4.2736486486486484e-07,
      "logits/chosen": -2.139843702316284,
      "logits/rejected": -1.938867211341858,
      "logps/chosen": -233.5578155517578,
      "logps/rejected": -377.8140563964844,
      "loss": 0.3037,
      "rewards/accuracies": 0.8374999761581421,
      "rewards/chosen": 2.1447510719299316,
      "rewards/margins": 3.566601514816284,
      "rewards/rejected": -1.420263648033142,
      "step": 60
    },
    {
      "epoch": 0.4166666666666667,
      "grad_norm": 14.605720279749862,
      "learning_rate": 4.189189189189189e-07,
      "logits/chosen": -1.915624976158142,
      "logits/rejected": -1.8369140625,
      "logps/chosen": -196.640625,
      "logps/rejected": -297.3812561035156,
      "loss": 0.3993,
      "rewards/accuracies": 0.8062499761581421,
      "rewards/chosen": 1.6602294445037842,
      "rewards/margins": 2.8565430641174316,
      "rewards/rejected": -1.1953613758087158,
      "step": 65
    },
    {
      "epoch": 0.44871794871794873,
      "grad_norm": 12.129690582617949,
      "learning_rate": 4.1047297297297296e-07,
      "logits/chosen": -2.043750047683716,
      "logits/rejected": -1.8582031726837158,
      "logps/chosen": -269.55780029296875,
      "logps/rejected": -349.8812561035156,
      "loss": 0.2719,
      "rewards/accuracies": 0.90625,
      "rewards/chosen": 2.2075562477111816,
      "rewards/margins": 3.8485350608825684,
      "rewards/rejected": -1.6388671398162842,
      "step": 70
    },
    {
      "epoch": 0.4807692307692308,
      "grad_norm": 10.894196057080642,
      "learning_rate": 4.02027027027027e-07,
      "logits/chosen": -2.0833983421325684,
      "logits/rejected": -1.8416016101837158,
      "logps/chosen": -218.09375,
      "logps/rejected": -379.48126220703125,
      "loss": 0.3121,
      "rewards/accuracies": 0.8687499761581421,
      "rewards/chosen": 2.30419921875,
      "rewards/margins": 3.564453125,
      "rewards/rejected": -1.260766625404358,
      "step": 75
    },
    {
      "epoch": 0.5128205128205128,
      "grad_norm": 10.28529818518839,
      "learning_rate": 3.935810810810811e-07,
      "logits/chosen": -2.027539014816284,
      "logits/rejected": -1.8759765625,
      "logps/chosen": -280.046875,
      "logps/rejected": -328.8125,
      "loss": 0.2987,
      "rewards/accuracies": 0.887499988079071,
      "rewards/chosen": 2.126843214035034,
      "rewards/margins": 4.263671875,
      "rewards/rejected": -2.134082078933716,
      "step": 80
    },
    {
      "epoch": 0.5448717948717948,
      "grad_norm": 10.046587710863294,
      "learning_rate": 3.851351351351351e-07,
      "logits/chosen": -2.0047850608825684,
      "logits/rejected": -1.8369140625,
      "logps/chosen": -249.7734375,
      "logps/rejected": -267.5843811035156,
      "loss": 0.2905,
      "rewards/accuracies": 0.893750011920929,
      "rewards/chosen": 2.1821961402893066,
      "rewards/margins": 3.7508788108825684,
      "rewards/rejected": -1.568945288658142,
      "step": 85
    },
    {
      "epoch": 0.5769230769230769,
      "grad_norm": 15.815810554242594,
      "learning_rate": 3.766891891891892e-07,
      "logits/chosen": -1.9933593273162842,
      "logits/rejected": -1.7882812023162842,
      "logps/chosen": -285.0328063964844,
      "logps/rejected": -244.2062530517578,
      "loss": 0.2709,
      "rewards/accuracies": 0.925000011920929,
      "rewards/chosen": 2.258129835128784,
      "rewards/margins": 3.865039110183716,
      "rewards/rejected": -1.611413598060608,
      "step": 90
    },
    {
      "epoch": 0.6089743589743589,
      "grad_norm": 33.74436261855397,
      "learning_rate": 3.682432432432432e-07,
      "logits/chosen": -2.1357421875,
      "logits/rejected": -1.894140601158142,
      "logps/chosen": -245.30624389648438,
      "logps/rejected": -478.70001220703125,
      "loss": 0.2975,
      "rewards/accuracies": 0.8500000238418579,
      "rewards/chosen": 1.964135766029358,
      "rewards/margins": 3.7166991233825684,
      "rewards/rejected": -1.751123070716858,
      "step": 95
    },
    {
      "epoch": 0.6410256410256411,
      "grad_norm": 9.910094853594128,
      "learning_rate": 3.597972972972973e-07,
      "logits/chosen": -2.024609327316284,
      "logits/rejected": -1.865625023841858,
      "logps/chosen": -264.28436279296875,
      "logps/rejected": -300.07342529296875,
      "loss": 0.3171,
      "rewards/accuracies": 0.893750011920929,
      "rewards/chosen": 2.304931640625,
      "rewards/margins": 3.8041014671325684,
      "rewards/rejected": -1.499169945716858,
      "step": 100
    },
    {
      "epoch": 0.6730769230769231,
      "grad_norm": 11.870080752446105,
      "learning_rate": 3.5135135135135134e-07,
      "logits/chosen": -2.0267577171325684,
      "logits/rejected": -1.8447265625,
      "logps/chosen": -259.40936279296875,
      "logps/rejected": -498.8890686035156,
      "loss": 0.2647,
      "rewards/accuracies": 0.893750011920929,
      "rewards/chosen": 2.0042967796325684,
      "rewards/margins": 3.547900438308716,
      "rewards/rejected": -1.542810082435608,
      "step": 105
    },
    {
      "epoch": 0.7051282051282052,
      "grad_norm": 8.196509121053467,
      "learning_rate": 3.429054054054054e-07,
      "logits/chosen": -2.0044922828674316,
      "logits/rejected": -1.820703148841858,
      "logps/chosen": -221.078125,
      "logps/rejected": -435.2406311035156,
      "loss": 0.3216,
      "rewards/accuracies": 0.8812500238418579,
      "rewards/chosen": 2.1091065406799316,
      "rewards/margins": 3.4317383766174316,
      "rewards/rejected": -1.322973608970642,
      "step": 110
    },
    {
      "epoch": 0.7371794871794872,
      "grad_norm": 17.275173620118444,
      "learning_rate": 3.3445945945945946e-07,
      "logits/chosen": -2.083203077316284,
      "logits/rejected": -1.875585913658142,
      "logps/chosen": -243.6531219482422,
      "logps/rejected": -477.7124938964844,
      "loss": 0.2858,
      "rewards/accuracies": 0.893750011920929,
      "rewards/chosen": 2.115771532058716,
      "rewards/margins": 3.8758788108825684,
      "rewards/rejected": -1.757867455482483,
      "step": 115
    },
    {
      "epoch": 0.7692307692307693,
      "grad_norm": 6.190361827721572,
      "learning_rate": 3.260135135135135e-07,
      "logits/chosen": -2.025390625,
      "logits/rejected": -1.809960961341858,
      "logps/chosen": -276.1343688964844,
      "logps/rejected": -320.890625,
      "loss": 0.2334,
      "rewards/accuracies": 0.918749988079071,
      "rewards/chosen": 2.201000928878784,
      "rewards/margins": 4.690966606140137,
      "rewards/rejected": -2.4933104515075684,
      "step": 120
    },
    {
      "epoch": 0.8012820512820513,
      "grad_norm": 17.015787309272795,
      "learning_rate": 3.175675675675675e-07,
      "logits/chosen": -1.984960913658142,
      "logits/rejected": -1.833593726158142,
      "logps/chosen": -248.43905639648438,
      "logps/rejected": -294.2093811035156,
      "loss": 0.3588,
      "rewards/accuracies": 0.875,
      "rewards/chosen": 2.134960889816284,
      "rewards/margins": 3.898571729660034,
      "rewards/rejected": -1.761315941810608,
      "step": 125
    },
    {
      "epoch": 0.8333333333333334,
      "grad_norm": 9.916096150406192,
      "learning_rate": 3.091216216216216e-07,
      "logits/chosen": -2.0804686546325684,
      "logits/rejected": -1.899999976158142,
      "logps/chosen": -237.94686889648438,
      "logps/rejected": -357.84063720703125,
      "loss": 0.2721,
      "rewards/accuracies": 0.887499988079071,
      "rewards/chosen": 2.0490965843200684,
      "rewards/margins": 3.5159668922424316,
      "rewards/rejected": -1.4671142101287842,
      "step": 130
    },
    {
      "epoch": 0.8653846153846154,
      "grad_norm": 19.43204229304952,
      "learning_rate": 3.0067567567567564e-07,
      "logits/chosen": -1.9519531726837158,
      "logits/rejected": -1.7833983898162842,
      "logps/chosen": -268.28436279296875,
      "logps/rejected": -376.12188720703125,
      "loss": 0.2836,
      "rewards/accuracies": 0.84375,
      "rewards/chosen": 2.1514039039611816,
      "rewards/margins": 4.179858207702637,
      "rewards/rejected": -2.028857469558716,
      "step": 135
    },
    {
      "epoch": 0.8974358974358975,
      "grad_norm": 14.35891063544634,
      "learning_rate": 2.922297297297297e-07,
      "logits/chosen": -2.075976610183716,
      "logits/rejected": -1.883203148841858,
      "logps/chosen": -203.46875,
      "logps/rejected": -299.484375,
      "loss": 0.2292,
      "rewards/accuracies": 0.90625,
      "rewards/chosen": 2.165087938308716,
      "rewards/margins": 3.933666944503784,
      "rewards/rejected": -1.7722899913787842,
      "step": 140
    },
    {
      "epoch": 0.9294871794871795,
      "grad_norm": 15.571301633489812,
      "learning_rate": 2.8378378378378376e-07,
      "logits/chosen": -2.001757860183716,
      "logits/rejected": -1.8171875476837158,
      "logps/chosen": -257.5218811035156,
      "logps/rejected": -395.0625,
      "loss": 0.2054,
      "rewards/accuracies": 0.9312499761581421,
      "rewards/chosen": 2.296875,
      "rewards/margins": 4.208886623382568,
      "rewards/rejected": -1.913354516029358,
      "step": 145
    },
    {
      "epoch": 0.9615384615384616,
      "grad_norm": 10.241466724079272,
      "learning_rate": 2.7533783783783784e-07,
      "logits/chosen": -2.010937452316284,
      "logits/rejected": -1.7804687023162842,
      "logps/chosen": -263.1890563964844,
      "logps/rejected": -553.5797119140625,
      "loss": 0.2288,
      "rewards/accuracies": 0.9375,
      "rewards/chosen": 2.049511671066284,
      "rewards/margins": 4.341113090515137,
      "rewards/rejected": -2.291332960128784,
      "step": 150
    },
    {
      "epoch": 0.9935897435897436,
      "grad_norm": 8.858764442710157,
      "learning_rate": 2.6689189189189187e-07,
      "logits/chosen": -2.089062452316284,
      "logits/rejected": -1.937890648841858,
      "logps/chosen": -220.54843139648438,
      "logps/rejected": -315.1640625,
      "loss": 0.2811,
      "rewards/accuracies": 0.8687499761581421,
      "rewards/chosen": 2.0038084983825684,
      "rewards/margins": 3.595703125,
      "rewards/rejected": -1.5892822742462158,
      "step": 155
    },
    {
      "epoch": 1.0256410256410255,
      "grad_norm": 9.684185372221096,
      "learning_rate": 2.5844594594594596e-07,
      "logits/chosen": -2.1142578125,
      "logits/rejected": -1.8634765148162842,
      "logps/chosen": -242.419921875,
      "logps/rejected": -744.0179443359375,
      "loss": 0.2283,
      "rewards/accuracies": 0.918749988079071,
      "rewards/chosen": 2.2816405296325684,
      "rewards/margins": 4.6513671875,
      "rewards/rejected": -2.3667969703674316,
      "step": 160
    },
    {
      "epoch": 1.0576923076923077,
      "grad_norm": 16.39607167381079,
      "learning_rate": 2.5e-07,
      "logits/chosen": -2.0179686546325684,
      "logits/rejected": -1.838281273841858,
      "logps/chosen": -285.15155029296875,
      "logps/rejected": -555.5343627929688,
      "loss": 0.2506,
      "rewards/accuracies": 0.90625,
      "rewards/chosen": 2.1651368141174316,
      "rewards/margins": 3.9991211891174316,
      "rewards/rejected": -1.832617163658142,
      "step": 165
    },
    {
      "epoch": 1.0897435897435896,
      "grad_norm": 12.71417077582037,
      "learning_rate": 2.41554054054054e-07,
      "logits/chosen": -1.9617187976837158,
      "logits/rejected": -1.7705078125,
      "logps/chosen": -229.18124389648438,
      "logps/rejected": -388.0687561035156,
      "loss": 0.2361,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": 2.22900390625,
      "rewards/margins": 4.506933689117432,
      "rewards/rejected": -2.277844190597534,
      "step": 170
    },
    {
      "epoch": 1.1217948717948718,
      "grad_norm": 12.537778856529926,
      "learning_rate": 2.331081081081081e-07,
      "logits/chosen": -2.107226610183716,
      "logits/rejected": -1.90234375,
      "logps/chosen": -215.9656219482422,
      "logps/rejected": -324.7749938964844,
      "loss": 0.2086,
      "rewards/accuracies": 0.918749988079071,
      "rewards/chosen": 2.130053758621216,
      "rewards/margins": 4.331640720367432,
      "rewards/rejected": -2.203198194503784,
      "step": 175
    },
    {
      "epoch": 1.1538461538461537,
      "grad_norm": 13.46928418539436,
      "learning_rate": 2.2466216216216216e-07,
      "logits/chosen": -1.991601586341858,
      "logits/rejected": -1.790429711341858,
      "logps/chosen": -239.49063110351562,
      "logps/rejected": -397.62811279296875,
      "loss": 0.2422,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": 2.213427782058716,
      "rewards/margins": 4.355273246765137,
      "rewards/rejected": -2.143115282058716,
      "step": 180
    },
    {
      "epoch": 1.185897435897436,
      "grad_norm": 16.721244760339083,
      "learning_rate": 2.1621621621621622e-07,
      "logits/chosen": -2.0091795921325684,
      "logits/rejected": -1.8250000476837158,
      "logps/chosen": -260.0874938964844,
      "logps/rejected": -433.359375,
      "loss": 0.3083,
      "rewards/accuracies": 0.875,
      "rewards/chosen": 2.1787109375,
      "rewards/margins": 3.8218750953674316,
      "rewards/rejected": -1.6440918445587158,
      "step": 185
    },
    {
      "epoch": 1.217948717948718,
      "grad_norm": 6.4545177104485845,
      "learning_rate": 2.0777027027027025e-07,
      "logits/chosen": -2.0591797828674316,
      "logits/rejected": -1.8468749523162842,
      "logps/chosen": -235.1687469482422,
      "logps/rejected": -273.58123779296875,
      "loss": 0.2293,
      "rewards/accuracies": 0.9312499761581421,
      "rewards/chosen": 2.180835008621216,
      "rewards/margins": 3.9361329078674316,
      "rewards/rejected": -1.753662109375,
      "step": 190
    },
    {
      "epoch": 1.25,
      "grad_norm": 13.390863695555577,
      "learning_rate": 1.993243243243243e-07,
      "logits/chosen": -2.015429735183716,
      "logits/rejected": -1.857812523841858,
      "logps/chosen": -247.94686889648438,
      "logps/rejected": -330.03436279296875,
      "loss": 0.2265,
      "rewards/accuracies": 0.9125000238418579,
      "rewards/chosen": 2.378588914871216,
      "rewards/margins": 4.5875244140625,
      "rewards/rejected": -2.206225633621216,
      "step": 195
    },
    {
      "epoch": 1.282051282051282,
      "grad_norm": 7.467973287614187,
      "learning_rate": 1.9087837837837837e-07,
      "logits/chosen": -1.9660155773162842,
      "logits/rejected": -1.7736327648162842,
      "logps/chosen": -242.78125,
      "logps/rejected": -278.73126220703125,
      "loss": 0.3117,
      "rewards/accuracies": 0.8812500238418579,
      "rewards/chosen": 2.247753858566284,
      "rewards/margins": 3.825390577316284,
      "rewards/rejected": -1.5797607898712158,
      "step": 200
    },
    {
      "epoch": 1.314102564102564,
      "grad_norm": 17.275598489252985,
      "learning_rate": 1.8243243243243243e-07,
      "logits/chosen": -2.0658202171325684,
      "logits/rejected": -1.8517577648162842,
      "logps/chosen": -226.86563110351562,
      "logps/rejected": -353.4937438964844,
      "loss": 0.254,
      "rewards/accuracies": 0.862500011920929,
      "rewards/chosen": 2.29681396484375,
      "rewards/margins": 4.391015529632568,
      "rewards/rejected": -2.097705125808716,
      "step": 205
    },
    {
      "epoch": 1.3461538461538463,
      "grad_norm": 10.44378100815255,
      "learning_rate": 1.739864864864865e-07,
      "logits/chosen": -2.025390625,
      "logits/rejected": -1.800390601158142,
      "logps/chosen": -193.6984405517578,
      "logps/rejected": -289.08282470703125,
      "loss": 0.2395,
      "rewards/accuracies": 0.893750011920929,
      "rewards/chosen": 2.071337938308716,
      "rewards/margins": 3.8559813499450684,
      "rewards/rejected": -1.7841675281524658,
      "step": 210
    },
    {
      "epoch": 1.3782051282051282,
      "grad_norm": 97.68377543207546,
      "learning_rate": 1.6554054054054055e-07,
      "logits/chosen": -1.9826171398162842,
      "logits/rejected": -1.755859375,
      "logps/chosen": -355.3062438964844,
      "logps/rejected": -485.71563720703125,
      "loss": 0.2301,
      "rewards/accuracies": 0.925000011920929,
      "rewards/chosen": 2.012890577316284,
      "rewards/margins": 4.411523342132568,
      "rewards/rejected": -2.397656202316284,
      "step": 215
    },
    {
      "epoch": 1.4102564102564101,
      "grad_norm": 10.308990703109377,
      "learning_rate": 1.570945945945946e-07,
      "logits/chosen": -2.0074219703674316,
      "logits/rejected": -1.796484351158142,
      "logps/chosen": -291.6031188964844,
      "logps/rejected": -320.95001220703125,
      "loss": 0.2796,
      "rewards/accuracies": 0.8687499761581421,
      "rewards/chosen": 1.8628418445587158,
      "rewards/margins": 3.919921875,
      "rewards/rejected": -2.0587158203125,
      "step": 220
    },
    {
      "epoch": 1.4423076923076923,
      "grad_norm": 7.8418596148572455,
      "learning_rate": 1.4864864864864866e-07,
      "logits/chosen": -2.051953077316284,
      "logits/rejected": -1.8517577648162842,
      "logps/chosen": -221.51171875,
      "logps/rejected": -234.59375,
      "loss": 0.2821,
      "rewards/accuracies": 0.84375,
      "rewards/chosen": 1.9519531726837158,
      "rewards/margins": 3.46875,
      "rewards/rejected": -1.521032691001892,
      "step": 225
    },
    {
      "epoch": 1.4743589743589745,
      "grad_norm": 19.99054898354689,
      "learning_rate": 1.402027027027027e-07,
      "logits/chosen": -1.9873046875,
      "logits/rejected": -1.816015601158142,
      "logps/chosen": -234.86874389648438,
      "logps/rejected": -471.40625,
      "loss": 0.2533,
      "rewards/accuracies": 0.918749988079071,
      "rewards/chosen": 1.637182593345642,
      "rewards/margins": 3.6805663108825684,
      "rewards/rejected": -2.044677734375,
      "step": 230
    },
    {
      "epoch": 1.5064102564102564,
      "grad_norm": 6.927398842550873,
      "learning_rate": 1.3175675675675673e-07,
      "logits/chosen": -2.116406202316284,
      "logits/rejected": -1.890234351158142,
      "logps/chosen": -236.1374969482422,
      "logps/rejected": -326.1812438964844,
      "loss": 0.2292,
      "rewards/accuracies": 0.9125000238418579,
      "rewards/chosen": 1.9124755859375,
      "rewards/margins": 3.916796922683716,
      "rewards/rejected": -2.0068116188049316,
      "step": 235
    },
    {
      "epoch": 1.5384615384615383,
      "grad_norm": 19.26565095128112,
      "learning_rate": 1.233108108108108e-07,
      "logits/chosen": -1.9998047351837158,
      "logits/rejected": -1.8044922351837158,
      "logps/chosen": -215.8718719482422,
      "logps/rejected": -273.55780029296875,
      "loss": 0.2959,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": 1.870080590248108,
      "rewards/margins": 3.464648485183716,
      "rewards/rejected": -1.59326171875,
      "step": 240
    },
    {
      "epoch": 1.5705128205128205,
      "grad_norm": 23.53473342051176,
      "learning_rate": 1.1486486486486487e-07,
      "logits/chosen": -2.0589842796325684,
      "logits/rejected": -1.871484398841858,
      "logps/chosen": -288.2890625,
      "logps/rejected": -367.5843811035156,
      "loss": 0.2964,
      "rewards/accuracies": 0.875,
      "rewards/chosen": 1.398657202720642,
      "rewards/margins": 3.667285203933716,
      "rewards/rejected": -2.267504930496216,
      "step": 245
    },
    {
      "epoch": 1.6025641025641026,
      "grad_norm": 9.487545014290102,
      "learning_rate": 1.0641891891891891e-07,
      "logits/chosen": -2.122851610183716,
      "logits/rejected": -1.9580078125,
      "logps/chosen": -285.9078063964844,
      "logps/rejected": -379.8843688964844,
      "loss": 0.3032,
      "rewards/accuracies": 0.918749988079071,
      "rewards/chosen": 1.5487792491912842,
      "rewards/margins": 3.930615186691284,
      "rewards/rejected": -2.3807129859924316,
      "step": 250
    },
    {
      "epoch": 1.6346153846153846,
      "grad_norm": 34.335674181311,
      "learning_rate": 9.797297297297297e-08,
      "logits/chosen": -1.9865233898162842,
      "logits/rejected": -1.8146483898162842,
      "logps/chosen": -303.875,
      "logps/rejected": -383.5625,
      "loss": 0.2268,
      "rewards/accuracies": 0.918749988079071,
      "rewards/chosen": 2.1684327125549316,
      "rewards/margins": 4.968652248382568,
      "rewards/rejected": -2.7986207008361816,
      "step": 255
    },
    {
      "epoch": 1.6666666666666665,
      "grad_norm": 12.79380674096048,
      "learning_rate": 8.952702702702702e-08,
      "logits/chosen": -1.9738280773162842,
      "logits/rejected": -1.810156226158142,
      "logps/chosen": -224.6687469482422,
      "logps/rejected": -420.2437438964844,
      "loss": 0.2571,
      "rewards/accuracies": 0.9375,
      "rewards/chosen": 1.805883765220642,
      "rewards/margins": 3.4756011962890625,
      "rewards/rejected": -1.672705054283142,
      "step": 260
    },
    {
      "epoch": 1.6987179487179487,
      "grad_norm": 5.288061215805156,
      "learning_rate": 8.108108108108108e-08,
      "logits/chosen": -2.00390625,
      "logits/rejected": -1.875585913658142,
      "logps/chosen": -238.39688110351562,
      "logps/rejected": -330.8125,
      "loss": 0.198,
      "rewards/accuracies": 0.925000011920929,
      "rewards/chosen": 2.1185302734375,
      "rewards/margins": 4.246289253234863,
      "rewards/rejected": -2.1275634765625,
      "step": 265
    },
    {
      "epoch": 1.7307692307692308,
      "grad_norm": 15.41954334878335,
      "learning_rate": 7.263513513513512e-08,
      "logits/chosen": -2.037890672683716,
      "logits/rejected": -1.8224608898162842,
      "logps/chosen": -284.765625,
      "logps/rejected": -533.375,
      "loss": 0.2335,
      "rewards/accuracies": 0.893750011920929,
      "rewards/chosen": 1.4149596691131592,
      "rewards/margins": 4.398095607757568,
      "rewards/rejected": -2.9843382835388184,
      "step": 270
    },
    {
      "epoch": 1.7628205128205128,
      "grad_norm": 7.763713271113468,
      "learning_rate": 6.418918918918918e-08,
      "logits/chosen": -2.0380859375,
      "logits/rejected": -1.8134765625,
      "logps/chosen": -225.25,
      "logps/rejected": -430.89373779296875,
      "loss": 0.2135,
      "rewards/accuracies": 0.925000011920929,
      "rewards/chosen": 1.960363745689392,
      "rewards/margins": 4.0166015625,
      "rewards/rejected": -2.0557618141174316,
      "step": 275
    },
    {
      "epoch": 1.7948717948717947,
      "grad_norm": 4.521033115765149,
      "learning_rate": 5.574324324324324e-08,
      "logits/chosen": -2.044140577316284,
      "logits/rejected": -1.811914086341858,
      "logps/chosen": -144.3640594482422,
      "logps/rejected": -276.8843688964844,
      "loss": 0.2559,
      "rewards/accuracies": 0.8812500238418579,
      "rewards/chosen": 2.030041456222534,
      "rewards/margins": 3.295703172683716,
      "rewards/rejected": -1.2666351795196533,
      "step": 280
    },
    {
      "epoch": 1.8269230769230769,
      "grad_norm": 9.52459220706363,
      "learning_rate": 4.72972972972973e-08,
      "logits/chosen": -2.127734422683716,
      "logits/rejected": -1.897070288658142,
      "logps/chosen": -280.30157470703125,
      "logps/rejected": -316.5171813964844,
      "loss": 0.2486,
      "rewards/accuracies": 0.887499988079071,
      "rewards/chosen": 1.485009789466858,
      "rewards/margins": 3.428417921066284,
      "rewards/rejected": -1.9419434070587158,
      "step": 285
    },
    {
      "epoch": 1.858974358974359,
      "grad_norm": 8.906979076376345,
      "learning_rate": 3.885135135135135e-08,
      "logits/chosen": -2.0869140625,
      "logits/rejected": -1.8507812023162842,
      "logps/chosen": -217.7156219482422,
      "logps/rejected": -319.4203186035156,
      "loss": 0.2271,
      "rewards/accuracies": 0.8812500238418579,
      "rewards/chosen": 2.053515672683716,
      "rewards/margins": 4.0621337890625,
      "rewards/rejected": -2.010498046875,
      "step": 290
    },
    {
      "epoch": 1.891025641025641,
      "grad_norm": 9.745814847980592,
      "learning_rate": 3.040540540540541e-08,
      "logits/chosen": -2.052539110183716,
      "logits/rejected": -1.8537108898162842,
      "logps/chosen": -241.68905639648438,
      "logps/rejected": -401.51251220703125,
      "loss": 0.2465,
      "rewards/accuracies": 0.90625,
      "rewards/chosen": 1.9554443359375,
      "rewards/margins": 3.714062452316284,
      "rewards/rejected": -1.7587372064590454,
      "step": 295
    },
    {
      "epoch": 1.9230769230769231,
      "grad_norm": 10.279258989880054,
      "learning_rate": 2.195945945945946e-08,
      "logits/chosen": -1.91796875,
      "logits/rejected": -1.7571289539337158,
      "logps/chosen": -250.7375030517578,
      "logps/rejected": -312.2203063964844,
      "loss": 0.1891,
      "rewards/accuracies": 0.9624999761581421,
      "rewards/chosen": 2.004687547683716,
      "rewards/margins": 4.478320121765137,
      "rewards/rejected": -2.476269483566284,
      "step": 300
    },
    {
      "epoch": 1.9551282051282053,
      "grad_norm": 6.84223500138646,
      "learning_rate": 1.3513513513513514e-08,
      "logits/chosen": -2.0062499046325684,
      "logits/rejected": -1.889062523841858,
      "logps/chosen": -267.1031188964844,
      "logps/rejected": -407.0718688964844,
      "loss": 0.2554,
      "rewards/accuracies": 0.90625,
      "rewards/chosen": 1.6201751232147217,
      "rewards/margins": 3.678515672683716,
      "rewards/rejected": -2.054980516433716,
      "step": 305
    },
    {
      "epoch": 1.9871794871794872,
      "grad_norm": 6.084443681112896,
      "learning_rate": 5.067567567567567e-09,
      "logits/chosen": -2.08203125,
      "logits/rejected": -1.8772461414337158,
      "logps/chosen": -248.9656219482422,
      "logps/rejected": -302.21875,
      "loss": 0.2517,
      "rewards/accuracies": 0.8812500238418579,
      "rewards/chosen": 1.859375,
      "rewards/margins": 3.5653076171875,
      "rewards/rejected": -1.707067847251892,
      "step": 310
    },
    {
      "epoch": 2.0,
      "step": 312,
      "total_flos": 0.0,
      "train_loss": 0.2867361557407257,
      "train_runtime": 4280.6612,
      "train_samples_per_second": 2.331,
      "train_steps_per_second": 0.073
    }
  ],
  "logging_steps": 5,
  "max_steps": 312,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 50,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}