{ "best_metric": 1.4235661029815674, "best_model_checkpoint": "./llama3/21-04-24-Weni-WeniGPT-Agents-Llama3-1.0.8-SFT_Experiment with SFT and Llama3 and updates in requirements-2_max_steps-669_batch_2_2024-04-21_ppid_2917/checkpoint-90", "epoch": 0.40268456375838924, "eval_steps": 30, "global_step": 90, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0447427293064877, "grad_norm": 0.3199349641799927, "learning_rate": 0.0001, "loss": 1.9361, "step": 10 }, { "epoch": 0.0894854586129754, "grad_norm": 0.9833651781082153, "learning_rate": 0.0002, "loss": 1.8397, "step": 20 }, { "epoch": 0.1342281879194631, "grad_norm": 1.0959680080413818, "learning_rate": 0.00019990511519199923, "loss": 1.7877, "step": 30 }, { "epoch": 0.1342281879194631, "eval_loss": 1.7081660032272339, "eval_runtime": 10.9642, "eval_samples_per_second": 4.195, "eval_steps_per_second": 4.195, "step": 30 }, { "epoch": 0.1789709172259508, "grad_norm": 1.2613288164138794, "learning_rate": 0.00019957734949126304, "loss": 1.7523, "step": 40 }, { "epoch": 0.22371364653243847, "grad_norm": 0.7975640892982483, "learning_rate": 0.0001990162991697884, "loss": 1.516, "step": 50 }, { "epoch": 0.2684563758389262, "grad_norm": 0.71113121509552, "learning_rate": 0.0001982232786270059, "loss": 1.3675, "step": 60 }, { "epoch": 0.2684563758389262, "eval_loss": 1.4538882970809937, "eval_runtime": 10.9625, "eval_samples_per_second": 4.196, "eval_steps_per_second": 4.196, "step": 60 }, { "epoch": 0.3131991051454139, "grad_norm": 0.8126013875007629, "learning_rate": 0.00019720014571008158, "loss": 1.4173, "step": 70 }, { "epoch": 0.3579418344519016, "grad_norm": 0.6399104595184326, "learning_rate": 0.00019594929736144976, "loss": 1.5249, "step": 80 }, { "epoch": 0.40268456375838924, "grad_norm": 0.7685297131538391, "learning_rate": 0.00019447366400338116, "loss": 1.3782, "step": 90 }, { "epoch": 0.40268456375838924, "eval_loss": 1.4235661029815674, "eval_runtime": 10.9616, "eval_samples_per_second": 4.196, "eval_steps_per_second": 4.196, "step": 90 } ], "logging_steps": 10, "max_steps": 669, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 90, "total_flos": 1.867800852578304e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }