{
  "best_metric": 1.4235661029815674,
  "best_model_checkpoint": "./llama3/21-04-24-Weni-WeniGPT-Agents-Llama3-1.0.8-SFT_Experiment with SFT and Llama3 and updates in requirements-2_max_steps-669_batch_2_2024-04-21_ppid_2917/checkpoint-90",
  "epoch": 0.40268456375838924,
  "eval_steps": 30,
  "global_step": 90,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0447427293064877,
      "grad_norm": 0.3199349641799927,
      "learning_rate": 0.0001,
      "loss": 1.9361,
      "step": 10
    },
    {
      "epoch": 0.0894854586129754,
      "grad_norm": 0.9833651781082153,
      "learning_rate": 0.0002,
      "loss": 1.8397,
      "step": 20
    },
    {
      "epoch": 0.1342281879194631,
      "grad_norm": 1.0959680080413818,
      "learning_rate": 0.00019990511519199923,
      "loss": 1.7877,
      "step": 30
    },
    {
      "epoch": 0.1342281879194631,
      "eval_loss": 1.7081660032272339,
      "eval_runtime": 10.9642,
      "eval_samples_per_second": 4.195,
      "eval_steps_per_second": 4.195,
      "step": 30
    },
    {
      "epoch": 0.1789709172259508,
      "grad_norm": 1.2613288164138794,
      "learning_rate": 0.00019957734949126304,
      "loss": 1.7523,
      "step": 40
    },
    {
      "epoch": 0.22371364653243847,
      "grad_norm": 0.7975640892982483,
      "learning_rate": 0.0001990162991697884,
      "loss": 1.516,
      "step": 50
    },
    {
      "epoch": 0.2684563758389262,
      "grad_norm": 0.71113121509552,
      "learning_rate": 0.0001982232786270059,
      "loss": 1.3675,
      "step": 60
    },
    {
      "epoch": 0.2684563758389262,
      "eval_loss": 1.4538882970809937,
      "eval_runtime": 10.9625,
      "eval_samples_per_second": 4.196,
      "eval_steps_per_second": 4.196,
      "step": 60
    },
    {
      "epoch": 0.3131991051454139,
      "grad_norm": 0.8126013875007629,
      "learning_rate": 0.00019720014571008158,
      "loss": 1.4173,
      "step": 70
    },
    {
      "epoch": 0.3579418344519016,
      "grad_norm": 0.6399104595184326,
      "learning_rate": 0.00019594929736144976,
      "loss": 1.5249,
      "step": 80
    },
    {
      "epoch": 0.40268456375838924,
      "grad_norm": 0.7685297131538391,
      "learning_rate": 0.00019447366400338116,
      "loss": 1.3782,
      "step": 90
    },
    {
      "epoch": 0.40268456375838924,
      "eval_loss": 1.4235661029815674,
      "eval_runtime": 10.9616,
      "eval_samples_per_second": 4.196,
      "eval_steps_per_second": 4.196,
      "step": 90
    }
  ],
  "logging_steps": 10,
  "max_steps": 669,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 90,
  "total_flos": 1.867800852578304e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}