{
  "best_metric": 0.4090025722980499,
  "best_model_checkpoint": "./mistral/23-02-24-Weni-ZeroShot-3.3.3-Mistral-7b-Multilanguage-1-epoch-3.2.0_Zeroshot-2_max_steps-201_batch_128_2024-02-23_ppid_2273/checkpoint-160",
  "epoch": 0.7930607187112764,
  "eval_steps": 20,
  "global_step": 160,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.1,
      "grad_norm": 1.0583807229995728,
      "learning_rate": 0.0002,
      "loss": 1.5561,
      "step": 20
    },
    {
      "epoch": 0.1,
      "eval_loss": 0.8398289084434509,
      "eval_runtime": 365.9464,
      "eval_samples_per_second": 7.834,
      "eval_steps_per_second": 0.246,
      "step": 20
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.2255832850933075,
      "learning_rate": 0.0001940350472628637,
      "loss": 0.594,
      "step": 40
    },
    {
      "epoch": 0.2,
      "eval_loss": 0.5098645687103271,
      "eval_runtime": 365.932,
      "eval_samples_per_second": 7.835,
      "eval_steps_per_second": 0.246,
      "step": 40
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.188425675034523,
      "learning_rate": 0.0001779507116848976,
      "loss": 0.4849,
      "step": 60
    },
    {
      "epoch": 0.3,
      "eval_loss": 0.46610626578330994,
      "eval_runtime": 365.7636,
      "eval_samples_per_second": 7.838,
      "eval_steps_per_second": 0.246,
      "step": 60
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.24421139061450958,
      "learning_rate": 0.00015199063052725745,
      "loss": 0.4516,
      "step": 80
    },
    {
      "epoch": 0.4,
      "eval_loss": 0.4327794909477234,
      "eval_runtime": 366.1258,
      "eval_samples_per_second": 7.831,
      "eval_steps_per_second": 0.246,
      "step": 80
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.1821870654821396,
      "learning_rate": 0.00011982811629223709,
      "loss": 0.4237,
      "step": 100
    },
    {
      "epoch": 0.5,
      "eval_loss": 0.41923952102661133,
      "eval_runtime": 366.5093,
      "eval_samples_per_second": 7.822,
      "eval_steps_per_second": 0.246,
      "step": 100
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.16211040318012238,
      "learning_rate": 8.530012652622397e-05,
      "loss": 0.4235,
      "step": 120
    },
    {
      "epoch": 0.59,
      "eval_loss": 0.41423356533050537,
      "eval_runtime": 366.3967,
      "eval_samples_per_second": 7.825,
      "eval_steps_per_second": 0.246,
      "step": 120
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.16519276797771454,
      "learning_rate": 5.2525817770470084e-05,
      "loss": 0.4128,
      "step": 140
    },
    {
      "epoch": 0.69,
      "eval_loss": 0.41103067994117737,
      "eval_runtime": 366.4036,
      "eval_samples_per_second": 7.825,
      "eval_steps_per_second": 0.246,
      "step": 140
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.15887108445167542,
      "learning_rate": 2.5415134079383006e-05,
      "loss": 0.4152,
      "step": 160
    },
    {
      "epoch": 0.79,
      "eval_loss": 0.4090025722980499,
      "eval_runtime": 366.5464,
      "eval_samples_per_second": 7.822,
      "eval_steps_per_second": 0.246,
      "step": 160
    }
  ],
  "logging_steps": 20,
  "max_steps": 201,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 20,
  "total_flos": 7.213471704767529e+17,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}