```yaml
checkpoint_dir: /teamspace/studios/this_studio/out/finetune/llama-8b-dataset12/final
out_dir: out/finetune-2/llama-8b-dataset12-cot
precision: bf16-true
devices: 1
num_nodes: 1

# LoRA hyperparameters: adapters on the attention query and value
# projections only (r=8, alpha=16, dropout=0.1).
lora_r: 8
lora_alpha: 16
lora_dropout: 0.1
lora_query: true
lora_key: false
lora_value: true
lora_projection: false
lora_mlp: false
lora_head: false

data:
  class_path: litgpt.data.JSON
  init_args:
    json_path: cot_satcom_litgpt.json
    mask_prompt: false
    val_split_fraction: 0.05
    prompt_style: alpaca
    ignore_index: -100
    seed: 42
    num_workers: 4

train:
  save_interval: 25
  log_interval: 1
  global_batch_size: 32
  micro_batch_size: 4
  lr_warmup_steps: 25
  epochs: 12
  max_seq_length: 2048
  min_lr: 2.0e-05

log: {}

eval:
  interval: 20
  max_new_tokens: 100
  max_iters: 100
  initial_validation: true
  final_validation: true
  evaluate_example: first

optimizer:
  class_path: torch.optim.AdamW
  init_args:
    lr: 2.0e-05
    weight_decay: 0.0
    betas:
    - 0.9
    - 0.999

logger_name: wandb
seed: 1337
```
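With `global_batch_size: 32` and `micro_batch_size: 4` on a single device, LitGPT accumulates gradients over 32 / 4 = 8 micro-batches per optimizer step. To launch a run with this configuration, save it to a file and pass it to the LitGPT CLI. The sketch below is a minimal example, assuming the config is saved as `lora-cot.yaml` (the filename is arbitrary); note that the subcommand name varies by LitGPT version (`finetune_lora` in recent releases, `finetune lora` in older ones):

```bash
# Minimal launch sketch; assumes this config was saved as lora-cot.yaml.
# checkpoint_dir, out_dir, data, etc. are all taken from the config file.
litgpt finetune_lora --config lora-cot.yaml

# Individual values can be overridden on the command line, e.g.:
litgpt finetune_lora --config lora-cot.yaml --train.epochs 6
```

Since `logger_name: wandb` is set, the run logs to Weights & Biases, so `wandb` must be installed and authenticated (`wandb login`) before launching.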