{ "epoch": 3, "train_bsz": 2, "eval_bsz": 4, "lr": 5e-06, "deepspeed": "configs/ds_stage2.json", "weight_decay": 0.01, "save_eval_step_ratio": 0.333333, "warmup_step_ratio": 0.1, "grad_checkpointing": true, "model": "llama3", "common": { "debug": false, "device": "0,1,2,3", "world_size": 4, "rank": 0, "master_address": "localhost", "master_port": 48541, "bf16": true, "wandb_project_name": "CREAM", "wandb_entity_name": "your_wandb_entity_name", "run_name": "train_llama_w_qwen_binary_sft-lora_None-ckpt_None-25-04-27-20_56_47", "output_dir": "outputs/sft/train_llama_w_qwen_binary_sft-lora_None-ckpt_None-25-04-27-20_56_47", "load_args_path": null }, "checkpoint": null, "train_stage": "sft", "dataset": { "name": "train_llama_w_qwen_binary_sft", "limit_size": null, "max_length": 4096 }, "lora": { "enable": false, "alpha": 64, "r": 32, "dropout": 0.1 }, "dpo": { "beta": 0.1, "method": "original" } }