#!/usr/bin/env bash
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
# Launch the align_anything.trainers.text_image_to_text_image.sft module
# through deepspeed on the llf_ti2ti_13.5k_tokenized.pt data file.
#
# Required environment:
#   WANDB_API_KEY  Weights & Biases API key. It must be supplied by the
#                  caller; it is intentionally NOT stored in this script.
#                  NOTE(review): a key was previously committed here in
#                  plain text and should be treated as leaked and rotated.
#
# Paths below are relative to the current working directory (./setup.sh,
# ../outputs), so run this script from the directory that contains it.

export TRITON_CACHE_DIR="/home/align-anything/cache/triton"

MODEL_NAME_OR_PATH="/data/align-anything/hantao/models/0916_ti_to_ti_sft/"

TRAIN_DATASETS="/data/align-anything/hantao/align-anything/projects/text_image_to_text_image/outputs"
TRAIN_DATA_FILES="llf_ti2ti_13.5k_tokenized.pt"

OUTPUT_DIR="../outputs/sft_tf_cham_1111_13.5k_ti2ti"

# Secrets come from the environment, never from the source tree.
if [[ -z "${WANDB_API_KEY:-}" ]]; then
  printf 'error: WANDB_API_KEY is not set; export it before running this script\n' >&2
  exit 1
fi
export WANDB_API_KEY

# Without this guard a missing setup.sh only prints a warning and the
# launch continues with whatever it was supposed to configure left unset.
if [[ ! -f ./setup.sh ]]; then
  printf 'error: ./setup.sh not found; run this script from its own directory\n' >&2
  exit 1
fi
source ./setup.sh

# MASTER_PORT is not assigned in this script; presumably ./setup.sh or the
# caller's environment provides it (TODO confirm). If it were empty,
# --master_port would consume the following flag as its value.
if [[ -z "${MASTER_PORT:-}" ]]; then
  printf 'error: MASTER_PORT is not set (expected from ./setup.sh or the environment)\n' >&2
  exit 1
fi

# Every expansion is quoted so each value reaches deepspeed as one argument.
deepspeed \
  --master_port "${MASTER_PORT}" \
  --module align_anything.trainers.text_image_to_text_image.sft \
  --model_name_or_path "${MODEL_NAME_OR_PATH}" \
  --train_datasets "${TRAIN_DATASETS}" \
  --train_data_files "${TRAIN_DATA_FILES}" \
  --output_dir "${OUTPUT_DIR}" \
  --train_template AA_textfeedback \
  --train_split train \
  --per_device_train_batch_size 4 \
  --per_device_eval_batch_size 4 \
  --gradient_accumulation_steps 2 \
  --save_interval 400 \
  --learning_rate 1e-6 \
  --epochs 3 \
  --lr_scheduler_type cosine
| |
|
| |
|
# Launch the align_anything.trainers.text_image_to_text_image.sft module
# through deepspeed on the llf_ti2ti_6.75k_tokenized.pt data file.
#
# Required environment:
#   WANDB_API_KEY  Weights & Biases API key. It must be supplied by the
#                  caller; it is intentionally NOT stored in this script.
#                  NOTE(review): a key was previously committed here in
#                  plain text and should be treated as leaked and rotated.
#
# Paths below are relative to the current working directory (./setup.sh,
# ../outputs), so run this script from the directory that contains it.

export TRITON_CACHE_DIR="/home/align-anything/cache/triton"

MODEL_NAME_OR_PATH="/data/align-anything/hantao/models/0916_ti_to_ti_sft/"

TRAIN_DATASETS="/data/align-anything/hantao/align-anything/projects/text_image_to_text_image/outputs"
TRAIN_DATA_FILES="llf_ti2ti_6.75k_tokenized.pt"

OUTPUT_DIR="../outputs/sft_tf_cham_1111_6.75k_ti2ti"

# Secrets come from the environment, never from the source tree.
if [[ -z "${WANDB_API_KEY:-}" ]]; then
  printf 'error: WANDB_API_KEY is not set; export it before running this script\n' >&2
  exit 1
fi
export WANDB_API_KEY

# Without this guard a missing setup.sh only prints a warning and the
# launch continues with whatever it was supposed to configure left unset.
if [[ ! -f ./setup.sh ]]; then
  printf 'error: ./setup.sh not found; run this script from its own directory\n' >&2
  exit 1
fi
source ./setup.sh

# MASTER_PORT is not assigned in this script; presumably ./setup.sh or the
# caller's environment provides it (TODO confirm). If it were empty,
# --master_port would consume the following flag as its value.
if [[ -z "${MASTER_PORT:-}" ]]; then
  printf 'error: MASTER_PORT is not set (expected from ./setup.sh or the environment)\n' >&2
  exit 1
fi

# Every expansion is quoted so each value reaches deepspeed as one argument.
deepspeed \
  --master_port "${MASTER_PORT}" \
  --module align_anything.trainers.text_image_to_text_image.sft \
  --model_name_or_path "${MODEL_NAME_OR_PATH}" \
  --train_datasets "${TRAIN_DATASETS}" \
  --train_data_files "${TRAIN_DATA_FILES}" \
  --output_dir "${OUTPUT_DIR}" \
  --train_template AA_textfeedback \
  --train_split train \
  --per_device_train_batch_size 4 \
  --per_device_eval_batch_size 4 \
  --gradient_accumulation_steps 2 \
  --save_interval 400 \
  --learning_rate 1e-6 \
  --epochs 3 \
  --lr_scheduler_type cosine