diff --git a/OpenECADv2_0.55B_finetune.sh b/OpenECADv2_0.55B_finetune.sh
new file mode 100644
index 0000000..4539b99
--- /dev/null
+++ b/OpenECADv2_0.55B_finetune.sh
@@ -0,0 +1,45 @@
+DATA_PATH="/root/autodl-tmp/OpenECADv2_100k_directout_1k/data.json"
+IMAGE_PATH="/root/autodl-tmp/OpenECADv2_100k_directout_1k/images"
+MODEL_MAX_LENGTH=2048
+OUTPUT_DIR="/root/autodl-tmp/OpenECADv2-0.55B-lora"
+
+deepspeed --include localhost:0,1 --master_port 29501 tinyllava/train/custom_finetune.py \
+    --deepspeed ./scripts/zero2.json \
+    --data_path $DATA_PATH \
+    --image_folder $IMAGE_PATH \
+    --is_multimodal True \
+    --conv_version llama \
+    --mm_vision_select_layer -2 \
+    --image_aspect_ratio square \
+    --fp16 True \
+    --training_recipe lora \
+    --tune_type_llm lora \
+    --tune_type_vision_tower lora \
+    --tune_vision_tower_from_layer 0 \
+    --tune_type_connector full \
+    --lora_r 128 \
+    --lora_alpha 256 \
+    --group_by_modality_length False \
+    --pretrained_model_path "/root/autodl-tmp/TinyLLaVA-OpenELM-450M-CLIP-0.55B" \
+    --output_dir $OUTPUT_DIR \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 2 \
+    --per_device_eval_batch_size 2 \
+    --gradient_accumulation_steps 2 \
+    --evaluation_strategy "no" \
+    --save_strategy "steps" \
+    --save_steps 1250 \
+    --save_total_limit 1 \
+    --learning_rate 1e-4 \
+    --weight_decay 0. \
+    --warmup_ratio 0.03 \
+    --lr_scheduler_type "cosine" \
+    --logging_steps 1 \
+    --tf32 False \
+    --model_max_length $MODEL_MAX_LENGTH \
+    --gradient_checkpointing True \
+    --dataloader_num_workers 8 \
+    --lazy_preprocess True \
+    --report_to tensorboard \
+    --tokenizer_use_fast False \
+    --run_name OpenECADv2-0.55B-lora
diff --git a/OpenECADv2_0.89B_finetune.sh b/OpenECADv2_0.89B_finetune.sh
new file mode 100644
index 0000000..33ae295
--- /dev/null
+++ b/OpenECADv2_0.89B_finetune.sh
@@ -0,0 +1,45 @@
+DATA_PATH="/root/autodl-tmp/OpenECADv2_100k_directout_1k/data.json"
+IMAGE_PATH="/root/autodl-tmp/OpenECADv2_100k_directout_1k/images"
+MODEL_MAX_LENGTH=2048
+OUTPUT_DIR="/root/autodl-tmp/OpenECADv2-0.89B-lora"
+
+deepspeed --include localhost:0,1 --master_port 29501 tinyllava/train/custom_finetune.py \
+    --deepspeed ./scripts/zero2.json \
+    --data_path $DATA_PATH \
+    --image_folder $IMAGE_PATH \
+    --is_multimodal True \
+    --conv_version llama \
+    --mm_vision_select_layer -2 \
+    --image_aspect_ratio square \
+    --fp16 True \
+    --training_recipe lora \
+    --tune_type_llm lora \
+    --tune_type_vision_tower lora \
+    --tune_vision_tower_from_layer 0 \
+    --tune_type_connector full \
+    --lora_r 128 \
+    --lora_alpha 256 \
+    --group_by_modality_length False \
+    --pretrained_model_path "/root/autodl-tmp/TinyLLaVA-OpenELM-450M-SigLIP-0.89B" \
+    --output_dir $OUTPUT_DIR \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 2 \
+    --per_device_eval_batch_size 2 \
+    --gradient_accumulation_steps 2 \
+    --evaluation_strategy "no" \
+    --save_strategy "steps" \
+    --save_steps 1250 \
+    --save_total_limit 1 \
+    --learning_rate 1e-4 \
+    --weight_decay 0. \
+    --warmup_ratio 0.03 \
+    --lr_scheduler_type "cosine" \
+    --logging_steps 1 \
+    --tf32 False \
+    --model_max_length $MODEL_MAX_LENGTH \
+    --gradient_checkpointing True \
+    --dataloader_num_workers 8 \
+    --lazy_preprocess True \
+    --report_to tensorboard \
+    --tokenizer_use_fast False \
+    --run_name OpenECADv2-0.89B-lora
\ No newline at end of file
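Both scripts above fine-tune the 0.55B (OpenELM-450M + CLIP) and 0.89B (OpenELM-450M + SigLIP) bases with identical hyperparameters; only the vision tower and the output paths differ. For readers unfamiliar with the LoRA flags: --training_recipe lora wraps the tuned modules in low-rank adapters, while --tune_type_connector full trains the multimodal connector directly. A minimal sketch of how --lora_r 128 and --lora_alpha 256 would map onto a standard PEFT LoraConfig (the target_modules names below are hypothetical placeholders, not TinyLLaVA's actual list, and TinyLLaVA builds its own config internally):

    # Sketch only: assumes the standard peft API.
    from peft import LoraConfig

    lora_config = LoraConfig(
        r=128,             # --lora_r: rank of the low-rank update matrices
        lora_alpha=256,    # --lora_alpha: scaling; effective scale = alpha / r = 2
        lora_dropout=0.0,  # assumed; the scripts above set no dropout flag
        target_modules=["q_proj", "v_proj"],  # hypothetical attention projections
    )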
diff --git a/OpenECADv2_2.4B_finetune.sh b/OpenECADv2_2.4B_finetune.sh
new file mode 100644
index 0000000..efb0150
--- /dev/null
+++ b/OpenECADv2_2.4B_finetune.sh
@@ -0,0 +1,45 @@
+DATA_PATH="/root/autodl-tmp/OpenECADv2_150k_directout_2k/data.json"
+IMAGE_PATH="/root/autodl-tmp/OpenECADv2_150k_directout_2k/images"
+MODEL_MAX_LENGTH=2048
+OUTPUT_DIR="/root/autodl-tmp/OpenECADv2-2.4B-lora"
+
+deepspeed --include localhost:0 --master_port 29501 tinyllava/train/custom_finetune.py \
+    --deepspeed ./scripts/zero2.json \
+    --data_path $DATA_PATH \
+    --image_folder $IMAGE_PATH \
+    --is_multimodal True \
+    --conv_version gemma \
+    --mm_vision_select_layer -2 \
+    --image_aspect_ratio square \
+    --fp16 True \
+    --training_recipe lora \
+    --tune_type_llm lora \
+    --tune_type_vision_tower lora \
+    --tune_vision_tower_from_layer 0 \
+    --tune_type_connector full \
+    --lora_r 128 \
+    --lora_alpha 256 \
+    --group_by_modality_length False \
+    --pretrained_model_path "/root/autodl-fs/TinyLLaVA-Gemma-SigLIP-2.4B" \
+    --output_dir $OUTPUT_DIR \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 2 \
+    --per_device_eval_batch_size 2 \
+    --gradient_accumulation_steps 2 \
+    --evaluation_strategy "no" \
+    --save_strategy "steps" \
+    --save_steps 1250 \
+    --save_total_limit 1 \
+    --learning_rate 1e-4 \
+    --weight_decay 0. \
+    --warmup_ratio 0.03 \
+    --lr_scheduler_type "cosine" \
+    --logging_steps 1 \
+    --tf32 False \
+    --model_max_length $MODEL_MAX_LENGTH \
+    --gradient_checkpointing True \
+    --dataloader_num_workers 8 \
+    --lazy_preprocess True \
+    --report_to tensorboard \
+    --tokenizer_use_fast False \
+    --run_name OpenECADv2-2.4B-lora
\ No newline at end of file
diff --git a/OpenECADv2_3.1B_finetune.sh b/OpenECADv2_3.1B_finetune.sh
new file mode 100644
index 0000000..1b8c75b
--- /dev/null
+++ b/OpenECADv2_3.1B_finetune.sh
@@ -0,0 +1,45 @@
+DATA_PATH="/root/autodl-tmp/OpenECADv2_200k_directout_3k/data.json"
+IMAGE_PATH="/root/autodl-tmp/OpenECADv2_200k_directout_3k/images"
+MODEL_MAX_LENGTH=3072
+OUTPUT_DIR="/root/autodl-tmp/OpenECADv2-3.1B-lora"
+
+deepspeed --include localhost:0,1 --master_port 29501 tinyllava/train/custom_finetune.py \
+    --deepspeed ./scripts/zero2.json \
+    --data_path $DATA_PATH \
+    --image_folder $IMAGE_PATH \
+    --is_multimodal True \
+    --conv_version phi \
+    --mm_vision_select_layer -2 \
+    --image_aspect_ratio square \
+    --fp16 True \
+    --training_recipe lora \
+    --tune_type_llm lora \
+    --tune_type_vision_tower lora \
+    --tune_vision_tower_from_layer 0 \
+    --tune_type_connector full \
+    --lora_r 128 \
+    --lora_alpha 256 \
+    --group_by_modality_length False \
+    --pretrained_model_path "/root/autodl-fs/TinyLLaVA-Phi-2-SigLIP-3.1B" \
+    --output_dir $OUTPUT_DIR \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 2 \
+    --per_device_eval_batch_size 2 \
+    --gradient_accumulation_steps 2 \
+    --evaluation_strategy "no" \
+    --save_strategy "steps" \
+    --save_steps 1250 \
+    --save_total_limit 1 \
+    --learning_rate 1e-4 \
+    --weight_decay 0. \
+    --warmup_ratio 0.03 \
+    --lr_scheduler_type "cosine" \
+    --logging_steps 1 \
+    --tf32 False \
+    --model_max_length $MODEL_MAX_LENGTH \
+    --gradient_checkpointing True \
+    --dataloader_num_workers 8 \
+    --lazy_preprocess True \
+    --report_to tensorboard \
+    --tokenizer_use_fast False \
+    --run_name OpenECADv2-3.1B-lora
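The 2.4B run uses a single GPU (--include localhost:0) while the 3.1B run uses two, and the 3.1B script raises MODEL_MAX_LENGTH to 3072 to fit the longer 3k-token outputs. Assuming the dataset directory names reflect the record counts (150k and 200k samples, an assumption), the flags imply the following optimizer-step arithmetic:

    # Effective batch size = per-device batch * gradient accumulation * GPU count.
    def steps_per_epoch(num_samples, per_device_bs, grad_accum, num_gpus):
        return num_samples // (per_device_bs * grad_accum * num_gpus)

    assert steps_per_epoch(150_000, 2, 2, 1) == 37_500  # 2.4B: ~30 saves at --save_steps 1250
    assert steps_per_epoch(200_000, 2, 2, 2) == 25_000  # 3.1B: ~20 saves at --save_steps 1250

With --save_total_limit 1 the trainer prunes all but the most recent checkpoint, so only the final checkpoint-* directory survives each run.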
diff --git a/eval.py b/eval.py
new file mode 100644
index 0000000..985945a
--- /dev/null
+++ b/eval.py
@@ -0,0 +1,24 @@
+from tinyllava.eval.run_tiny_llava import eval_model
+
+model_path = ""
+prompt = "This image is a view of a 3D model from a certain angle. Please try to use Python-style APIs to render this model."
+image_file = ""
+conv_mode = "llama"  # "llama", "gemma", or "phi"
+# for OpenECAD 0.55B & 0.89B, use llama
+# for OpenECAD 2.4B, use gemma
+# for OpenECAD 3.1B, use phi
+
+args = type('Args', (), {
+    "model_path": model_path,
+    "model_base": None,
+    "query": prompt,
+    "conv_mode": conv_mode,
+    "image_file": image_file,
+    "sep": ",",
+    "temperature": 0,
+    "top_p": None,
+    "num_beams": 1,
+    "max_new_tokens": 1024  # increase this value if the generated code is cut off
+})()
+
+eval_model(args)
\ No newline at end of file
diff --git a/merge_lora_weights.py b/merge_lora_weights.py
new file mode 100644
index 0000000..2a8931b
--- /dev/null
+++ b/merge_lora_weights.py
@@ -0,0 +1,27 @@
+import argparse
+from tinyllava.model.load_model import load_pretrained_model
+
+def get_model_name_from_path(model_path):
+    model_path = model_path.strip("/")
+    model_paths = model_path.split("/")
+    if model_paths[-1].startswith('checkpoint-'):
+        return model_paths[-2] + "_" + model_paths[-1]
+    else:
+        return model_paths[-1]
+
+def merge_lora(args):
+    model_name = get_model_name_from_path(args.model_path)  # note: currently unused
+    model, tokenizer, image_processor, context_len = load_pretrained_model(args.model_path)
+
+    model.save_pretrained(args.save_path)
+    tokenizer.save_pretrained(args.save_path)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--model-path", type=str, required=True)
+    parser.add_argument("--save-path", type=str, required=True)
+
+    args = parser.parse_args()
+
+    merge_lora(args)
\ No newline at end of file
diff --git a/tinyllava/data/template/gemma_template.py b/tinyllava/data/template/gemma_template.py
index 3e062eb..6b1eb33 100644
--- a/tinyllava/data/template/gemma_template.py
+++ b/tinyllava/data/template/gemma_template.py
@@ -12,7 +12,7 @@ from transformers import PreTrainedTokenizer
 import torch
 import tokenizers
 
-system = "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions."
+system = "A conversation between a curious user and an artificial intelligence assistant. The user wants to create a 3D CAD project but only has a picture of the desired 3D geometry. The assistant, an expert mechanical part designer, provides detailed answers to the user's questions. The CAD project should be represented using OpenECAD style code in Python 3. Additionally, note that Curve (including Arc, Line, Circle) and Loop will automatically append to the Curves and Loops lists, respectively."
 
 @register_template('gemma')
 @dataclass
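A usage note on the two helper scripts above: a LoRA checkpoint is first merged with `python merge_lora_weights.py --model-path <checkpoint> --save-path <merged_dir>`, and the merged directory is then set as model_path in eval.py. eval.py builds its argument object with type('Args', (), {...})(); types.SimpleNamespace is the more idiomatic spelling of the same trick, shown here only as an equivalent alternative (field values mirror the committed defaults):

    from types import SimpleNamespace

    args = SimpleNamespace(
        model_path="",      # path to the merged model directory
        model_base=None,
        query="This image is a view of a 3D model from a certain angle. "
              "Please try to use Python-style APIs to render this model.",
        conv_mode="llama",  # "gemma" for 2.4B, "phi" for 3.1B
        image_file="",
        sep=",",
        temperature=0,
        top_p=None,
        num_beams=1,
        max_new_tokens=1024,  # raise if the generated code is cut off
    )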
diff --git a/tinyllava/data/template/llama_template.py b/tinyllava/data/template/llama_template.py
index 6705add..ad835bb 100644
--- a/tinyllava/data/template/llama_template.py
+++ b/tinyllava/data/template/llama_template.py
@@ -14,7 +14,7 @@ import tokenizers
 
 IS_TOKENIZER_GREATER_THAN_0_14 = version.parse(tokenizers.__version__) >= version.parse('0.14')
 
-system = "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions."
+system = "A conversation between a curious user and an artificial intelligence assistant. The user wants to create a 3D CAD project but only has a picture of the desired 3D geometry. The assistant, an expert mechanical part designer, provides detailed answers to the user's questions. The CAD project should be represented using OpenECAD style code in Python 3. Additionally, note that Curve (including Arc, Line, Circle) and Loop will automatically append to the Curves and Loops lists, respectively."
 
 @register_template('llama')
 @dataclass
diff --git a/tinyllava/data/template/phi_template.py b/tinyllava/data/template/phi_template.py
index e8aa45f..bedf41e 100644
--- a/tinyllava/data/template/phi_template.py
+++ b/tinyllava/data/template/phi_template.py
@@ -9,7 +9,7 @@ from . import register_template
 from transformers import PreTrainedTokenizer
 import torch
 
-system = "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions."
+system = "A conversation between a curious user and an artificial intelligence assistant. The user wants to create a 3D CAD project but only has a picture of the desired 3D geometry. The assistant, an expert mechanical part designer, provides detailed answers to the user's questions. The CAD project should be represented using OpenECAD style code in Python 3. Additionally, note that Curve (including Arc, Line, Circle) and Loop will automatically append to the Curves and Loops lists, respectively."
 
 @register_template('phi')
 @dataclass
diff --git a/tinyllava/model/load_model.py b/tinyllava/model/load_model.py
index 7024247..01f8c0d 100644
--- a/tinyllava/model/load_model.py
+++ b/tinyllava/model/load_model.py
@@ -42,6 +42,7 @@ def load_pretrained_model(model_name_or_path, load_type='hf', load_8bit=False, l
         model = TinyLlavaForConditionalGeneration(model_config)
         language_model_ckp_path = os.path.join(model_name_or_path, 'language_model/pytorch_model.bin')
         language_model_ckp = load_base_ckp_for_lora(language_model_ckp_path)
+        #language_model_ckp['lm_head.weight'] = language_model_ckp['model.embed_tokens.weight']
         model.language_model.load_state_dict(language_model_ckp)
         vision_tower_ckp_path = os.path.join(model_name_or_path, 'vision_tower/pytorch_model.bin')
         vision_tower_ckp = load_base_ckp_for_lora(vision_tower_ckp_path)
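The one-line change to load_model.py is left commented out, but it documents a real pitfall: base models that tie the output head to the input embeddings (OpenELM's small variants typically do) may omit lm_head.weight from the saved language_model/pytorch_model.bin, which makes the subsequent load_state_dict fail. A guarded variant of that line, as a sketch rather than what the commit enables:

    # Hypothetical guard: re-tie the head only when the checkpoint omits it,
    # so models that do store lm_head.weight are left untouched.
    if 'lm_head.weight' not in language_model_ckp:
        language_model_ckp['lm_head.weight'] = language_model_ckp['model.embed_tokens.weight']
    model.language_model.load_state_dict(language_model_ckp)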