This commit is contained in:
Guanhua Zhang 2025-04-10 20:14:17 +02:00
commit 04c4625cfe
11 changed files with 1330 additions and 0 deletions

44
train/train.sh Executable file
View file

@ -0,0 +1,44 @@
#!/bin/bash
#
# Launch train.py under torchrun with the distributed, model and training
# settings defined below.
#
# Usage (run from the directory that contains train.py, since the script
# path handed to torchrun is relative):
#   PROJECT_PATH=/path/to/project ./train.sh
#
# Environment:
#   PROJECT_PATH  Root directory under which the model, embedding model,
#                 prompt file and checkpoint directory are looked up.
#                 Defaults to the placeholder "your-project-path".
#
# The command line is assembled in arrays rather than one flat string so that
# every value reaches torchrun as exactly one argument, even when a path
# contains whitespace or glob characters.

# Fail on unset variables so a typo in a name below cannot silently turn into
# an empty argument.
set -u

# --- torchrun launcher settings ---
MASTER_ADDR=localhost
MASTER_PORT=12345
NNODES=1
NODE_RANK=0
GPUS_PER_NODE=2   # passed to torchrun as --nproc_per_node
DISTRIBUTED_ARGS=(
  --nproc_per_node "${GPUS_PER_NODE}"
  --nnodes "${NNODES}"
  --node_rank "${NODE_RANK}"
  --master_addr "${MASTER_ADDR}"
  --master_port "${MASTER_PORT}"
)

PROJECT_PATH="${PROJECT_PATH:-your-project-path}"

# Arguments forwarded to train.py.
OPTS=()

# model config
MAXSEQLEN=1024
OPTS+=(--max_seq_length "${MAXSEQLEN}")
OPTS+=(--model_name_or_path "${PROJECT_PATH}/Mistral-7b-bmtrain")

# training config
OPTS+=(--logging_step 4)
BATCHSIZE=16
OPTS+=(--batch_size_per_device "${BATCHSIZE}")
OPTS+=(--save_step 500)
OPTS+=(--epochs 15)
LR=1e-6
OPTS+=(--lr "${LR}")
OPTS+=(--warmup_iters 0)
OPTS+=(--start_step 0)
OPTS+=(--loss_scale 6400)
ACTIONWEIGHT=2
OPTS+=(--action_weight "${ACTIONWEIGHT}")
EMBEDDING_MODEL_PATH="${PROJECT_PATH}/sentence-transformer/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/e4ce9877abf3edfe10b0d82785e83bdcb973e22e"
OPTS+=(--embedding_model_path "${EMBEDDING_MODEL_PATH}")
OPTS+=(--prompt_file "${PROJECT_PATH}/prompts/summarisation/summarisation_prompt.txt")
OPTS+=(--save_dir "${PROJECT_PATH}/ckpts/experiment")

CMD=(torchrun "${DISTRIBUTED_ARGS[@]}" train.py "${OPTS[@]}")

# Log the exact command; %q shell-quotes each word so the printed line can be
# pasted back into a shell unchanged.
echo "-------final CMD is------"
printf '%q ' "${CMD[@]}"
printf '\n'
echo "-------final CMD end------"

# Keep the launch as the last statement: its exit status becomes the script's.
"${CMD[@]}"