#!/bin/bash
# llama3-sft submission script.
#
# Installs pinned Python deps, configures NCCL/W&B, then launches a
# multi-node LLaMA-Factory SFT run via torch.distributed.run.
#
# Required env vars (normally injected by the cluster scheduler):
#   MASTER_ADDR, MASTER_PORT  - rendezvous endpoint
#   NPROC_PER_NODE            - GPUs per node
#   WORLD_SIZE                - node count (see NOTE below)
#   RANK                      - this node's rank
set -euo pipefail

# Fail early with a clear message if the scheduler did not inject these.
: "${MASTER_ADDR:?MASTER_ADDR must be set}"
: "${MASTER_PORT:?MASTER_PORT must be set}"
: "${NPROC_PER_NODE:?NPROC_PER_NODE must be set}"
: "${WORLD_SIZE:?WORLD_SIZE must be set}"
: "${RANK:?RANK must be set}"

pip install transformers==4.41.2 -i https://mirrors.aliyun.com/pypi/simple/
pip install flash-attn==2.6.1 -i https://mirrors.aliyun.com/pypi/simple/
pip install tensorboard -i https://mirrors.aliyun.com/pypi/simple/

export NCCL_BLOCKING_WAIT=1
export NCCL_COMM_TIMEOUT=3600 # set timeout to 3600 seconds (1 hour)
export WANDB_MODE=offline
export WANDB_SILENT=true

cd /LLaMA-Factory || exit 1

# NOTE(review): torchrun defines WORLD_SIZE as *total processes*, but this
# script passes it as --nnodes (node count) — presumably the scheduler
# exports node count under this name; confirm against the launcher platform.
python -m torch.distributed.run \
  --master_addr "$MASTER_ADDR" \
  --master_port "$MASTER_PORT" \
  --nproc_per_node "$NPROC_PER_NODE" \
  --nnodes "$WORLD_SIZE" \
  --node_rank "$RANK" \
  src/train.py \
  examples/full_multi_gpu/llama3p2_1B_part_sft_multi_TaskQueryTranslateEsFrPt.yaml
# llama3p2_1B_fu