|
1 | 1 | #!/bin/bash |
2 | 2 |
|
3 | 3 | ray stop |
4 | | -CURRENT_DIR="$( cd "$( dirname "$0" )" && pwd )" |
5 | | -VERL_PATCH_PATH=$( dirname $( dirname ${CURRENT_DIR})) |
6 | | -export PYTHONPATH=${VERL_PATCH_PATH}:${VERL_PATCH_PATH}/backends/megatron/Megatron-LM-250624:${VERL_PATCH_PATH}/backends/rl/verl:$PYTHONPATH |
7 | | -export HYDRA_FULL_ERROR=1 |
8 | | -gsm8k_train_path=/mnt/data/datasets/gsm8k/train.parquet |
9 | | -gsm8k_test_path=/mnt/data/datasets/gsm8k/test.parquet |
10 | | - |
11 | | -train_files="['$gsm8k_train_path']" |
12 | | -test_files="['$gsm8k_test_path']" |
13 | | - |
14 | | - |
15 | | -MODEL_PATH=/mnt/data/ckpts/huggingface/DeepSeek-V3-0324 |
16 | | -MCORE_MODEL_PATH=/mnt/data/ckpts/mcore/DeepSeek-R1-BF16-to-mcore |
17 | | - |
18 | | - |
19 | | -# If you are using vllm<=0.6.3, you might need to set the following environment variable to avoid bugs: |
20 | | -# export VLLM_ATTENTION_BACKEND=XFORMERS |
21 | | -export CUDA_DEVICE_MAX_CONNECTIONS=1 # For megatron communication/computation overlapping |
| 4 | +rm -rf /tmp/ray/* |
22 | 5 |
|
| 6 | +export CUDA_DEVICE_MAX_CONNECTIONS=1 |
23 | 7 | export GPUS_PER_NODE=${MLP_WORKER_GPU:-${KUBERNETES_CONTAINER_RESOURCE_GPU:-8}} |
24 | 8 | export RAY_num_server_call_thread=1 |
25 | 9 | export NNODES=${MLP_WORKER_NUM:-${WORLD_SIZE:-1}} |
26 | 10 | export NODE_RANK=${MLP_WORKER_RACK_RANK_INDEX:-${MLP_ROLE_INDEX:-${RANK:-0}}} |
27 | 11 | export MASTER_ADDR=${MLP_WORKER_0_HOST:-${MASTER_ADDR:-127.0.0.1}} |
28 | 12 | export MASTER_PORT=${MLP_WORKER_0_PORT:-${MASTER_PORT:-1234}} |
29 | 13 |
|
| 14 | +CURRENT_DIR=$(pwd) |
| 15 | +MEGATRON_PATCH_PATH=$( dirname $( dirname ${CURRENT_DIR})) |
| 16 | +VERL_ROOT_PATH=${MEGATRON_PATCH_PATH}/backends/rl/verl |
| 17 | +export PYTHONPATH=${MEGATRON_PATCH_PATH}:${MEGATRON_PATCH_PATH}/backends/megatron/Megatron-LM-250624:${VERL_ROOT_PATH}:$PYTHONPATH |
| 18 | + |
| 19 | +export RAY_CGRAPH_get_timeout=200 |
| 20 | +export CUDA_DEVICE_MAX_CONNECTIONS=1 |
| 21 | +export RAY_num_server_call_thread=1 |
| 22 | +export RAY_DEDUP_LOGS=0 |
| 23 | +export VLLM_USE_RAY_SPMD_WORKER=1 |
| 24 | +export VLLM_USE_RAY_COMPILED_DAG=1 |
| 25 | + |
| 26 | +train_path=/mnt/data/datasets/MATH-lighteval/train.parquet |
| 27 | +test_path=/mnt/data/datasets/MATH-lighteval/test.parquet |
| 28 | + |
| 29 | +train_files="['$train_path']" |
| 30 | +test_files="['$test_path']" |
| 31 | + |
| 32 | +hf_ckpt_path=/mnt/data/ckpts/huggingface/DeepSeek-V3-0324-BF16 |
| 33 | +mcore_ckpt_path=/mnt/data/ckpts/mcore/DeepSeek-V3-0324-BF16-to-mcore |
| 34 | +proj_name="jerry_debug" |
| 35 | +exp_name="test_deepseek_verl" |
| 36 | +export output_dir=${CURRENT_DIR}/verl_outputs/${exp_name} |
| 37 | +export WANDB_DIR=${output_dir} |
| 38 | +mkdir -p $output_dir/ |
| 39 | +export log_dir=${output_dir}/logs |
| 40 | +mkdir -p $log_dir |
| 41 | +log_file=$log_dir/${exp_name}_rank${NODE_RANK}.log |
| 42 | + |
30 | 43 |
|
31 | | -project_name='DAPO' |
32 | | -exp_name='Test_Verl_Mcore_DeepSeek671b_Loss' |
33 | 44 | adv_estimator=grpo |
34 | 45 | use_kl_in_reward=True |
35 | 46 | kl_coef=0.0 |
36 | 47 | use_kl_loss=True |
37 | 48 | kl_loss_coef=0.0 |
38 | 49 | clip_ratio_low=0.2 |
39 | 50 | clip_ratio_high=0.28 |
40 | | -max_prompt_length=$((1024 * 2)) |
41 | | -max_response_length=$((1024 * 4)) |
| 51 | +max_prompt_length=1536 |
| 52 | +max_response_length=2048 |
42 | 53 | enable_overlong_buffer=True |
43 | 54 | overlong_buffer_len=$((1024 * 4)) |
44 | 55 | overlong_penalty_factor=0.1 |
@@ -135,17 +146,17 @@ python ../qwen3/verl_entrypoint.py --config-path=../qwen3/verl_configs \ |
135 | 146 | +reward_model.reward_kwargs.overlong_buffer_cfg.log=False \ |
136 | 147 | +reward_model.reward_kwargs.max_resp_len=${max_response_length} \ |
137 | 148 | trainer.logger=['console'] \ |
138 | | - trainer.project_name="${project_name}" \ |
139 | | - trainer.experiment_name="${exp_name}" \ |
| 149 | + trainer.project_name=${proj_name} \ |
| 150 | + trainer.experiment_name=${exp_name} \ |
140 | 151 | trainer.n_gpus_per_node=${GPUS_PER_NODE} \ |
141 | 152 | trainer.nnodes=${NNODES} \ |
142 | 153 | trainer.val_before_train=False \ |
143 | | - trainer.test_freq=50000000 \ |
| 154 | + trainer.test_freq=5 \ |
144 | 155 | trainer.save_freq=50000000 \ |
145 | | - trainer.total_epochs=10 \ |
| 156 | + trainer.total_epochs=200 \ |
146 | 157 | trainer.total_training_steps=1000 \ |
147 | 158 | trainer.resume_mode=auto \ |
148 | | - trainer.log_val_generations=10 2>&1 | tee ${NNODES}nodes_verl_debug.log |
| 159 | + 2>&1 | tee ${log_file} ; exit ${PIPESTATUS[0]} |
149 | 160 |
|
150 | 161 | else |
151 | 162 | ray start --block --address=${MASTER_ADDR}:6379 |
|
0 commit comments