Skip to content

Commit b6caa19

Browse files
authored
Fix dist ckpt convert megatron paths (#606)
1 parent 8686b9c commit b6caa19

5 files changed

Lines changed: 26 additions & 33 deletions

File tree

.gitmodules

Lines changed: 18 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,27 @@
1-
[submodule "backends/LM-Evaluation-Harness-240310"]
2-
path = backends/LM-Evaluation-Harness-240310
3-
url = https://github.com/jerryli1981/lm-evaluation-harness.git
4-
[submodule "backends/Bigcode-Evaluation-Harness-240327"]
5-
path = backends/Bigcode-Evaluation-Harness-240327
6-
url = https://github.com/jerryli1981/bigcode-evaluation-harness
7-
[submodule "backends/megatron/Megatron-LM-240701"]
8-
path = backends/megatron/Megatron-LM-240701
9-
url = https://github.com/NVIDIA/Megatron-LM.git
10-
[submodule "backends/megatron/PAI-Megatron-LM-240718"]
11-
path = backends/megatron/PAI-Megatron-LM-240718
12-
url = https://github.com/jerryli1981/PAI-Megatron-LM-240718
13-
[submodule "backends/megatron/Megatron-LM-250328"]
14-
path = backends/megatron/Megatron-LM-250328
1+
[submodule "backends/megatron/Megatron-LM-240126"]
2+
path = backends/megatron/Megatron-LM-240126
15 3
url = https://github.com/NVIDIA/Megatron-LM.git
16-
[submodule "backends/megatron/Megatron-LM-250217"]
17-
path = backends/megatron/Megatron-LM-250217
4+
[submodule "backends/megatron/Megatron-LM-240405"]
5+
path = backends/megatron/Megatron-LM-240405
18 6
url = https://github.com/NVIDIA/Megatron-LM.git
19 7
[submodule "backends/megatron/Megatron-LM-241113"]
20 8
path = backends/megatron/Megatron-LM-241113
21 9
url = https://github.com/NVIDIA/Megatron-LM.git
22-
[submodule "backends/megatron/Megatron-LM-240405"]
23-
path = backends/megatron/Megatron-LM-240405
10+
[submodule "backends/megatron/Megatron-LM-250217"]
11+
path = backends/megatron/Megatron-LM-250217
24 12
url = https://github.com/NVIDIA/Megatron-LM.git
25-
[submodule "backends/megatron/Megatron-LM-240126"]
26-
path = backends/megatron/Megatron-LM-240126
13+
[submodule "backends/megatron/Megatron-LM-250328"]
14+
path = backends/megatron/Megatron-LM-250328
27 15
url = https://github.com/NVIDIA/Megatron-LM.git
16+
[submodule "backends/megatron/PAI-Megatron-LM-240718"]
17+
path = backends/megatron/PAI-Megatron-LM-240718
18+
url = https://github.com/NVIDIA/Megatron-LM.git
19+
[submodule "backends/LM-Evaluation-Harness-240310"]
20+
path = backends/LM-Evaluation-Harness-240310
21+
url = https://github.com/jerryli1981/lm-evaluation-harness.git
22+
[submodule "backends/Bigcode-Evaluation-Harness-240327"]
23+
path = backends/Bigcode-Evaluation-Harness-240327
24+
url = https://github.com/jerryli1981/bigcode-evaluation-harness
28 25
[submodule "backends/rl/ChatLearn"]
29 26
path = backends/rl/ChatLearn
30-
url = https://github.com/alibaba/ChatLearn.git
27+
url = https://github.com/alibaba/ChatLearn.git

toolkits/distributed_checkpoints_convertor/scripts/deepseek_v3/run_32xH20.sh

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,8 @@
2 2
set -e
3 3
CURRENT_DIR="$( cd "$( dirname "$0" )" && pwd )"
4 4
CONVERTOR_DIR=$( dirname $( dirname ${CURRENT_DIR}))
5-
MEGATRON_PATH=$( dirname $( dirname ${CONVERTOR_DIR}))
6-
7-
export PYTHONPATH=${CONVERTOR_DIR}/impl:${MEGATRON_PATH}:${MEGATRON_PATH}/Megatron-LM-250328:$PYTHONPATH
5+
MEGATRON_PATCH_PATH=$( dirname $( dirname ${CONVERTOR_DIR}))
6+
export PYTHONPATH=${MEGATRON_PATCH_PATH}:${MEGATRON_PATCH_PATH}/backends/megatron/Megatron-LM-250328:${CONVERTOR_DIR}/impl:$PYTHONPATH
8 7
export CUDA_DEVICE_MAX_CONNECTIONS=1
9 8
export TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD=true # for PyTorch >= 2.6
10 9

toolkits/distributed_checkpoints_convertor/scripts/moonlight/run_2xH20.sh

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,8 @@
2 2
set -e
3 3
CURRENT_DIR="$( cd "$( dirname "$0" )" && pwd )"
4 4
CONVERTOR_DIR=$( dirname $( dirname ${CURRENT_DIR}))
5-
MEGATRON_PATH=$( dirname $( dirname ${CONVERTOR_DIR}))
6-
7-
export PYTHONPATH=${CONVERTOR_DIR}/impl:${MEGATRON_PATH}:${MEGATRON_PATH}/Megatron-LM-250328:$PYTHONPATH
5+
MEGATRON_PATCH_PATH=$( dirname $( dirname ${CONVERTOR_DIR}))
6+
export PYTHONPATH=${MEGATRON_PATCH_PATH}:${MEGATRON_PATCH_PATH}/backends/megatron/Megatron-LM-250328:${CONVERTOR_DIR}/impl:$PYTHONPATH
8 7
export CUDA_DEVICE_MAX_CONNECTIONS=1
9 8
export TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD=true # for PyTorch >= 2.6
10 9

toolkits/distributed_checkpoints_convertor/scripts/qwen3/run_8xH20.sh

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,8 @@
2 2
set -e
3 3
CURRENT_DIR="$( cd "$( dirname "$0" )" && pwd )"
4 4
CONVERTOR_DIR=$( dirname $( dirname ${CURRENT_DIR}))
5-
MEGATRON_PATH=$( dirname $( dirname ${CONVERTOR_DIR}))
6-
7-
export PYTHONPATH=${CONVERTOR_DIR}/impl:${MEGATRON_PATH}:${MEGATRON_PATH}/Megatron-LM-250328:$PYTHONPATH
5+
MEGATRON_PATCH_PATH=$( dirname $( dirname ${CONVERTOR_DIR}))
6+
export PYTHONPATH=${MEGATRON_PATCH_PATH}:${MEGATRON_PATCH_PATH}/backends/megatron/Megatron-LM-250328:${CONVERTOR_DIR}/impl:$PYTHONPATH
8 7
export CUDA_DEVICE_MAX_CONNECTIONS=1
9 8
export TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD=true # for PyTorch >= 2.6
10 9

toolkits/distributed_checkpoints_convertor/scripts/qwen3/run_A22B_16xH20.sh

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,8 @@
2 2
set -e
3 3
CURRENT_DIR="$( cd "$( dirname "$0" )" && pwd )"
4 4
CONVERTOR_DIR=$( dirname $( dirname ${CURRENT_DIR}))
5-
MEGATRON_PATH=$( dirname $( dirname ${CONVERTOR_DIR}))
6-
7-
export PYTHONPATH=${CONVERTOR_DIR}/impl:${MEGATRON_PATH}:${MEGATRON_PATH}/Megatron-LM-250328:$PYTHONPATH
5+
MEGATRON_PATCH_PATH=$( dirname $( dirname ${CONVERTOR_DIR}))
6+
export PYTHONPATH=${MEGATRON_PATCH_PATH}:${MEGATRON_PATCH_PATH}/backends/megatron/Megatron-LM-250328:${CONVERTOR_DIR}/impl:$PYTHONPATH
8 7
export CUDA_DEVICE_MAX_CONNECTIONS=1
9 8
export TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD=true # for PyTorch >= 2.6
10 9

0 commit comments

Comments
 (0)