From 7db19142c82d28bca4ae96078710b277aac248e9 Mon Sep 17 00:00:00 2001
From: Lukasz Pierscieniewski
Date: Wed, 3 Dec 2025 14:51:14 +0100
Subject: [PATCH 1/3] Initial GRPO dump

---
 .gitmodules               |   3 ++
 reasoning/RL              |   1 +
 reasoning/scripts/run.sub | 104 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 108 insertions(+)
 create mode 160000 reasoning/RL
 create mode 100644 reasoning/scripts/run.sub

diff --git a/.gitmodules b/.gitmodules
index 51d8eac03..5259ee403 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -6,3 +6,6 @@
 	path = text_to_image/torchtitan
 	url = https://github.com/pytorch/torchtitan.git
 	branch = mlperf-training-flux.1
+[submodule "reasoning/RL"]
+	path = reasoning/RL
+	url = https://github.com/hXl3s/RL.git
diff --git a/reasoning/RL b/reasoning/RL
new file mode 160000
index 000000000..534be3c13
--- /dev/null
+++ b/reasoning/RL
@@ -0,0 +1 @@
+Subproject commit 534be3c1353d61c7c605dd6d0f419858466b3aae
diff --git a/reasoning/scripts/run.sub b/reasoning/scripts/run.sub
new file mode 100644
index 000000000..b211095fe
--- /dev/null
+++ b/reasoning/scripts/run.sub
@@ -0,0 +1,104 @@
+#!/bin/bash
+
+BASE_DIRECTORY=${BASE_DIRECTORY:?"BASE_DIRECTORY has to be set"}
+CONFIG_FILE=${CONFIG_FILE:?"CONFIG_FILE has to be set"}
+IMAGE_FILE=${BASE_DIRECTORY}/data/container_image.sqsh
+#IMAGE_FILE="gitlab-master.nvidia.com/dl/mlperf/optimized/lukaszp/reasoning:reinforcer"
+#IMAGE_FILE="gitlab-master.nvidia.com/dl/joc/nemo-ci/chtruong_nemo-rl-pytorch/rl:pipe.32480418-x86"
+CKPT_DIR=${BASE_DIRECTORY}/data/checkpoint
+DATA_DIR=${BASE_DIRECTORY}/data/dataset
+CODE_DIR=${BASE_DIRECTORY}/optimized
+HF_HOME=${BASE_DIRECTORY}/huggingface
+
+RESULTS_DIR=${BASE_DIRECTORY}/results/${EXPERIMENT_NAME}
+mkdir -p ${RESULTS_DIR}/raylog
+
+GPU_PER_NODE=${GPU_PER_NODE:-8}
+
+NODES=$(scontrol show hostnames "$SLURM_JOB_NODELIST")
+NODES_ARRAY=($NODES)
+HEAD_NODE=${NODES_ARRAY[0]}
+HEAD_NODE_IP=$(srun --nodes=1 --ntasks=1 -w "$HEAD_NODE" hostname --ip-address)
+HEAD_NODE_PORT=41993
+
+# if we detect a space character in the head node IP, we'll
+# convert it to an ipv4 address. This step is optional.
+if [[ "$HEAD_NODE_IP" == *" "* ]]; then
+    IFS=' ' read -ra ADDR <<<"$HEAD_NODE_IP"
+    if [[ ${#ADDR[0]} -gt 16 ]]; then
+        HEAD_NODE_IP=${ADDR[1]}
+    else
+        HEAD_NODE_IP=${ADDR[0]}
+    fi
+    echo "IPV6 address detected. We split the IPV4 address as $HEAD_NODE_IP"
+fi
+
+srun --ntasks-per-node=1 --container-image="${IMAGE_FILE}" --container-name="reasoning-ml" true
+min_worker_port=54001
+max_worker_port=54557
+
+CONTAINER_MOUNTS="${CKPT_DIR}:/opt/checkpoints,${DATA_DIR}:/dataset,${CODE_DIR}:/workspace/code,${RESULTS_DIR}:/results,${HF_HOME}:/workspace/huggingface"
+CONTAINER_MOUNTS="${CONTAINER_MOUNTS},${CODE_DIR}/../deps/reinforcer:/opt/nemo-rl"
+
+#export REINFORCER_VENV_DIR="/opt/.venv/"
+
+
+PORT_DEFINITIONS=(
+    "--runtime-env-agent-port=$((${max_worker_port} + 1))"
+    "--dashboard-port=$((${max_worker_port} + 2))"
+    "--dashboard-agent-grpc-port=$((${max_worker_port} + 3))"
+    "--metrics-export-port=$((${max_worker_port} + 4))"
+)
+
+export RAY_memory_monitor_refresh_ms=0
+
+
+echo "Starting Ray Head Node on ${HEAD_NODE} with address ${HEAD_NODE_IP}:41993"
+srun -N1 --overlap --ntasks-per-node=1 --cpus-per-task=$((16 * GPU_PER_NODE)) --container-name="reasoning-ml" \
+    --container-mounts ${CONTAINER_MOUNTS} \
+    --container-workdir /workspace/code -w ${HEAD_NODE} --export HF_HOME=/workspace/huggingface \
+    --container-remap-root --container-writable --no-container-mount-home -o ${RESULTS_DIR}/raylog/head_node_ray.log \
+    ray start --head --port ${HEAD_NODE_PORT} --block --min-worker-port=$min_worker_port \
+    --max-worker-port=$max_worker_port --resources="{\"worker_units\": $GPU_PER_NODE, \"slurm_managed_ray_cluster\": 1}" \
+    ${PORT_DEFINITIONS[@]} &
+
+sleep 10
+WORKER_NODE_COUNT=$((SLURM_JOB_NUM_NODES - 1))
+echo "Starting Worker Nodes on ${NODES_ARRAY[@]:1}"
+
+if [ "$WORKER_NODE_COUNT" -gt 0 ]; then
+    for node in "${NODES_ARRAY[@]:1}"; do
+        echo "Starting Worker Node on ${node}"
+        srun -N1 --exclusive --ntasks-per-node=1 --cpus-per-task=$((16 * GPU_PER_NODE)) --container-name="reasoning-ml" \
+            --container-mounts ${CONTAINER_MOUNTS} \
+            --container-workdir /workspace/code -w "${node}" --export HF_HOME=/workspace/huggingface \
+            --container-remap-root --container-writable --no-container-mount-home -o ${RESULTS_DIR}/raylog/worker_${node}_ray.log \
+            ray start --address ${HEAD_NODE_IP}:${HEAD_NODE_PORT} --block --min-worker-port=$min_worker_port \
+            --max-worker-port=$max_worker_port --resources="{\"worker_units\": $GPU_PER_NODE}" \
+            ${PORT_DEFINITIONS[@]} &
+    done
+fi
+
+echo "Waiting for all nodes to start"
+sleep 5
+
+export HF_HOME=/workspace/huggingface
+export TRITON_HOME=/workspace/huggingface/triton
+export VLLM_SKIP_P2P_CHECK=1
+export VLLM_NO_USAGE_STATS=1
+
+#srun -N1 --container-name="reasoning-ml" --container-workdir /workspace/code -w ${HEAD_NODE} --container-mounts ${CONTAINER_MOUNTS} \
+#    --export ALL,HF_HOME=/workspace/huggingface,TRITON_HOME=/workspace/huggingface/triton python -u main.py
+
+srun -N1 --container-name="reasoning-ml" --container-workdir /workspace/code -w ${HEAD_NODE} --container-mounts ${CONTAINER_MOUNTS} \
+    --container-remap-root --container-writable --no-container-mount-home -o ${RESULTS_DIR}/output_${SLURM_JOB_ID}.out \
+    --export ALL,HF_HOME=/workspace/huggingface,TRITON_HOME=/workspace/huggingface/triton \
+    python -u /workspace/code/run_grpo_math.py \
+    --config ${CONFIG_FILE} \
+    cluster.num_nodes=${SLURM_JOB_NUM_NODES} \
+    checkpointing.checkpoint_dir="/results/checkpoints" \
+    data.prompt_file="/opt/nemo-rl/examples/prompts/cot.txt" \
+    logger.log_dir="/results/logs/" \
+    logger.wandb_enabled=false \
+    logger.tensorboard_enabled=false \
+    $@
From 2b854b8aa416a09b75bc9e81e2b1dd4cbf7e0790 Mon Sep 17 00:00:00 2001
From: Lukasz Pierscieniewski
Date: Wed, 3 Dec 2025 14:59:47 +0100
Subject: [PATCH 2/3] README and script improvements

---
 reasoning/scripts/README.md | 39 +++++++++++++++++++++++++++++++++++++
 reasoning/scripts/run.sub   | 30 ++++++++++++++--------------
 2 files changed, 54 insertions(+), 15 deletions(-)
 create mode 100644 reasoning/scripts/README.md

diff --git a/reasoning/scripts/README.md b/reasoning/scripts/README.md
new file mode 100644
index 000000000..7ba60f09d
--- /dev/null
+++ b/reasoning/scripts/README.md
@@ -0,0 +1,39 @@
+# Container build
+
+The Docker image can be built with:
+
+```
+cd RL
+docker buildx build --target release --build-context nemo-rl=. -f docker/Dockerfile --tag /nemo-rl:latest --push .
+```
+
+For more information, follow the instructions in `RL/docs/docker.md`.
+
+
+# Running experiments
+
+A script that sets up the environment and runs the benchmark is provided; see `run.sub` for details.
+
+```
+export BASE_DIRECTORY=$(pwd)
+export CONFIG_FILE={ONE OF POSSIBLE CONFIG PATHS HERE}
+export IMAGE_FILE={HANDLE TO DOCKER IMAGE}
+
+sbatch {SLURM specific instructions} run.sub
+```
+
+## Config selection
+
+### Qwen3-30B-A3B
+
+```
+export CONFIG_FILE=/opt/nemo-rl/examples/configs/recipes/llm/grpo-qwen3-30ba3b-base-openmath.yaml
+```
+
+
+### DSv3
+Additional steps are needed to convert the checkpoints; see `RL/docs/guides/deepseek.md`.
+
+```
+export CONFIG_FILE=/opt/nemo-rl/examples/configs/recipes/llm/grpo-dsv3-base-openmath.yaml
+```
diff --git a/reasoning/scripts/run.sub b/reasoning/scripts/run.sub
index b211095fe..c5af4d396 100644
--- a/reasoning/scripts/run.sub
+++ b/reasoning/scripts/run.sub
@@ -2,12 +2,10 @@
 
 BASE_DIRECTORY=${BASE_DIRECTORY:?"BASE_DIRECTORY has to be set"}
 CONFIG_FILE=${CONFIG_FILE:?"CONFIG_FILE has to be set"}
-IMAGE_FILE=${BASE_DIRECTORY}/data/container_image.sqsh
-#IMAGE_FILE="gitlab-master.nvidia.com/dl/mlperf/optimized/lukaszp/reasoning:reinforcer"
-#IMAGE_FILE="gitlab-master.nvidia.com/dl/joc/nemo-ci/chtruong_nemo-rl-pytorch/rl:pipe.32480418-x86"
+IMAGE_FILE=${IMAGE_FILE:?"IMAGE_FILE has to be set"}
+
 CKPT_DIR=${BASE_DIRECTORY}/data/checkpoint
 DATA_DIR=${BASE_DIRECTORY}/data/dataset
-CODE_DIR=${BASE_DIRECTORY}/optimized
 HF_HOME=${BASE_DIRECTORY}/huggingface
 
 RESULTS_DIR=${BASE_DIRECTORY}/results/${EXPERIMENT_NAME}
@@ -37,11 +35,16 @@ srun --ntasks-per-node=1 --container-image="${IMAGE_FILE}" --container-name="rea
 min_worker_port=54001
 max_worker_port=54557
 
-CONTAINER_MOUNTS="${CKPT_DIR}:/opt/checkpoints,${DATA_DIR}:/dataset,${CODE_DIR}:/workspace/code,${RESULTS_DIR}:/results,${HF_HOME}:/workspace/huggingface"
-CONTAINER_MOUNTS="${CONTAINER_MOUNTS},${CODE_DIR}/../deps/reinforcer:/opt/nemo-rl"
-
-#export REINFORCER_VENV_DIR="/opt/.venv/"
+CONTAINER_MOUNTS=(
+    "${CKPT_DIR}:/opt/checkpoints"
+    "${DATA_DIR}:/dataset"
+    "${RESULTS_DIR}:/results"
+    "${HF_HOME}:/workspace/huggingface"
+    "${BASE_DIRECTORY}/RL:/opt/nemo-rl"
+)
+CONTAINER_MOUNTS="${CONTAINER_MOUNTS[@]}"
+CONTAINER_MOUNTS="${CONTAINER_MOUNTS// /,}"
 
 
 PORT_DEFINITIONS=(
     "--runtime-env-agent-port=$((${max_worker_port} + 1))"
@@ -56,7 +59,7 @@ export RAY_memory_monitor_refresh_ms=0
 echo "Starting Ray Head Node on ${HEAD_NODE} with address ${HEAD_NODE_IP}:41993"
 srun -N1 --overlap --ntasks-per-node=1 --cpus-per-task=$((16 * GPU_PER_NODE)) --container-name="reasoning-ml" \
     --container-mounts ${CONTAINER_MOUNTS} \
-    --container-workdir /workspace/code -w ${HEAD_NODE} --export HF_HOME=/workspace/huggingface \
+    --container-workdir /opt/nemo-rl -w ${HEAD_NODE} --export HF_HOME=/workspace/huggingface \
     --container-remap-root --container-writable --no-container-mount-home -o ${RESULTS_DIR}/raylog/head_node_ray.log \
     ray start --head --port ${HEAD_NODE_PORT} --block --min-worker-port=$min_worker_port \
     --max-worker-port=$max_worker_port --resources="{\"worker_units\": $GPU_PER_NODE, \"slurm_managed_ray_cluster\": 1}" \
@@ -71,7 +74,7 @@ if [ "$WORKER_NODE_COUNT" -gt 0 ]; then
         echo "Starting Worker Node on ${node}"
         srun -N1 --exclusive --ntasks-per-node=1 --cpus-per-task=$((16 * GPU_PER_NODE)) --container-name="reasoning-ml" \
             --container-mounts ${CONTAINER_MOUNTS} \
-            --container-workdir /workspace/code -w "${node}" --export HF_HOME=/workspace/huggingface \
+            --container-workdir /opt/nemo-rl -w "${node}" --export HF_HOME=/workspace/huggingface \
             --container-remap-root --container-writable --no-container-mount-home -o ${RESULTS_DIR}/raylog/worker_${node}_ray.log \
             ray start --address ${HEAD_NODE_IP}:${HEAD_NODE_PORT} --block --min-worker-port=$min_worker_port \
             --max-worker-port=$max_worker_port --resources="{\"worker_units\": $GPU_PER_NODE}" \
@@ -87,13 +90,10 @@ export TRITON_HOME=/workspace/huggingface/triton
 export VLLM_SKIP_P2P_CHECK=1
 export VLLM_NO_USAGE_STATS=1
 
-#srun -N1 --container-name="reasoning-ml" --container-workdir /workspace/code -w ${HEAD_NODE} --container-mounts ${CONTAINER_MOUNTS} \
-#    --export ALL,HF_HOME=/workspace/huggingface,TRITON_HOME=/workspace/huggingface/triton python -u main.py
-
-srun -N1 --container-name="reasoning-ml" --container-workdir /workspace/code -w ${HEAD_NODE} --container-mounts ${CONTAINER_MOUNTS} \
+srun -N1 --container-name="reasoning-ml" --container-workdir /opt/nemo-rl/ -w ${HEAD_NODE} --container-mounts ${CONTAINER_MOUNTS} \
     --container-remap-root --container-writable --no-container-mount-home -o ${RESULTS_DIR}/output_${SLURM_JOB_ID}.out \
     --export ALL,HF_HOME=/workspace/huggingface,TRITON_HOME=/workspace/huggingface/triton \
-    python -u /workspace/code/run_grpo_math.py \
+    python -u /opt/nemo-rl/examples/run_grpo_math.py \
     --config ${CONFIG_FILE} \
     cluster.num_nodes=${SLURM_JOB_NUM_NODES} \
     checkpointing.checkpoint_dir="/results/checkpoints" \

From 8917488f17a5e236f48c9dcf886f12768d12212b Mon Sep 17 00:00:00 2001
From: Lukasz Pierscieniewski
Date: Wed, 3 Dec 2025 15:19:53 +0100
Subject: [PATCH 3/3] Move readme to right location

---
 reasoning/{scripts => }/README.md | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename reasoning/{scripts => }/README.md (100%)

diff --git a/reasoning/scripts/README.md b/reasoning/README.md
similarity index 100%
rename from reasoning/scripts/README.md
rename to reasoning/README.md
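The README added in PATCH 2/3 leaves the exact submission command to the reader. As a usage illustration only, a run on a generic Slurm cluster might look like the sketch below; the node count, time limit, account, partition, and image handle are hypothetical placeholders, and `EXPERIMENT_NAME` and `GPU_PER_NODE` are the optional variables that `run.sub` reads.

```
# Hypothetical submission; replace the <...> placeholders with cluster-specific values.
export BASE_DIRECTORY=$(pwd)
export CONFIG_FILE=/opt/nemo-rl/examples/configs/recipes/llm/grpo-qwen3-30ba3b-base-openmath.yaml
export IMAGE_FILE=<registry>/nemo-rl:latest
export EXPERIMENT_NAME=grpo-qwen3-30ba3b-test
export GPU_PER_NODE=8

sbatch --nodes=4 --account=<account> --partition=<partition> --time=04:00:00 run.sub
```

Note that `run.sub` also expects the checkpoint and dataset under `${BASE_DIRECTORY}/data`, a Hugging Face cache under `${BASE_DIRECTORY}/huggingface`, and (after PATCH 2/3) the `RL` submodule checked out at `${BASE_DIRECTORY}/RL`.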
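PATCH 2/3 also changes how the `--container-mounts` argument is assembled: the mounts are now listed as a bash array, flattened into a single space-separated string, and then comma-joined with the `${var// /,}` substitution. A minimal standalone sketch of that idiom, using shortened example paths rather than the ones from the script:

```
# Build the mount list as an array, then join it with commas for --container-mounts.
MOUNTS=(
    "/data/checkpoint:/opt/checkpoints"
    "/data/dataset:/dataset"
)
JOINED="${MOUNTS[@]}"     # flatten: array elements separated by single spaces
JOINED="${JOINED// /,}"   # replace every space with a comma
echo "${JOINED}"          # /data/checkpoint:/opt/checkpoints,/data/dataset:/dataset
```

The join assumes that no mount path contains a space, including `BASE_DIRECTORY` itself.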
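Every `ray start` invocation in `run.sub` registers a custom `worker_units` resource sized to `GPU_PER_NODE`. As an optional, hypothetical sanity check (not part of the script), the cluster view can be queried from the head node once all nodes have joined; `ray status` should list `worker_units` among the cluster resources:

```
# Hypothetical extra step, e.g. after the "Waiting for all nodes to start" sleep;
# --overlap lets it share the head node with the running ray head step.
srun -N1 --overlap --ntasks-per-node=1 -w ${HEAD_NODE} --container-name="reasoning-ml" ray status
```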