diff --git a/.gitmodules b/.gitmodules
index 51d8eac03..5259ee403 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -6,3 +6,6 @@
 	path = text_to_image/torchtitan
 	url = https://github.com/pytorch/torchtitan.git
 	branch = mlperf-training-flux.1
+[submodule "reasoning/RL"]
+	path = reasoning/RL
+	url = https://github.com/hXl3s/RL.git
diff --git a/reasoning/README.md b/reasoning/README.md
new file mode 100644
index 000000000..7ba60f09d
--- /dev/null
+++ b/reasoning/README.md
@@ -0,0 +1,39 @@
+# Container build
+
+Docker can be built using
+
+```
+cd RL
+docker buildx build --target release --build-context nemo-rl=. -f docker/Dockerfile --tag {REGISTRY}/nemo-rl:latest --push .
+```
+
+For more information, follow the instructions in `RL/docs/docker.md`
+
+
+# Running experiments
+
+A script to set up the environment and run the benchmark is provided. See `run.sub` for more information
+
+```
+export BASE_DIRECTORY=$(pwd)
+export CONFIG_FILE={ONE OF POSSIBLE CONFIG PATHS HERE}
+export IMAGE_FILE={HANDLE TO DOCKER IMAGE}
+
+sbatch {SLURM specific instructions} run.sub
+```
+
+## Config selection
+
+### Qwen3-30B-A3B
+
+```
+export CONFIG_FILE=/opt/nemo-rl/examples/configs/recipes/llm/grpo-qwen3-30ba3b-base-openmath.yaml
+```
+
+
+### DSv3
+Additional steps to convert checkpoints are needed.
See `RL/docs/guides/deepseek.md` + +``` +export CONFIG_FILE=/opt/nemo-rl/examples/configs/recipes/llm/grpo-dsv3-base-openmath.yaml +``` diff --git a/reasoning/RL b/reasoning/RL new file mode 160000 index 000000000..534be3c13 --- /dev/null +++ b/reasoning/RL @@ -0,0 +1 @@ +Subproject commit 534be3c1353d61c7c605dd6d0f419858466b3aae diff --git a/reasoning/scripts/run.sub b/reasoning/scripts/run.sub new file mode 100644 index 000000000..c5af4d396 --- /dev/null +++ b/reasoning/scripts/run.sub @@ -0,0 +1,104 @@ +#!/bin/bash + +BASE_DIRECTORY=${BASE_DIRECTORY:?"BASE_DIRECTORY has to be set"} +CONFIG_FILE=${CONFIG_FILE:?"CONFIG_FILE has to be set"} +IMAGE_FILE=${IMAGE_FILE:?"IMAGE_FILE has to be set"} + +CKPT_DIR=${BASE_DIRECTORY}/data/checkpoint +DATA_DIR=${BASE_DIRECTORY}/data/dataset +HF_HOME=${BASE_DIRECTORY}/huggingface + +RESULTS_DIR=${BASE_DIRECTORY}/results/${EXPERIMENT_NAME} +mkdir -p ${RESULTS_DIR}/raylog + +GPU_PER_NODE=${GPU_PER_NODE:-8} + +NODES=$(scontrol show hostnames "$SLURM_JOB_NODELIST") +NODES_ARRAY=($NODES) +HEAD_NODE=${NODES_ARRAY[0]} +HEAD_NODE_IP=$(srun --nodes=1 --ntasks=1 -w "$HEAD_NODE" hostname --ip-address) +HEAD_NODE_PORT=41993 + +# if we detect a space character in the head node IP, we'll +# convert it to an ipv4 address. This step is optional. +if [[ "$HEAD_NODE_IP" == *" "* ]]; then + IFS=' ' read -ra ADDR <<<"$HEAD_NODE_IP" + if [[ ${#ADDR[0]} -gt 16 ]]; then + HEAD_NODE_IP=${ADDR[1]} + else + HEAD_NODE_IP=${ADDR[0]} + fi + echo "IPV6 address detected. 
We split the IPV4 address as $HEAD_NODE_IP" +fi + +srun --ntasks-per-node=1 --container-image="${IMAGE_FILE}" --container-name="reasoning-ml" true +min_worker_port=54001 +max_worker_port=54557 + +CONTAINER_MOUNTS=( + "${CKPT_DIR}:/opt/checkpoints" + "${DATA_DIR}:/dataset" + "${RESULTS_DIR}:/results" + "${HF_HOME}:/workspace/huggingface" + "${BASE_DIRECTORY}/RL:/opt/nemo-rl" +) + +CONTAINER_MOUNTS="${CONTAINER_MOUNTS[@]}" +CONTAINER_MOUNTS="${CONTAINER_MOUNTS// /,}" + +PORT_DEFINITIONS=( + "--runtime-env-agent-port=$((${max_worker_port} + 1))" + "--dashboard-port=$((${max_worker_port} + 2))" + "--dashboard-agent-grpc-port=$((${max_worker_port} + 3))" + "--metrics-export-port=$((${max_worker_port} + 4))" +) + +export RAY_memory_monitor_refresh_ms=0 + + +echo "Starting Ray Head Node on ${HEAD_NODE} with address ${HEAD_NODE_IP}:41993" +srun -N1 --overlap --ntasks-per-node=1 --cpus-per-task=$((16 * GPU_PER_NODE)) --container-name="reasoning-ml" \ + --container-mounts ${CONTAINER_MOUNTS} \ + --container-workdir /opt/nemo-rl -w ${HEAD_NODE} --export HF_HOME=/workspace/huggingface \ + --container-remap-root --container-writable --no-container-mount-home -o ${RESULTS_DIR}/raylog/head_node_ray.log \ + ray start --head --port ${HEAD_NODE_PORT} --block --min-worker-port=$min_worker_port \ + --max-worker-port=$max_worker_port --resources="{\"worker_units\": $GPU_PER_NODE, \"slurm_managed_ray_cluster\": 1}" \ + ${PORT_DEFINITIONS[@]} & + +sleep 10 +WORKER_NODE_COUNT=$((SLURM_JOB_NUM_NODES - 1)) +echo "Starting Worker Nodes on ${NODES_ARRAY[@]:1}" + +if [ "$WORKER_NODE_COUNT" -gt 0 ]; then + for node in "${NODES_ARRAY[@]:1}"; do + echo "Starting Worker Node on ${node}" + srun -N1 --exclusive --ntasks-per-node=1 --cpus-per-task=$((16 * GPU_PER_NODE)) --container-name="reasoning-ml" \ + --container-mounts ${CONTAINER_MOUNTS} \ + --container-workdir /opt/nemo-rl -w "${node}" --export HF_HOME=/workspace/huggingface \ + --container-remap-root --container-writable 
--no-container-mount-home -o ${RESULTS_DIR}/raylog/worker_${node}_ray.log \ + ray start --address ${HEAD_NODE_IP}:${HEAD_NODE_PORT} --block --min-worker-port=$min_worker_port \ + --max-worker-port=$max_worker_port --resources="{\"worker_units\": $GPU_PER_NODE}" \ + ${PORT_DEFINITIONS[@]} & + done +fi + +echo "Waiting for all nodes to start" +sleep 5 + +export HF_HOME=/workspace/huggingface +export TRITON_HOME=/workspace/huggingface/triton +export VLLM_SKIP_P2P_CHECK=1 +export VLLM_NO_USAGE_STATS=1 + +srun -N1 --container-name="reasoning-ml" --container-workdir /opt/nemo-rl/ -w ${HEAD_NODE} --container-mounts ${CONTAINER_MOUNTS} \ + --container-remap-root --container-writable --no-container-mount-home -o ${RESULTS_DIR}/output_${SLURM_JOB_ID}.out \ + --export ALL,HF_HOME=/workspace/huggingface,TRITON_HOME=/workspace/huggingface/triton \ + python -u /opt/nemo-rl/examples/run_grpo_math.py \ + --config ${CONFIG_FILE} \ + cluster.num_nodes=${SLURM_JOB_NUM_NODES} \ + checkpointing.checkpoint_dir="/results/checkpoints" \ + data.prompt_file="/opt/nemo-rl/examples/prompts/cot.txt" \ + logger.log_dir="/results/logs/" \ + logger.wandb_enabled=false \ + logger.tensorboard_enabled=false \ + $@