NCCL P2P Communication Error During Multi-GPU Training
Problem
PPO training fails with an NCCL error on a multi-GPU node because CUDA peer-to-peer access is not supported between the devices assigned to the job.
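A quick way to confirm whether CUDA peer access is actually available between the GPUs on this node is to inspect the topology and ask PyTorch directly. This is a diagnostic sketch, not part of the original report; it assumes nvidia-smi is on PATH and that python3 resolves to the training virtualenv's interpreter.
# Print the PCIe/NVLink topology matrix as reported by the driver.
nvidia-smi topo -m
# Ask CUDA (via PyTorch) whether each ordered GPU pair can enable peer access.
python3 - <<'EOF'
import torch

n = torch.cuda.device_count()
for i in range(n):
    for j in range(n):
        if i != j:
            ok = torch.cuda.can_device_access_peer(i, j)
            print(f"GPU {i} -> GPU {j}: can_access_peer = {ok}")
EOF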
Error Details
(WorkerDict pid=565043) /home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4631: UserWarning: No device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
(WorkerDict pid=565043) warnings.warn( # warn only once
(WorkerDict pid=564401) [rank0]:[W831 14:46:12.241792627 ProcessGroupNCCL.cpp:4718] [PG ID 0 PG GUID 0 Rank 0] using GPU 0 as device used by this process is currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. You can pecify device_id in init_process_group() to force use of a particular device.
Error executing job with overrides: ['algorithm.adv_estimator=grpo', 'data.train_files=/mnt/yixiali/CODES/AgentFly/data/rlhf/math//orz_math_57k_train.json', 'data.val_files=/mnt/yixiali/CODES/AgentFly/data/rlhf/math//MATH_500.json', 'data.train_batch_size=64', 'agent.agent_type=code', 'agent.tools=[code_interpreter]', 'agent.template=qwen2.5-no-system-tool', 'agent.model_name_or_path=Qwen/Qwen2.5-3B-Instruct', 'agent.max_turns=8', 'agent.backend=async_verl', 'agent.reward_name=math_reward_tool', 'agent.num_chains=8', 'agent.use_agent=True', 'actor_rollout_ref.actor.optim.lr=5e-7', 'actor_rollout_ref.model.use_remove_padding=False', 'actor_rollout_ref.model.path=Qwen/Qwen2.5-3B-Instruct', 'actor_rollout_ref.actor.ppo_mini_batch_size=64', 'actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=2', 'actor_rollout_ref.actor.use_kl_loss=True', 'actor_rollout_ref.actor.kl_loss_coef=0.001', 'actor_rollout_ref.actor.kl_loss_type=mse', 'actor_rollout_ref.actor.entropy_coeff=0.001', 'actor_rollout_ref.model.enable_gradient_checkpointing=False', 'actor_rollout_ref.actor.fsdp_config.param_offload=True', 'actor_rollout_ref.actor.fsdp_config.optimizer_offload=True', 'actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=4', 'actor_rollout_ref.rollout.tensor_model_parallel_size=2', 'actor_rollout_ref.rollout.name=vllm', 'actor_rollout_ref.rollout.response_length=512', 'actor_rollout_ref.rollout.gpu_memory_utilization=0.5', 'actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=4', 'actor_rollout_ref.ref.fsdp_config.param_offload=True', 'critic.model.path=Qwen/Qwen2.5-3B-Instruct', 'critic.ppo_mini_batch_size=64', 'critic.ppo_micro_batch_size_per_gpu=2', 'algorithm.kl_ctrl.kl_coef=0.001', 'trainer.critic_warmup=0', 'trainer.logger=[console,wandb]', 'trainer.project_name=AgentRL', 'trainer.experiment_name=test', 'trainer.n_gpus_per_node=4', 'trainer.nnodes=1', 'trainer.save_freq=50', 'trainer.test_freq=10', 'trainer.total_training_steps=200', 'trainer.val_before_train=False']
Traceback (most recent call last):
File "/opt/conda/envs/ptca/lib/python3.10/runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/opt/conda/envs/ptca/lib/python3.10/runpy.py", line 86, in _run_code
exec(code, run_globals)
File "/home/aiscuser/CODES/AgentFly/verl/verl/trainer/main_ppo.py", line 244, in <module>
main()
File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/hydra/main.py", line 94, in decorated_main
_run_hydra(
File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/hydra/_internal/utils.py", line 394, in _run_hydra
_run_app(
File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/hydra/_internal/utils.py", line 457, in _run_app
run_and_report(
File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/hydra/_internal/utils.py", line 223, in run_and_report
raise ex
File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/hydra/_internal/utils.py", line 220, in run_and_report
return func()
File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/hydra/_internal/utils.py", line 458, in <lambda>
lambda: hydra.run(
File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/hydra/_internal/hydra.py", line 132, in run
_ = ret.return_value
File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/hydra/core/utils.py", line 260, in return_value
raise self._return_value
File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/hydra/core/utils.py", line 186, in run_job
ret.return_value = task_function(task_cfg)
File "/home/aiscuser/CODES/AgentFly/verl/verl/trainer/main_ppo.py", line 62, in main
run_ppo(config)
File "/home/aiscuser/CODES/AgentFly/verl/verl/trainer/main_ppo.py", line 74, in run_ppo
ray.get(runner.run.remote(config))
File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/ray/_private/auto_init_hook.py", line 22, in auto_init_wrapper
return fn(*args, **kwargs)
File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/ray/_private/client_mode_hook.py", line 104, in wrapper
return func(*args, **kwargs)
File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/ray/_private/worker.py", line 2882, in get
values, debugger_breakpoint = worker.get_objects(object_refs, timeout=timeout)
File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/ray/_private/worker.py", line 968, in get_objects
raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(DistBackendError): ray::TaskRunner.run() (pid=560810, ip=100.64.24.39, actor_id=e8bf18909706ae5475513a4701000000, repr=<main_ppo.TaskRunner object at 0x756c19ba2c80>)
File "/home/aiscuser/CODES/AgentFly/verl/verl/trainer/main_ppo.py", line 180, in run
trainer.init_workers()
File "/home/aiscuser/CODES/AgentFly/verl/verl/trainer/ppo/ray_trainer.py", line 746, in init_workers
self.ref_policy_wg.init_model()
File "/home/aiscuser/CODES/AgentFly/verl/verl/single_controller/ray/base.py", line 49, in func
output = ray.get(output)
ray.exceptions.RayTaskError(DistBackendError): ray::WorkerDict.ref_init_model() (pid=565045, ip=100.64.24.39, actor_id=7cc9f5a6383c4734a8d1c4f501000000, repr=<verl.single_controller.ray.base.WorkerDict object at 0x749e8269b040>)
File "/home/aiscuser/CODES/AgentFly/verl/verl/single_controller/ray/base.py", line 466, in func
return getattr(self.worker_dict[key], name)(*args, **kwargs)
File "/home/aiscuser/CODES/AgentFly/verl/verl/single_controller/base/decorator.py", line 501, in inner
return func(*args, **kwargs)
File "/home/aiscuser/CODES/AgentFly/verl/verl/workers/fsdp_workers.py", line 521, in init_model
self.ref_module_fsdp = self._build_model_optimizer(
File "/home/aiscuser/CODES/AgentFly/verl/verl/workers/fsdp_workers.py", line 233, in _build_model_optimizer
torch.distributed.barrier()
File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/torch/distributed/c10d_logger.py", line 81, in wrapper
return func(*args, **kwargs)
File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 4635, in barrier
work = group.barrier(opts=opts)
torch.distributed.DistBackendError: NCCL error in: /pytorch/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:3356, unhandled cuda error (run with NCCL_DEBUG=INFO for details), NCCL version 2.26.2
ncclUnhandledCudaError: Call to CUDA function failed.
Last error:
Cuda failure 217 'peer access is not supported between these two devices'
Cleaning up environments...
0it [00:00, ?it/s]
(TaskRunner pid=560810) Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.ref_init_model() (pid=565044, ip=100.64.24.39, actor_id=0f341df54875fc057ed0c0bd01000000, repr=<verl.single_controller.ray.base.WorkerDict object at 0x7d703320b040>)
(TaskRunner pid=560810) File "/home/aiscuser/CODES/AgentFly/verl/verl/single_controller/ray/base.py", line 466, in func
(TaskRunner pid=560810) return getattr(self.worker_dict[key], name)(*args, **kwargs)
(TaskRunner pid=560810) File "/home/aiscuser/CODES/AgentFly/verl/verl/single_controller/base/decorator.py", line 501, in inner
(TaskRunner pid=560810) return func(*args, **kwargs)
(TaskRunner pid=560810) File "/home/aiscuser/CODES/AgentFly/verl/verl/workers/fsdp_workers.py", line 521, in init_model
(TaskRunner pid=560810) self.ref_module_fsdp = self._build_model_optimizer(
(TaskRunner pid=560810) File "/home/aiscuser/CODES/AgentFly/verl/verl/workers/fsdp_workers.py", line 233, in _build_model_optimizer
(TaskRunner pid=560810) torch.distributed.barrier()
(TaskRunner pid=560810) File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/torch/distributed/c10d_logger.py", line 81, in wrapper
(TaskRunner pid=560810) return func(*args, **kwargs)
(TaskRunner pid=560810) File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 4635, in barrier
(TaskRunner pid=560810) work = group.barrier(opts=opts)
(TaskRunner pid=560810) torch.distributed.DistBackendError: NCCL error in: /pytorch/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:3356, unhandled cuda error (run with NCCL_DEBUG=INFO for details), NCCL version 2.26.2
(TaskRunner pid=560810) ncclUnhandledCudaError: Call to CUDA function failed.
(TaskRunner pid=560810) Last error:
(TaskRunner pid=560810) Cuda failure 217 'peer access is not supported between these two devices'
(TaskRunner pid=560810) Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.ref_init_model() (pid=564401, ip=100.64.24.39, actor_id=89558e3d0536a122e39c2a7e01000000, repr=<verl.single_controller.ray.base.WorkerDict object at 0x74fa36f1f0a0>)
(TaskRunner pid=560810) File "/home/aiscuser/CODES/AgentFly/verl/verl/single_controller/ray/base.py", line 466, in func
(TaskRunner pid=560810) return getattr(self.worker_dict[key], name)(*args, **kwargs)
(TaskRunner pid=560810) File "/home/aiscuser/CODES/AgentFly/verl/verl/single_controller/base/decorator.py", line 501, in inner
(TaskRunner pid=560810) return func(*args, **kwargs)
(TaskRunner pid=560810) File "/home/aiscuser/CODES/AgentFly/verl/verl/workers/fsdp_workers.py", line 521, in init_model
(TaskRunner pid=560810) self.ref_module_fsdp = self._build_model_optimizer(
(TaskRunner pid=560810) File "/home/aiscuser/CODES/AgentFly/verl/verl/workers/fsdp_workers.py", line 233, in _build_model_optimizer
(TaskRunner pid=560810) torch.distributed.barrier()
(TaskRunner pid=560810) File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/torch/distributed/c10d_logger.py", line 81, in wrapper
(TaskRunner pid=560810) return func(*args, **kwargs)
(TaskRunner pid=560810) File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 4635, in barrier
(TaskRunner pid=560810) work = group.barrier(opts=opts)
(TaskRunner pid=560810) torch.distributed.DistBackendError: NCCL error in: /pytorch/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:3356, unhandled cuda error (run with NCCL_DEBUG=INFO for details), NCCL version 2.26.2
(TaskRunner pid=560810) ncclUnhandledCudaError: Call to CUDA function failed.
(TaskRunner pid=560810) Last error:
(TaskRunner pid=560810) Cuda failure 217 'peer access is not supported between these two devices'
(TaskRunner pid=560810) Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.ref_init_model() (pid=565043, ip=100.64.24.39, actor_id=82614e5081ad45519bc4c3f201000000, repr=<verl.single_controller.ray.base.WorkerDict object at 0x70727f5d6f20>)
(TaskRunner pid=560810) File "/home/aiscuser/CODES/AgentFly/verl/verl/single_controller/ray/base.py", line 466, in func
(TaskRunner pid=560810) return getattr(self.worker_dict[key], name)(*args, **kwargs)
(TaskRunner pid=560810) File "/home/aiscuser/CODES/AgentFly/verl/verl/single_controller/base/decorator.py", line 501, in inner
(TaskRunner pid=560810) return func(*args, **kwargs)
(TaskRunner pid=560810) File "/home/aiscuser/CODES/AgentFly/verl/verl/workers/fsdp_workers.py", line 521, in init_model
(TaskRunner pid=560810) self.ref_module_fsdp = self._build_model_optimizer(
(TaskRunner pid=560810) File "/home/aiscuser/CODES/AgentFly/verl/verl/workers/fsdp_workers.py", line 233, in _build_model_optimizer
(TaskRunner pid=560810) torch.distributed.barrier()
(TaskRunner pid=560810) File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/torch/distributed/c10d_logger.py", line 81, in wrapper
(TaskRunner pid=560810) return func(*args, **kwargs)
(TaskRunner pid=560810) File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 4635, in barrier
(TaskRunner pid=560810) work = group.barrier(opts=opts)
(TaskRunner pid=560810) torch.distributed.DistBackendError: NCCL error in: /pytorch/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:3356, unhandled cuda error (run with NCCL_DEBUG=INFO for details), NCCL version 2.26.2
(TaskRunner pid=560810) ncclUnhandledCudaError: Call to CUDA function failed.
(TaskRunner pid=560810) Last error:
(TaskRunner pid=560810) Cuda failure 217 'peer access is not supported between these two devices'
(WorkerDict pid=565044) [W831 14:46:10.832875108 Utils.hpp:137] Warning: Environment variable NCCL_ASYNC_ERROR_HANDLING is deprecated; use TORCH_NCCL_ASYNC_ERROR_HANDLING instead (function operator()) [repeated 3x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/user-guides/configure-logging.html#log-deduplication for more options.)
(WorkerDict pid=565044) [W831 14:46:10.832074897 socket.cpp:755] [c10d] The client socket cannot be initialized to connect to [100-64-24-39.proxy-node-0.79e5d84c-257e-473d-ad40-0b89b73e0ad7.svc.cluster.local]:50601 (errno: 97 - Address family not supported by protocol).
(WorkerDict pid=564401) `torch_dtype` is deprecated! Use `dtype` instead! [repeated 3x across cluster]
Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] [repeated 3x across cluster]
Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00, 16.22it/s] [repeated 3x across cluster]
(WorkerDict pid=564401) /home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4631: UserWarning: No device id is provided via `init_process_group` or `barrier `. Using the current device set by the user. [repeated 3x across cluster]
(WorkerDict pid=564401) warnings.warn( # warn only once [repeated 3x across cluster]
(WorkerDict pid=565044) [rank2]:[W831 14:46:12.312774421 ProcessGroupNCCL.cpp:4718] [PG ID 0 PG GUID 0 Rank 2] using GPU 0 as device used by this process is currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. You can pecify device_id in init_process_group() to force use of a particular device. [repeated 3x across cluster]
Root Cause
ncclUnhandledCudaError: Call to CUDA function failed. Last error: Cuda failure 217 'peer access is not supported between these two devices'
Cuda failure 217 is cudaErrorPeerAccessUnsupported: at least one pair of GPUs visible to the job cannot enable CUDA peer-to-peer access, so NCCL hits an unhandled CUDA error while setting up its P2P transport during the first collective (the torch.distributed.barrier() call in _build_model_optimizer).
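As the error message itself suggests, re-running with NCCL debug logging enabled usually shows which transport NCCL was trying to set up when the CUDA call failed. A sketch only; the exact output depends on the NCCL build, and the script path is the one referenced in the training script below.
# Export before starting Ray so every worker process inherits the setting.
export NCCL_DEBUG=INFO
export NCCL_DEBUG_SUBSYS=INIT,GRAPH,P2P   # restrict logging to topology/transport setup
bash verl/run_agents/run_code_agent.sh 2>&1 | tee nccl_debug.log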
Attempted Solutions
The following environment variables were tried, but none of them resolved the error:
export NCCL_IGNORE_DISABLED_P2P=1
export NCCL_P2P_DISABLE=1
export NCCL_IB_DISABLE=1
export NCCL_NET_GDR_LEVEL=0
References
Environment Setup
#!/bin/bash
# Ref: https://github.com/Agent-One-Lab/AgentFly/blob/main/docs/start/installation.md
set -ex
curl -LsSf https://astral.sh/uv/install.sh | sh
export PATH="$HOME/.local/bin:$PATH"
uv venv uv_agentfly --python 3.10
source uv_agentfly/bin/activate
uv pip install --upgrade pip
export UV_LINK_MODE=copy
git submodule init
git submodule update
uv pip install -e .
uv pip install -e '.[verl]' --no-build-isolation
uv pip install --upgrade datasets
cd verl && uv pip install --no-deps -e .
cd ..
uv pip list
# enroot install
arch=$(dpkg --print-architecture)
curl -fSsL -O https://github.com/NVIDIA/enroot/releases/download/v3.5.0/enroot_3.5.0-1_${arch}.deb
curl -fSsL -O https://github.com/NVIDIA/enroot/releases/download/v3.5.0/enroot+caps_3.5.0-1_${arch}.deb # optional
sudo apt install -y ./*.deb
rm -f ./*.deb
# optional: Search requires Redis to cache results
if redis-server --version 2>/dev/null | grep -q "7.4.0"; then
echo "Redis 7.4.0 installed, skipped"
else
echo "Installing Redis 7.4.0..."
wget https://download.redis.io/releases/redis-7.4.0.tar.gz
tar xzf redis-7.4.0.tar.gz
cd redis-7.4.0
make
sudo make install
cd ..
rm -rf redis-7.4.0*
redis-server --version
fi
Package Versions
Package Version Editable project location
---------------------------------- ------------- ----------------------------------
accelerate 1.10.1
agentfly 0.0.1 /home/aiscuser/CODES/AgentFly
aiohappyeyeballs 2.6.1
aiohttp 3.12.15
aiohttp-cors 0.8.1
aiosignal 1.4.0
annotated-types 0.7.0
antlr4-python3-runtime 4.9.3
anyio 4.10.0
astor 0.8.1
async-timeout 5.0.1
attrs 25.3.0
av 15.1.0
beautifulsoup4 4.13.5
blake3 1.0.5
bs4 0.0.2
cachetools 6.2.0
cbor2 5.7.0
certifi 2025.8.3
cffi 1.17.1
cfgv 3.4.0
charset-normalizer 3.4.3
click 8.2.1
cloudpickle 3.1.1
codetiming 1.4.0
coloredlogs 15.0.1
colorful 0.5.7
compressed-tensors 0.10.2
cupy-cuda12x 13.6.0
datasets 4.0.0
depyf 0.19.0
dill 0.3.8
diskcache 5.6.3
distlib 0.4.0
distro 1.9.0
dnspython 2.7.0
docker 7.1.0
einops 0.8.1
email-validator 2.3.0
exceptiongroup 1.3.0
faiss-cpu 1.12.0
fastapi 0.116.1
fastapi-cli 0.0.8
fastapi-cloud-cli 0.1.5
fastrlock 0.8.3
filelock 3.19.1
flash-attn 2.8.3
flatbuffers 25.2.10
frozenlist 1.7.0
fsspec 2025.3.0
gguf 0.17.1
gitdb 4.0.12
gitpython 3.1.45
google-api-core 1.16.0
google-auth 1.6.3
googleapis-common-protos 1.70.0
grpcio 1.74.0
h11 0.16.0
hf-xet 1.1.9
httpcore 1.0.9
httptools 0.6.4
httpx 0.28.1
huggingface-hub 0.34.4
humanfriendly 10.0
hydra-core 1.3.2
identify 2.6.13
idna 3.10
importlib-metadata 8.7.0
interegular 0.3.3
jinja2 3.1.6
jiter 0.10.0
jsonschema 4.25.1
jsonschema-specifications 2025.4.1
lark 1.2.2
liger-kernel 0.6.2
llguidance 0.7.30
llvmlite 0.44.0
lm-format-enforcer 0.10.12
markdown-it-py 4.0.0
markupsafe 3.0.2
mdurl 0.1.2
mistral-common 1.8.4
mpmath 1.3.0
msgpack 1.1.1
msgspec 0.19.0
multidict 6.6.4
multiprocess 0.70.16
networkx 3.4.2
ninja 1.13.0
nodeenv 1.9.1
numba 0.61.2
numpy 2.2.6
nvidia-cublas-cu12 12.6.4.1
nvidia-cuda-cupti-cu12 12.6.80
nvidia-cuda-nvrtc-cu12 12.6.77
nvidia-cuda-runtime-cu12 12.6.77
nvidia-cudnn-cu12 9.5.1.17
nvidia-cufft-cu12 11.3.0.4
nvidia-cufile-cu12 1.11.1.6
nvidia-curand-cu12 10.3.7.77
nvidia-cusolver-cu12 11.7.1.2
nvidia-cusparse-cu12 12.5.4.2
nvidia-cusparselt-cu12 0.6.3
nvidia-nccl-cu12 2.26.2
nvidia-nvjitlink-cu12 12.6.85
nvidia-nvtx-cu12 12.6.77
omegaconf 2.3.0
onnxruntime 1.22.1
openai 1.90.0
opencensus 0.11.4
opencensus-context 0.1.3
opencv-python-headless 4.12.0.88
opentelemetry-api 1.36.0
opentelemetry-exporter-prometheus 0.57b0
opentelemetry-proto 1.36.0
opentelemetry-sdk 1.36.0
opentelemetry-semantic-conventions 0.57b0
orjson 3.11.3
outlines-core 0.2.10
packaging 25.0
pandas 2.3.2
partial-json-parser 0.2.1.1.post6
peft 0.17.1
pillow 11.3.0
pip 25.2
platformdirs 4.4.0
pre-commit 4.3.0
prometheus-client 0.22.1
prometheus-fastapi-instrumentator 7.1.0
propcache 0.3.2
protobuf 6.32.0
psutil 7.0.0
py-cpuinfo 9.0.0
py-spy 0.4.1
pyarrow 21.0.0
pyasn1 0.6.1
pyasn1-modules 0.4.2
pybase64 1.4.2
pybind11 3.0.1
pycountry 24.6.1
pycparser 2.22
pydantic 2.11.7
pydantic-core 2.33.2
pydantic-extra-types 2.10.5
pygments 2.19.2
pylatexenc 2.10
python-dateutil 2.9.0.post0
python-dotenv 1.1.1
python-json-logger 3.3.0
python-multipart 0.0.20
pytz 2025.2
pyyaml 6.0.2
pyzmq 27.0.2
qwen-vl-utils 0.0.11
ray 2.49.0
redis 6.4.0
referencing 0.36.2
regex 2025.8.29
requests 2.32.5
responses 0.18.0
rich 14.1.0
rich-toolkit 0.15.0
rignore 0.6.4
rpds-py 0.27.1
rsa 4.9.1
safetensors 0.6.2
scipy 1.15.3
sentencepiece 0.2.1
sentry-sdk 2.35.1
setuptools 80.9.0
shellingham 1.5.4
six 1.17.0
smart-open 7.3.0.post1
smmap 5.0.2
sniffio 1.3.1
soundfile 0.13.1
soupsieve 2.8
soxr 0.5.0.post1
starlette 0.47.3
sympy 1.14.0
tenacity 9.1.2
tensordict 0.6.2
termcolor 3.1.0
tiktoken 0.11.0
timeout-decorator 0.5.0
tokenizers 0.22.0
torch 2.7.1
torchaudio 2.7.1
torchdata 0.11.0
torchvision 0.22.1
tqdm 4.67.1
transformers 4.56.0
triton 3.3.1
typer 0.17.3
typing-extensions 4.15.0
typing-inspection 0.4.1
tzdata 2025.2
urllib3 2.5.0
uvicorn 0.35.0
uvloop 0.21.0
verl 0.3.1.dev0 /home/aiscuser/CODES/AgentFly/verl
virtualenv 20.34.0
vllm 0.10.0
wandb 0.21.3
watchfiles 1.1.0
websockets 15.0.1
wrapt 1.17.3
xformers 0.0.31
xgrammar 0.1.21
xxhash 3.5.0
yarl 1.20.1
zipp 3.23.0
Training Script
#!/bin/bash
# run with 4x 80GB GPUs [A100/H100]
# verl/run_agents/run_code_agent.sh
set -ex
source uv_agentfly/bin/activate
# =================== Local Configuration ===================
NNODES=1 # Number of nodes for local execution
NGPUS_PER_NODE=4 # Number of GPUs per node
CPUS_PER_TASK=96 # Number of CPUs per task
HEAD_NODE="localhost" # Head node for local execution
# Local node configuration
nodes=("$HEAD_NODE")
echo "Nodes to check: ${nodes[@]}"
# We'll track PIDs so we can wait on them and detect errors
declare -A pids
export head_node=${nodes[0]}
head_node_ip=$(hostname -I | awk '{print $1}')
port=6379
address_head=$head_node_ip:$port
export worker_num=$NNODES
export HYDRA_FULL_ERROR=1
export NCCL_IGNORE_DISABLED_P2P=1
export NCCL_P2P_DISABLE=1
export NCCL_IB_DISABLE=1
export NCCL_NET_GDR_LEVEL=0
export VLLM_USE_V1=1
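# Note: these NCCL_* overrides only reach the training workers because they are
# exported before `ray start` below; Ray worker processes inherit the raylet's
# environment, not later changes made in this shell.
env | grep -E '^NCCL_' || true   # sanity check that the overrides are exported at this point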
# =================== Ray start ===================
# Stop existing Ray cluster
ray stop
sleep 10
# Remove existing Ray cluster
rm -rf /tmp/ray/ray_current_cluster
# Start Ray head node
ray start --head --node-ip-address="$head_node_ip" --port=$port \
--num-cpus "${CPUS_PER_TASK}" --num-gpus $NGPUS_PER_NODE --include-dashboard=True --block &
sleep 10
# =================== Start RL training ===================
model=Qwen/Qwen2.5-3B-Instruct
template=qwen2.5-no-system-tool
lr=5e-7
length=512
batch_size=64
num_chains=8
kl_coef=0.001
train_dataset="orz_math_57k_train"
# adv_estimator=rloo
# adv_estimator=reinforce_plus_plus
# adv_estimator=remax
adv_estimator=grpo
# adv_estimator=gae
mini_batch_size=$batch_size
agent_type=code
tools="[code_interpreter]"
reward_name="math_reward_tool"
# reward_name="llm_as_judge_math_reward"
entropy_coeff=0.001
kl_loss_type=mse
max_turns=8
agent_backend="async_verl"
project_name="AgentRL"
total_training_steps=200
experiment_name="test"
# experiment_name="${model}-${agent_type}-${train_dataset}-${lr}-${length}-bs${batch_size}-n${num_chains}-kl${kl_loss_type}${kl_coef}-entropy${entropy_coeff}-${max_turns}steps-${adv_estimator}"
python3 -m verl.trainer.main_ppo \
algorithm.adv_estimator=$adv_estimator \
data.train_files=/mnt/yixiali/CODES/AgentFly/data/rlhf/math//${train_dataset}.json \
data.val_files=/mnt/yixiali/CODES/AgentFly/data/rlhf/math//MATH_500.json \
data.train_batch_size=$batch_size \
agent.agent_type=$agent_type \
agent.tools=$tools \
agent.template=$template \
agent.model_name_or_path=$model \
agent.max_turns=${max_turns} \
agent.backend=${agent_backend} \
agent.reward_name=$reward_name \
agent.num_chains=$num_chains \
agent.use_agent=True \
actor_rollout_ref.actor.optim.lr=$lr \
actor_rollout_ref.model.use_remove_padding=False \
actor_rollout_ref.model.path=${model} \
actor_rollout_ref.actor.ppo_mini_batch_size=${mini_batch_size} \
actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=2 \
actor_rollout_ref.actor.use_kl_loss=True \
actor_rollout_ref.actor.kl_loss_coef=$kl_coef \
actor_rollout_ref.actor.kl_loss_type=$kl_loss_type \
actor_rollout_ref.actor.entropy_coeff=$entropy_coeff \
actor_rollout_ref.model.enable_gradient_checkpointing=False \
actor_rollout_ref.actor.fsdp_config.param_offload=True \
actor_rollout_ref.actor.fsdp_config.optimizer_offload=True \
actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=4 \
actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
actor_rollout_ref.rollout.name=vllm \
actor_rollout_ref.rollout.response_length=$length \
actor_rollout_ref.rollout.gpu_memory_utilization=0.5 \
actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=4 \
actor_rollout_ref.ref.fsdp_config.param_offload=True \
critic.model.path=$model \
critic.ppo_mini_batch_size=${mini_batch_size} \
critic.ppo_micro_batch_size_per_gpu=2 \
algorithm.kl_ctrl.kl_coef=$kl_coef \
trainer.critic_warmup=0 \
trainer.logger=['console','wandb'] \
trainer.project_name=$project_name \
trainer.experiment_name=${experiment_name} \
trainer.n_gpus_per_node=4 \
trainer.nnodes=1 \
trainer.save_freq=50 \
trainer.test_freq=10 \
trainer.total_training_steps=$total_training_steps \
trainer.val_before_train=False
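Since exporting the NCCL_* variables did not change the failure, a hedged follow-up check is to confirm that they are actually visible inside the Ray worker processes that raise the error. A minimal sketch, assuming the cluster started by this script is still running on the same node:
python3 - <<'EOF'
import os
import ray

ray.init(address="auto")  # attach to the cluster started by `ray start` above

@ray.remote
def nccl_env():
    # Report whatever NCCL-related variables this worker process actually sees.
    return {k: v for k, v in os.environ.items() if k.startswith("NCCL_")}

print(ray.get(nccl_env.remote()))
EOF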