
Cuda failure 217 'peer access is not supported between these two devices' #11

@X1AOX1A

Description


NCCL P2P Communication Error During Multi-GPU Training

Problem

PPO training fails with an NCCL error on a multi-GPU node because peer-to-peer (P2P) access is not supported between the devices.

Error Details

(WorkerDict pid=565043) /home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4631: UserWarning: No device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
(WorkerDict pid=565043)   warnings.warn(  # warn only once
(WorkerDict pid=564401) [rank0]:[W831 14:46:12.241792627 ProcessGroupNCCL.cpp:4718] [PG ID 0 PG GUID 0 Rank 0]  using GPU 0 as device used by this process is currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. You can specify device_id in init_process_group() to force use of a particular device.
Error executing job with overrides: ['algorithm.adv_estimator=grpo', 'data.train_files=/mnt/yixiali/CODES/AgentFly/data/rlhf/math//orz_math_57k_train.json', 'data.val_files=/mnt/yixiali/CODES/AgentFly/data/rlhf/math//MATH_500.json', 'data.train_batch_size=64', 'agent.agent_type=code', 'agent.tools=[code_interpreter]', 'agent.template=qwen2.5-no-system-tool', 'agent.model_name_or_path=Qwen/Qwen2.5-3B-Instruct', 'agent.max_turns=8', 'agent.backend=async_verl', 'agent.reward_name=math_reward_tool', 'agent.num_chains=8', 'agent.use_agent=True', 'actor_rollout_ref.actor.optim.lr=5e-7', 'actor_rollout_ref.model.use_remove_padding=False', 'actor_rollout_ref.model.path=Qwen/Qwen2.5-3B-Instruct', 'actor_rollout_ref.actor.ppo_mini_batch_size=64', 'actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=2', 'actor_rollout_ref.actor.use_kl_loss=True', 'actor_rollout_ref.actor.kl_loss_coef=0.001', 'actor_rollout_ref.actor.kl_loss_type=mse', 'actor_rollout_ref.actor.entropy_coeff=0.001', 'actor_rollout_ref.model.enable_gradient_checkpointing=False', 'actor_rollout_ref.actor.fsdp_config.param_offload=True', 'actor_rollout_ref.actor.fsdp_config.optimizer_offload=True', 'actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=4', 'actor_rollout_ref.rollout.tensor_model_parallel_size=2', 'actor_rollout_ref.rollout.name=vllm', 'actor_rollout_ref.rollout.response_length=512', 'actor_rollout_ref.rollout.gpu_memory_utilization=0.5', 'actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=4', 'actor_rollout_ref.ref.fsdp_config.param_offload=True', 'critic.model.path=Qwen/Qwen2.5-3B-Instruct', 'critic.ppo_mini_batch_size=64', 'critic.ppo_micro_batch_size_per_gpu=2', 'algorithm.kl_ctrl.kl_coef=0.001', 'trainer.critic_warmup=0', 'trainer.logger=[console,wandb]', 'trainer.project_name=AgentRL', 'trainer.experiment_name=test', 'trainer.n_gpus_per_node=4', 'trainer.nnodes=1', 'trainer.save_freq=50', 'trainer.test_freq=10', 'trainer.total_training_steps=200', 'trainer.val_before_train=False']
Traceback (most recent call last):
  File "/opt/conda/envs/ptca/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/opt/conda/envs/ptca/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/home/aiscuser/CODES/AgentFly/verl/verl/trainer/main_ppo.py", line 244, in <module>
    main()
  File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/hydra/main.py", line 94, in decorated_main
    _run_hydra(
  File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/hydra/_internal/utils.py", line 394, in _run_hydra
    _run_app(
  File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/hydra/_internal/utils.py", line 457, in _run_app
    run_and_report(
  File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/hydra/_internal/utils.py", line 223, in run_and_report
    raise ex
  File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/hydra/_internal/utils.py", line 220, in run_and_report
    return func()
  File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/hydra/_internal/utils.py", line 458, in <lambda>
    lambda: hydra.run(
  File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/hydra/_internal/hydra.py", line 132, in run
    _ = ret.return_value
  File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/hydra/core/utils.py", line 260, in return_value
    raise self._return_value
  File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/hydra/core/utils.py", line 186, in run_job
    ret.return_value = task_function(task_cfg)
  File "/home/aiscuser/CODES/AgentFly/verl/verl/trainer/main_ppo.py", line 62, in main
    run_ppo(config)
  File "/home/aiscuser/CODES/AgentFly/verl/verl/trainer/main_ppo.py", line 74, in run_ppo
    ray.get(runner.run.remote(config))
  File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/ray/_private/auto_init_hook.py", line 22, in auto_init_wrapper
    return fn(*args, **kwargs)
  File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/ray/_private/client_mode_hook.py", line 104, in wrapper
    return func(*args, **kwargs)
  File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/ray/_private/worker.py", line 2882, in get
    values, debugger_breakpoint = worker.get_objects(object_refs, timeout=timeout)
  File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/ray/_private/worker.py", line 968, in get_objects
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(DistBackendError): ray::TaskRunner.run() (pid=560810, ip=100.64.24.39, actor_id=e8bf18909706ae5475513a4701000000, repr=<main_ppo.TaskRunner object at 0x756c19ba2c80>)
  File "/home/aiscuser/CODES/AgentFly/verl/verl/trainer/main_ppo.py", line 180, in run
    trainer.init_workers()
  File "/home/aiscuser/CODES/AgentFly/verl/verl/trainer/ppo/ray_trainer.py", line 746, in init_workers
    self.ref_policy_wg.init_model()
  File "/home/aiscuser/CODES/AgentFly/verl/verl/single_controller/ray/base.py", line 49, in func
    output = ray.get(output)
ray.exceptions.RayTaskError(DistBackendError): ray::WorkerDict.ref_init_model() (pid=565045, ip=100.64.24.39, actor_id=7cc9f5a6383c4734a8d1c4f501000000, repr=<verl.single_controller.ray.base.WorkerDict object at 0x749e8269b040>)
  File "/home/aiscuser/CODES/AgentFly/verl/verl/single_controller/ray/base.py", line 466, in func
    return getattr(self.worker_dict[key], name)(*args, **kwargs)
  File "/home/aiscuser/CODES/AgentFly/verl/verl/single_controller/base/decorator.py", line 501, in inner
    return func(*args, **kwargs)
  File "/home/aiscuser/CODES/AgentFly/verl/verl/workers/fsdp_workers.py", line 521, in init_model
    self.ref_module_fsdp = self._build_model_optimizer(
  File "/home/aiscuser/CODES/AgentFly/verl/verl/workers/fsdp_workers.py", line 233, in _build_model_optimizer
    torch.distributed.barrier()
  File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/torch/distributed/c10d_logger.py", line 81, in wrapper
    return func(*args, **kwargs)
  File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 4635, in barrier
    work = group.barrier(opts=opts)
torch.distributed.DistBackendError: NCCL error in: /pytorch/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:3356, unhandled cuda error (run with NCCL_DEBUG=INFO for details), NCCL version 2.26.2
ncclUnhandledCudaError: Call to CUDA function failed.
Last error:
Cuda failure 217 'peer access is not supported between these two devices'
Cleaning up environments...
0it [00:00, ?it/s]
(TaskRunner pid=560810) Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.ref_init_model() raised the same DistBackendError traceback on the remaining WorkerDict actors (pids 565044, 564401, and 565043), each ending in:
(TaskRunner pid=560810) Cuda failure 217 'peer access is not supported between these two devices'
(WorkerDict pid=565044) [W831 14:46:10.832875108 Utils.hpp:137] Warning: Environment variable NCCL_ASYNC_ERROR_HANDLING is deprecated; use TORCH_NCCL_ASYNC_ERROR_HANDLING instead (function operator()) [repeated 3x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/user-guides/configure-logging.html#log-deduplication for more options.)
(WorkerDict pid=565044) [W831 14:46:10.832074897 socket.cpp:755] [c10d] The client socket cannot be initialized to connect to [100-64-24-39.proxy-node-0.79e5d84c-257e-473d-ad40-0b89b73e0ad7.svc.cluster.local]:50601 (errno: 97 - Address family not supported by protocol).
(WorkerDict pid=564401) `torch_dtype` is deprecated! Use `dtype` instead! [repeated 3x across cluster]
Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s] [repeated 3x across cluster]
Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00, 16.22it/s] [repeated 3x across cluster]
(WorkerDict pid=564401) /home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4631: UserWarning: No device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.  [repeated 3x across cluster]
(WorkerDict pid=564401)   warnings.warn(  # warn only once [repeated 3x across cluster]
(WorkerDict pid=565044) [rank2]:[W831 14:46:12.312774421 ProcessGroupNCCL.cpp:4718] [PG ID 0 PG GUID 0 Rank 2]  using GPU 0 as device used by this process is currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. You can specify device_id in init_process_group() to force use of a particular device. [repeated 3x across cluster]

Root Cause

ncclUnhandledCudaError: Call to CUDA function failed. Last error: Cuda failure 217 'peer access is not supported between these two devices'
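CUDA error 217 (cudaErrorPeerAccessUnsupported) is raised when NCCL asks the driver to enable direct peer access between two GPUs that have no P2P path, which is common on cloud VMs where GPUs sit behind different PCIe root complexes or where virtualization blocks P2P. A quick way to see which device pairs are affected (a minimal diagnostic sketch, assuming the venv from this issue):

# Link matrix: NV#/PIX links generally support P2P; PHB/NODE/SYS often do not
nvidia-smi topo -m

# Ask the CUDA driver directly which device pairs allow peer access
python3 - <<'EOF'
import torch
n = torch.cuda.device_count()
for i in range(n):
    for j in range(n):
        if i != j:
            ok = torch.cuda.can_device_access_peer(i, j)
            print(f"GPU {i} -> GPU {j}: {'P2P supported' if ok else 'P2P NOT supported'}")
EOF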

Attempted Solutions

The following environment variables were set before launching, but the error persisted:

export NCCL_IGNORE_DISABLED_P2P=1
export NCCL_P2P_DISABLE=1
export NCCL_IB_DISABLE=1
export NCCL_NET_GDR_LEVEL=0
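
Since these flags are exported in the launch shell, it helps to first confirm that the failure reproduces outside of Ray/verl and that NCCL actually honors NCCL_P2P_DISABLE. A minimal standalone repro (a sketch, assuming 4 local GPUs and the same venv; the /tmp path is arbitrary):

# Tiny all_reduce test run with NCCL debug logging
cat > /tmp/nccl_check.py <<'EOF'
import os
import torch
import torch.distributed as dist

dist.init_process_group("nccl")
local_rank = int(os.environ["LOCAL_RANK"])
torch.cuda.set_device(local_rank)
x = torch.ones(1, device="cuda")
dist.all_reduce(x)  # prints the world size on success
print(f"rank {dist.get_rank()}: all_reduce -> {x.item()}")
dist.destroy_process_group()
EOF

NCCL_DEBUG=INFO NCCL_P2P_DISABLE=1 torchrun --nproc_per_node=4 /tmp/nccl_check.py

If this fails the same way, the problem is in the node's GPU topology or driver rather than in verl; if it passes, the flags may not be reaching the Ray workers (see the check after the training script below).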


Environment Setup

#!/bin/bash
# Ref: https://github.com/Agent-One-Lab/AgentFly/blob/main/docs/start/installation.md
set -ex

curl -LsSf https://astral.sh/uv/install.sh | sh
export PATH="$HOME/.local/bin:$PATH"
uv venv uv_agentfly --python 3.10
source uv_agentfly/bin/activate
uv pip install --upgrade pip
export UV_LINK_MODE=copy

git submodule init
git submodule update

uv pip install -e .
uv pip install -e '.[verl]' --no-build-isolation
uv pip install --upgrade datasets
cd verl && uv pip install --no-deps -e .
cd ..
uv pip list

# enroot install
arch=$(dpkg --print-architecture)
curl -fSsL -O https://github.com/NVIDIA/enroot/releases/download/v3.5.0/enroot_3.5.0-1_${arch}.deb
curl -fSsL -O https://github.com/NVIDIA/enroot/releases/download/v3.5.0/enroot+caps_3.5.0-1_${arch}.deb # optional
sudo apt install -y ./*.deb
rm -f ./*.deb

# Optional: the Search tool requires Redis to cache results
if redis-server --version 2>/dev/null | grep -q "7.4.0"; then
   echo "Redis 7.4.0 installed, skipped"
else
   echo "Installing Redis 7.4.0..."
   wget https://download.redis.io/releases/redis-7.4.0.tar.gz
   tar xzf redis-7.4.0.tar.gz
   cd redis-7.4.0
   make
   sudo make install
   cd ..
   rm -rf redis-7.4.0*
   redis-server --version
fi
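
After the install, it is worth recording which CUDA/NCCL builds actually landed in the venv (a quick check using only PyTorch's own introspection; these are standard torch attributes):

python3 - <<'EOF'
import torch
print("torch:", torch.__version__)
print("CUDA runtime:", torch.version.cuda)
print("NCCL:", ".".join(map(str, torch.cuda.nccl.version())))
print("GPUs visible:", torch.cuda.device_count())
EOF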

Package Versions

Package                            Version       Editable project location
---------------------------------- ------------- ----------------------------------
accelerate                         1.10.1
agentfly                           0.0.1         /home/aiscuser/CODES/AgentFly
aiohappyeyeballs                   2.6.1
aiohttp                            3.12.15
aiohttp-cors                       0.8.1
aiosignal                          1.4.0
annotated-types                    0.7.0
antlr4-python3-runtime             4.9.3
anyio                              4.10.0
astor                              0.8.1
async-timeout                      5.0.1
attrs                              25.3.0
av                                 15.1.0
beautifulsoup4                     4.13.5
blake3                             1.0.5
bs4                                0.0.2
cachetools                         6.2.0
cbor2                              5.7.0
certifi                            2025.8.3
cffi                               1.17.1
cfgv                               3.4.0
charset-normalizer                 3.4.3
click                              8.2.1
cloudpickle                        3.1.1
codetiming                         1.4.0
coloredlogs                        15.0.1
colorful                           0.5.7
compressed-tensors                 0.10.2
cupy-cuda12x                       13.6.0
datasets                           4.0.0
depyf                              0.19.0
dill                               0.3.8
diskcache                          5.6.3
distlib                            0.4.0
distro                             1.9.0
dnspython                          2.7.0
docker                             7.1.0
einops                             0.8.1
email-validator                    2.3.0
exceptiongroup                     1.3.0
faiss-cpu                          1.12.0
fastapi                            0.116.1
fastapi-cli                        0.0.8
fastapi-cloud-cli                  0.1.5
fastrlock                          0.8.3
filelock                           3.19.1
flash-attn                         2.8.3
flatbuffers                        25.2.10
frozenlist                         1.7.0
fsspec                             2025.3.0
gguf                               0.17.1
gitdb                              4.0.12
gitpython                          3.1.45
google-api-core                    1.16.0
google-auth                        1.6.3
googleapis-common-protos           1.70.0
grpcio                             1.74.0
h11                                0.16.0
hf-xet                             1.1.9
httpcore                           1.0.9
httptools                          0.6.4
httpx                              0.28.1
huggingface-hub                    0.34.4
humanfriendly                      10.0
hydra-core                         1.3.2
identify                           2.6.13
idna                               3.10
importlib-metadata                 8.7.0
interegular                        0.3.3
jinja2                             3.1.6
jiter                              0.10.0
jsonschema                         4.25.1
jsonschema-specifications          2025.4.1
lark                               1.2.2
liger-kernel                       0.6.2
llguidance                         0.7.30
llvmlite                           0.44.0
lm-format-enforcer                 0.10.12
markdown-it-py                     4.0.0
markupsafe                         3.0.2
mdurl                              0.1.2
mistral-common                     1.8.4
mpmath                             1.3.0
msgpack                            1.1.1
msgspec                            0.19.0
multidict                          6.6.4
multiprocess                       0.70.16
networkx                           3.4.2
ninja                              1.13.0
nodeenv                            1.9.1
numba                              0.61.2
numpy                              2.2.6
nvidia-cublas-cu12                 12.6.4.1
nvidia-cuda-cupti-cu12             12.6.80
nvidia-cuda-nvrtc-cu12             12.6.77
nvidia-cuda-runtime-cu12           12.6.77
nvidia-cudnn-cu12                  9.5.1.17
nvidia-cufft-cu12                  11.3.0.4
nvidia-cufile-cu12                 1.11.1.6
nvidia-curand-cu12                 10.3.7.77
nvidia-cusolver-cu12               11.7.1.2
nvidia-cusparse-cu12               12.5.4.2
nvidia-cusparselt-cu12             0.6.3
nvidia-nccl-cu12                   2.26.2
nvidia-nvjitlink-cu12              12.6.85
nvidia-nvtx-cu12                   12.6.77
omegaconf                          2.3.0
onnxruntime                        1.22.1
openai                             1.90.0
opencensus                         0.11.4
opencensus-context                 0.1.3
opencv-python-headless             4.12.0.88
opentelemetry-api                  1.36.0
opentelemetry-exporter-prometheus  0.57b0
opentelemetry-proto                1.36.0
opentelemetry-sdk                  1.36.0
opentelemetry-semantic-conventions 0.57b0
orjson                             3.11.3
outlines-core                      0.2.10
packaging                          25.0
pandas                             2.3.2
partial-json-parser                0.2.1.1.post6
peft                               0.17.1
pillow                             11.3.0
pip                                25.2
platformdirs                       4.4.0
pre-commit                         4.3.0
prometheus-client                  0.22.1
prometheus-fastapi-instrumentator  7.1.0
propcache                          0.3.2
protobuf                           6.32.0
psutil                             7.0.0
py-cpuinfo                         9.0.0
py-spy                             0.4.1
pyarrow                            21.0.0
pyasn1                             0.6.1
pyasn1-modules                     0.4.2
pybase64                           1.4.2
pybind11                           3.0.1
pycountry                          24.6.1
pycparser                          2.22
pydantic                           2.11.7
pydantic-core                      2.33.2
pydantic-extra-types               2.10.5
pygments                           2.19.2
pylatexenc                         2.10
python-dateutil                    2.9.0.post0
python-dotenv                      1.1.1
python-json-logger                 3.3.0
python-multipart                   0.0.20
pytz                               2025.2
pyyaml                             6.0.2
pyzmq                              27.0.2
qwen-vl-utils                      0.0.11
ray                                2.49.0
redis                              6.4.0
referencing                        0.36.2
regex                              2025.8.29
requests                           2.32.5
responses                          0.18.0
rich                               14.1.0
rich-toolkit                       0.15.0
rignore                            0.6.4
rpds-py                            0.27.1
rsa                                4.9.1
safetensors                        0.6.2
scipy                              1.15.3
sentencepiece                      0.2.1
sentry-sdk                         2.35.1
setuptools                         80.9.0
shellingham                        1.5.4
six                                1.17.0
smart-open                         7.3.0.post1
smmap                              5.0.2
sniffio                            1.3.1
soundfile                          0.13.1
soupsieve                          2.8
soxr                               0.5.0.post1
starlette                          0.47.3
sympy                              1.14.0
tenacity                           9.1.2
tensordict                         0.6.2
termcolor                          3.1.0
tiktoken                           0.11.0
timeout-decorator                  0.5.0
tokenizers                         0.22.0
torch                              2.7.1
torchaudio                         2.7.1
torchdata                          0.11.0
torchvision                        0.22.1
tqdm                               4.67.1
transformers                       4.56.0
triton                             3.3.1
typer                              0.17.3
typing-extensions                  4.15.0
typing-inspection                  0.4.1
tzdata                             2025.2
urllib3                            2.5.0
uvicorn                            0.35.0
uvloop                             0.21.0
verl                               0.3.1.dev0    /home/aiscuser/CODES/AgentFly/verl
virtualenv                         20.34.0
vllm                               0.10.0
wandb                              0.21.3
watchfiles                         1.1.0
websockets                         15.0.1
wrapt                              1.17.3
xformers                           0.0.31
xgrammar                           0.1.21
xxhash                             3.5.0
yarl                               1.20.1
zipp                               3.23.0

Training Script

#!/bin/bash
# Run with 4x 80GB GPUs (A100/H100)
# verl/run_agents/run_code_agent.sh
set -ex
source uv_agentfly/bin/activate

# =================== Local Configuration ===================
NNODES=1  # Number of nodes for local execution
NGPUS_PER_NODE=4  # Number of GPUs per node
CPUS_PER_TASK=96  # Number of CPUs per task
HEAD_NODE="localhost"  # Head node for local execution

# Local node configuration
nodes=("$HEAD_NODE")
echo "Nodes to check: ${nodes[@]}"

# We'll track PIDs so we can wait on them and detect errors
declare -A pids
export head_node=${nodes[0]}
head_node_ip=$(hostname -I | awk '{print $1}')
port=6379
address_head=$head_node_ip:$port

export worker_num=$NNODES
export HYDRA_FULL_ERROR=1
export NCCL_IGNORE_DISABLED_P2P=1
export NCCL_P2P_DISABLE=1
export NCCL_IB_DISABLE=1
export NCCL_NET_GDR_LEVEL=0
export VLLM_USE_V1=1

# =================== Ray start ===================
# Stop existing Ray cluster
ray stop

sleep 10
# Remove existing Ray cluster
rm -rf /tmp/ray/ray_current_cluster

# Start Ray head node
ray start --head --node-ip-address="$head_node_ip" --port=$port \
    --num-cpus "${CPUS_PER_TASK}" --num-gpus $NGPUS_PER_NODE --include-dashboard=True --block &

sleep 10


# =================== Start RL training ===================
model=Qwen/Qwen2.5-3B-Instruct
template=qwen2.5-no-system-tool
lr=5e-7
length=512
batch_size=64
num_chains=8
kl_coef=0.001
train_dataset="orz_math_57k_train"
# adv_estimator=rloo
# adv_estimator=reinforce_plus_plus
# adv_estimator=remax
adv_estimator=grpo
# adv_estimator=gae

mini_batch_size=$batch_size

agent_type=code
tools="[code_interpreter]"
reward_name="math_reward_tool"
# reward_name="llm_as_judge_math_reward"
entropy_coeff=0.001
kl_loss_type=mse
max_turns=8
agent_backend="async_verl"
project_name="AgentRL"
total_training_steps=200

experiment_name="test"
# experiment_name="${model}-${agent_type}-${train_dataset}-${lr}-${length}-bs${batch_size}-n${num_chains}-kl${kl_loss_type}${kl_coef}-entropy${entropy_coeff}-${max_turns}steps-${adv_estimator}"

python3 -m verl.trainer.main_ppo \
    algorithm.adv_estimator=$adv_estimator \
    data.train_files=/mnt/yixiali/CODES/AgentFly/data/rlhf/math//${train_dataset}.json \
    data.val_files=/mnt/yixiali/CODES/AgentFly/data/rlhf/math//MATH_500.json \
    data.train_batch_size=$batch_size \
    agent.agent_type=$agent_type \
    agent.tools=$tools \
    agent.template=$template \
    agent.model_name_or_path=$model \
    agent.max_turns=${max_turns} \
    agent.backend=${agent_backend} \
    agent.reward_name=$reward_name \
    agent.num_chains=$num_chains \
    agent.use_agent=True \
    actor_rollout_ref.actor.optim.lr=$lr \
    actor_rollout_ref.model.use_remove_padding=False \
    actor_rollout_ref.model.path=${model} \
    actor_rollout_ref.actor.ppo_mini_batch_size=${mini_batch_size} \
    actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=2 \
    actor_rollout_ref.actor.use_kl_loss=True \
    actor_rollout_ref.actor.kl_loss_coef=$kl_coef \
    actor_rollout_ref.actor.kl_loss_type=$kl_loss_type \
    actor_rollout_ref.actor.entropy_coeff=$entropy_coeff \
    actor_rollout_ref.model.enable_gradient_checkpointing=False \
    actor_rollout_ref.actor.fsdp_config.param_offload=True \
    actor_rollout_ref.actor.fsdp_config.optimizer_offload=True \
    actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=4 \
    actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
    actor_rollout_ref.rollout.name=vllm \
    actor_rollout_ref.rollout.response_length=$length \
    actor_rollout_ref.rollout.gpu_memory_utilization=0.5 \
    actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=4 \
    actor_rollout_ref.ref.fsdp_config.param_offload=True \
    critic.model.path=$model \
    critic.ppo_mini_batch_size=${mini_batch_size} \
    critic.ppo_micro_batch_size_per_gpu=2 \
    algorithm.kl_ctrl.kl_coef=$kl_coef \
    trainer.critic_warmup=0 \
    trainer.logger=['console','wandb'] \
    trainer.project_name=$project_name \
    trainer.experiment_name=${experiment_name} \
    trainer.n_gpus_per_node=4 \
    trainer.nnodes=1 \
    trainer.save_freq=50 \
    trainer.test_freq=10 \
    trainer.total_training_steps=$total_training_steps \
    trainer.val_before_train=False
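
One failure mode worth ruling out: the NCCL_* exports happen in the launch shell, while the training processes are Ray actors spawned by the raylet, so it is worth confirming the workers actually inherit them. A small check against the running cluster (a sketch using only the standard Ray API; run while the cluster started above is up):

python3 - <<'EOF'
import os
import ray

ray.init(address="auto")  # attach to the cluster started by `ray start` above

@ray.remote(num_gpus=1)
def nccl_env():
    # Report the NCCL-related environment as seen inside a GPU worker
    return {k: v for k, v in os.environ.items() if k.startswith("NCCL_")}

print(ray.get(nccl_env.remote()))
EOF

If the flags are missing inside the worker, passing them explicitly via ray.init(runtime_env={"env_vars": {...}}) is one way to force them through.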
