From 11f90ceeb7547907be3ce14bcd29d6d377c6b302 Mon Sep 17 00:00:00 2001 From: Jiani Huang Date: Thu, 3 Jul 2025 15:43:33 -0400 Subject: [PATCH] cuda kernel code allow multi gpu --- sam2/csrc/connected_components.cu | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sam2/csrc/connected_components.cu b/sam2/csrc/connected_components.cu index ced21eb32..cd814fefa 100644 --- a/sam2/csrc/connected_components.cu +++ b/sam2/csrc/connected_components.cu @@ -212,6 +212,7 @@ __global__ void final_counting( std::vector<torch::Tensor> get_connected_componnets( const torch::Tensor& inputs) { + AT_ASSERTM(inputs.is_cuda(), "inputs must be a CUDA tensor"); AT_ASSERTM(inputs.ndimension() == 4, "inputs must be [N, 1, H, W] shape"); AT_ASSERTM( @@ -240,7 +241,8 @@ std::vector<torch::Tensor> get_connected_componnets( dim3 grid_count = dim3((W + BLOCK_COLS) / BLOCK_COLS, (H + BLOCK_ROWS) / BLOCK_ROWS); dim3 block_count = dim3(BLOCK_COLS, BLOCK_ROWS); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + auto device_idx = inputs.device().index(); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(device_idx); for (int n = 0; n < N; n++) { uint32_t offset = n * H * W;