From c34c2efee2e2e222a4c84331910a9ea093407113 Mon Sep 17 00:00:00 2001 From: ncianeo Date: Fri, 3 Feb 2023 22:58:14 +0900 Subject: [PATCH] add optional cuda guard for different device usage --- .../pixel_decoder/ops/src/cuda/ms_deform_attn_cuda.cu | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/mask2former/modeling/pixel_decoder/ops/src/cuda/ms_deform_attn_cuda.cu b/mask2former/modeling/pixel_decoder/ops/src/cuda/ms_deform_attn_cuda.cu index 0c465dab..d738cd09 100644 --- a/mask2former/modeling/pixel_decoder/ops/src/cuda/ms_deform_attn_cuda.cu +++ b/mask2former/modeling/pixel_decoder/ops/src/cuda/ms_deform_attn_cuda.cu @@ -20,6 +20,7 @@ #include <ATen/cuda/CUDAContext.h> #include <cuda.h> #include <cuda_runtime.h> +#include <c10/cuda/CUDAGuard.h> at::Tensor ms_deform_attn_cuda_forward( @@ -55,6 +56,8 @@ at::Tensor ms_deform_attn_cuda_forward( const int im2col_step_ = std::min(batch, im2col_step); AT_ASSERTM(batch % im2col_step_ == 0, "batch(%d) must divide im2col_step(%d)", batch, im2col_step_); + + const at::cuda::OptionalCUDAGuard device_guard(device_of(value)); auto output = at::zeros({batch, num_query, num_heads, channels}, value.options()); @@ -63,6 +66,7 @@ at::Tensor ms_deform_attn_cuda_forward( auto per_value_size = spatial_size * num_heads * channels; auto per_sample_loc_size = num_query * num_heads * num_levels * num_point * 2; auto per_attn_weight_size = num_query * num_heads * num_levels * num_point; + for (int n = 0; n < batch/im2col_step_; ++n) { auto columns = output_n.select(0, n); @@ -123,6 +127,8 @@ std::vector<at::Tensor> ms_deform_attn_cuda_backward( AT_ASSERTM(batch % im2col_step_ == 0, "batch(%d) must divide im2col_step(%d)", batch, im2col_step_); + const at::cuda::OptionalCUDAGuard device_guard(device_of(value)); + auto grad_value = at::zeros_like(value); auto grad_sampling_loc = at::zeros_like(sampling_loc); auto grad_attn_weight = at::zeros_like(attn_weight);