From ade21db9db54be4a0f035f1ca4149afbea8d9038 Mon Sep 17 00:00:00 2001 From: Dan-Flores Date: Wed, 10 Dec 2025 06:57:11 +0000 Subject: [PATCH] try planar NPP function --- src/torchcodec/_core/CudaDeviceInterface.cpp | 22 ++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/torchcodec/_core/CudaDeviceInterface.cpp b/src/torchcodec/_core/CudaDeviceInterface.cpp index 80e16506f..1f323ed51 100644 --- a/src/torchcodec/_core/CudaDeviceInterface.cpp +++ b/src/torchcodec/_core/CudaDeviceInterface.cpp @@ -417,14 +417,24 @@ UniqueAVFrame CudaDeviceInterface::convertCUDATensorToAVFrameForEncoding( avFrame != nullptr && avFrame->data[0] != nullptr, "avFrame must be pre-allocated with CUDA memory"); - // TODO VideoEncoder: Investigate ways to avoid this copy - torch::Tensor hwcFrame = tensor.permute({1, 2, 0}).contiguous(); + const Npp8u* pSrc[3] = { + static_cast(tensor.data_ptr()) + + 0 * tensor.stride(0) * tensor.element_size(), // R plane + static_cast(tensor.data_ptr()) + + 1 * tensor.stride(0) * tensor.element_size(), // G plane + static_cast(tensor.data_ptr()) + + 2 * tensor.stride(0) * tensor.element_size() // B plane + }; + + int aSrcStep[3] = { + static_cast(tensor.stride(1) * tensor.element_size()), + static_cast(tensor.stride(1) * tensor.element_size()), + static_cast(tensor.stride(1) * tensor.element_size())}; NppiSize oSizeROI = {width, height}; - NppStatus status = nppiRGBToNV12_8u_ColorTwist32f_C3P2R_Ctx( - static_cast(hwcFrame.data_ptr()), - validateInt64ToInt( - hwcFrame.stride(0) * hwcFrame.element_size(), "nSrcStep"), + NppStatus status = nppiRGBToNV12_8u_ColorTwist32f_P3P2R_Ctx( + pSrc, + aSrcStep, avFrame->data, avFrame->linesize, oSizeROI,