From 3f0ce59e29c02145bba9b9e3cdb159426a16c8e5 Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Tue, 9 Sep 2025 08:55:55 -0700 Subject: [PATCH 01/15] [SYCL] Get rid of device kernel info duplication With the introduction of DeviceKernelInfo, assert usage and implicit local argument information is now duplicated in program manager. This patch removes the duplicate maps and makes it so that device kernel info map is filled out during image registration, with the compile time information added when it's available (during the first submission of the kernel). --- sycl/source/detail/device_kernel_info.cpp | 39 ++++++++++----- sycl/source/detail/device_kernel_info.hpp | 9 ++-- sycl/source/detail/get_device_kernel_info.cpp | 2 +- sycl/source/detail/kernel_impl.hpp | 7 ++- .../program_manager/program_manager.cpp | 48 +++++++++---------- .../program_manager/program_manager.hpp | 28 ++--------- sycl/source/handler.cpp | 7 ++- sycl/unittests/program_manager/Cleanup.cpp | 18 ------- 8 files changed, 65 insertions(+), 93 deletions(-) diff --git a/sycl/source/detail/device_kernel_info.cpp b/sycl/source/detail/device_kernel_info.cpp index 526f160c6596b..379a8d1d211f2 100644 --- a/sycl/source/detail/device_kernel_info.cpp +++ b/sycl/source/detail/device_kernel_info.cpp @@ -19,14 +19,10 @@ DeviceKernelInfo::DeviceKernelInfo(const CompileTimeKernelInfoTy &Info) Name(Info.Name.data()) #endif { - init(Name.data()); -} - -void DeviceKernelInfo::init(KernelNameStrRefT KernelName) { - auto &PM = detail::ProgramManager::getInstance(); - MUsesAssert = PM.kernelUsesAssert(KernelName); - MImplicitLocalArgPos = PM.kernelImplicitLocalArgPos(KernelName); #ifndef __INTEL_PREVIEW_BREAKING_CHANGES + // Non-legacy implementation either fills out the data during image + // registration after this constructor is called, or uses default values + // if this instance of DeviceKernelInfo corresponds to an interop kernel. MInitialized.store(true); #endif } @@ -36,9 +32,19 @@ void DeviceKernelInfo::initIfEmpty(const CompileTimeKernelInfoTy &Info) { if (MInitialized.load()) return; - CompileTimeKernelInfoTy::operator=(Info); - Name = Info.Name.data(); - init(Name.data()); + // If this function is called, then this is a default initialized + // device kernel info created from older headers and stored in global handler. + // In that case, fetch the proper instance from program manager and copy its + // values. + auto &PM = detail::ProgramManager::getInstance(); + DeviceKernelInfo &PMDeviceKernelInfo = + PM.getDeviceKernelInfo(KernelNameStrRefT(Info.Name)); + + PMDeviceKernelInfo.CompileTimeKernelInfoTy::operator=(Info); + PMDeviceKernelInfo.Name = Info.Name.data(); + + MUsesAssert = PMDeviceKernelInfo.MUsesAssert; + MImplicitLocalArgPos = PMDeviceKernelInfo.MImplicitLocalArgPos; } #endif @@ -78,18 +84,25 @@ FastKernelSubcacheT &DeviceKernelInfo::getKernelSubcache() { assertInitialized(); return MFastKernelSubcache; } -bool DeviceKernelInfo::usesAssert() { +bool DeviceKernelInfo::usesAssert() const { assertInitialized(); return MUsesAssert; } -const std::optional &DeviceKernelInfo::getImplicitLocalArgPos() { +const std::optional &DeviceKernelInfo::getImplicitLocalArgPos() const { assertInitialized(); return MImplicitLocalArgPos; } +void DeviceKernelInfo::setUsesAssert() { MUsesAssert = true; } + +void DeviceKernelInfo::setImplicitLocalArgPos(int Pos) { + assert(!MImplicitLocalArgPos.has_value() || MImplicitLocalArgPos == Pos); + MImplicitLocalArgPos = Pos; +} + bool DeviceKernelInfo::isCompileTimeInfoSet() const { return KernelSize != 0; } -void DeviceKernelInfo::assertInitialized() { +void DeviceKernelInfo::assertInitialized() const { #ifndef __INTEL_PREVIEW_BREAKING_CHANGES assert(MInitialized.load() && "Data needs to be initialized before use"); #endif diff --git a/sycl/source/detail/device_kernel_info.hpp b/sycl/source/detail/device_kernel_info.hpp index 0ea4ff2d051e6..a5d6c2527a6bb 100644 --- a/sycl/source/detail/device_kernel_info.hpp +++ b/sycl/source/detail/device_kernel_info.hpp @@ -108,11 +108,14 @@ class DeviceKernelInfo : public CompileTimeKernelInfoTy { void setCompileTimeInfoIfNeeded(const CompileTimeKernelInfoTy &Info); FastKernelSubcacheT &getKernelSubcache(); - bool usesAssert(); - const std::optional &getImplicitLocalArgPos(); + bool usesAssert() const; + const std::optional &getImplicitLocalArgPos() const; + + void setUsesAssert(); + void setImplicitLocalArgPos(int Pos); private: - void assertInitialized(); + void assertInitialized() const; bool isCompileTimeInfoSet() const; #ifndef __INTEL_PREVIEW_BREAKING_CHANGES diff --git a/sycl/source/detail/get_device_kernel_info.cpp b/sycl/source/detail/get_device_kernel_info.cpp index 084eeeb60d714..536ce75b82143 100644 --- a/sycl/source/detail/get_device_kernel_info.cpp +++ b/sycl/source/detail/get_device_kernel_info.cpp @@ -22,7 +22,7 @@ KernelNameBasedCacheT *createKernelNameBasedCache() { #endif DeviceKernelInfo &getDeviceKernelInfo(const CompileTimeKernelInfoTy &Info) { - return ProgramManager::getInstance().getOrCreateDeviceKernelInfo(Info); + return ProgramManager::getInstance().getDeviceKernelInfo(Info); } } // namespace detail diff --git a/sycl/source/detail/kernel_impl.hpp b/sycl/source/detail/kernel_impl.hpp index 7d83d4ecf68cc..1b0a2b1182fe2 100644 --- a/sycl/source/detail/kernel_impl.hpp +++ b/sycl/source/detail/kernel_impl.hpp @@ -240,10 +240,9 @@ class kernel_impl { std::string_view getName() const; DeviceKernelInfo &getDeviceKernelInfo() { - return MIsInterop - ? MInteropDeviceKernelInfo - : ProgramManager::getInstance().getOrCreateDeviceKernelInfo( - KernelNameStrT(getName())); + return MIsInterop ? MInteropDeviceKernelInfo + : ProgramManager::getInstance().getDeviceKernelInfo( + KernelNameStrT(getName())); } private: diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 07ed72c0df423..e73c4b3c70f3e 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1797,8 +1797,11 @@ void ProgramManager::cacheKernelUsesAssertInfo(const RTDeviceBinaryImage &Img) { const RTDeviceBinaryImage::PropertyRange &AssertUsedRange = Img.getAssertUsed(); if (AssertUsedRange.isAvailable()) - for (const auto &Prop : AssertUsedRange) - m_KernelUsesAssert.insert(Prop->Name); + for (const auto &Prop : AssertUsedRange) { + auto It = m_DeviceKernelInfoMap.find(Prop->Name); + assert(It != m_DeviceKernelInfoMap.end()); + It->second.setUsesAssert(); + } } void ProgramManager::cacheKernelImplicitLocalArg( @@ -1807,36 +1810,27 @@ void ProgramManager::cacheKernelImplicitLocalArg( Img.getImplicitLocalArg(); if (ImplicitLocalArgRange.isAvailable()) for (auto Prop : ImplicitLocalArgRange) { - m_KernelImplicitLocalArgPos[Prop->Name] = - DeviceBinaryProperty(Prop).asUint32(); + auto It = m_DeviceKernelInfoMap.find(Prop->Name); + assert(It != m_DeviceKernelInfoMap.end()); + It->second.setImplicitLocalArgPos(DeviceBinaryProperty(Prop).asUint32()); } } -std::optional -ProgramManager::kernelImplicitLocalArgPos(KernelNameStrRefT KernelName) const { - auto it = m_KernelImplicitLocalArgPos.find(KernelName); - if (it != m_KernelImplicitLocalArgPos.end()) - return it->second; - return {}; -} - -DeviceKernelInfo &ProgramManager::getOrCreateDeviceKernelInfo( - const CompileTimeKernelInfoTy &Info) { +DeviceKernelInfo & +ProgramManager::getDeviceKernelInfo(const CompileTimeKernelInfoTy &Info) { std::lock_guard Guard(m_DeviceKernelInfoMapMutex); - auto [Iter, Inserted] = - m_DeviceKernelInfoMap.try_emplace(KernelNameStrT{Info.Name.data()}, Info); - if (!Inserted) - Iter->second.setCompileTimeInfoIfNeeded(Info); - return Iter->second; + auto It = m_DeviceKernelInfoMap.find(KernelNameStrT{Info.Name.data()}); + assert(It != m_DeviceKernelInfoMap.end()); + It->second.setCompileTimeInfoIfNeeded(Info); + return It->second; } DeviceKernelInfo & -ProgramManager::getOrCreateDeviceKernelInfo(KernelNameStrRefT KernelName) { +ProgramManager::getDeviceKernelInfo(KernelNameStrRefT KernelName) { std::lock_guard Guard(m_DeviceKernelInfoMapMutex); - CompileTimeKernelInfoTy DefaultCompileTimeInfo{std::string_view(KernelName)}; - auto Result = - m_DeviceKernelInfoMap.try_emplace(KernelName, DefaultCompileTimeInfo); - return Result.first->second; + auto It = m_DeviceKernelInfoMap.find(KernelName); + assert(It != m_DeviceKernelInfoMap.end()); + return It->second; } static bool isBfloat16DeviceLibImage(sycl_device_binary RawImg, @@ -2039,6 +2033,10 @@ void ProgramManager::addImage(sycl_device_binary RawImg, m_KernelIDs2BinImage.insert(std::make_pair(It->second, Img.get())); KernelIDs->push_back(It->second); + CompileTimeKernelInfoTy DefaultCompileTimeInfo{std::string_view(name)}; + m_DeviceKernelInfoMap.try_emplace(KernelNameStrT(name), + DefaultCompileTimeInfo); + // Keep track of image to kernel name reference count for cleanup. m_KernelNameRefCount[name]++; } @@ -2232,8 +2230,6 @@ void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) { if (--RefCount == 0) { // TODO aggregate all these maps into a single one since their entries // share lifetime. - m_KernelUsesAssert.erase(Name); - m_KernelImplicitLocalArgPos.erase(Name); m_DeviceKernelInfoMap.erase(Name); m_KernelNameRefCount.erase(RefCountIt); if (Name2IDIt != m_KernelName2KernelIDs.end()) diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index b9d0dc700f77c..c7fcd73b8ae43 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -365,19 +365,13 @@ class ProgramManager { ProgramManager(); ~ProgramManager() = default; - template - bool kernelUsesAssert(const NameT &KernelName) const { - return m_KernelUsesAssert.find(KernelName) != m_KernelUsesAssert.end(); - } - SanitizerType kernelUsesSanitizer() const { return m_SanitizerFoundInImage; } - std::optional - kernelImplicitLocalArgPos(KernelNameStrRefT KernelName) const; + void cacheKernelUsesAssertInfo(const RTDeviceBinaryImage &Img); + void cacheKernelImplicitLocalArg(const RTDeviceBinaryImage &Img); - DeviceKernelInfo & - getOrCreateDeviceKernelInfo(const CompileTimeKernelInfoTy &Info); - DeviceKernelInfo &getOrCreateDeviceKernelInfo(KernelNameStrRefT KernelName); + DeviceKernelInfo &getDeviceKernelInfo(const CompileTimeKernelInfoTy &Info); + DeviceKernelInfo &getDeviceKernelInfo(KernelNameStrRefT KernelName); std::set getRawDeviceImages(const std::vector &KernelIDs); @@ -406,12 +400,6 @@ class ProgramManager { /// Dumps image to current directory void dumpImage(const RTDeviceBinaryImage &Img, uint32_t SequenceID = 0) const; - /// Add info on kernels using assert into cache - void cacheKernelUsesAssertInfo(const RTDeviceBinaryImage &Img); - - /// Add info on kernels using local arg into cache - void cacheKernelImplicitLocalArg(const RTDeviceBinaryImage &Img); - std::set collectDependentDeviceImagesForVirtualFunctions( const RTDeviceBinaryImage &Img, const device_impl &Dev); @@ -518,14 +506,6 @@ class ProgramManager { bool m_UseSpvFile = false; RTDeviceBinaryImageUPtr m_SpvFileImage; - // std::less<> is a transparent comparator that enabled comparison between - // different types without temporary key_type object creation. This includes - // standard overloads, such as comparison between std::string and - // std::string_view or just char*. - using KernelUsesAssertSet = std::set>; - KernelUsesAssertSet m_KernelUsesAssert; - std::unordered_map m_KernelImplicitLocalArgPos; - // Map for storing device kernel information. Runtime lookup should be avoided // by caching the pointers when possible. std::unordered_map m_DeviceKernelInfoMap; diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 26477c99be62c..f776983b69080 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -567,7 +567,7 @@ event handler::finalize() { // Fetch the device kernel info pointer if it hasn't been set (e.g. // in kernel bundle or free function cases). impl->MKernelData.setDeviceKernelInfoPtr( - &detail::ProgramManager::getInstance().getOrCreateDeviceKernelInfo( + &detail::ProgramManager::getInstance().getDeviceKernelInfo( toKernelNameStrT(MKernelName))); } assert(impl->MKernelData.getKernelName() == MKernelName); @@ -974,7 +974,7 @@ void handler::extractArgsAndReqs() { #ifndef __INTEL_PREVIEW_BREAKING_CHANGES if (impl->MKernelData.getDeviceKernelInfoPtr() == nullptr) { impl->MKernelData.setDeviceKernelInfoPtr( - &detail::ProgramManager::getInstance().getOrCreateDeviceKernelInfo( + &detail::ProgramManager::getInstance().getDeviceKernelInfo( detail::toKernelNameStrT(MKernel->getName()))); } #endif @@ -2249,8 +2249,7 @@ void handler::setKernelNameBasedCachePtr( HandlerInfo.IsESIMD = impl->MKernelIsESIMD; HandlerInfo.HasSpecialCaptures = impl->MKernelHasSpecialCaptures; impl->MKernelData.setDeviceKernelInfoPtr( - &detail::ProgramManager::getInstance().getOrCreateDeviceKernelInfo( - HandlerInfo)); + &detail::ProgramManager::getInstance().getDeviceKernelInfo(HandlerInfo)); } void handler::setKernelInfo( diff --git a/sycl/unittests/program_manager/Cleanup.cpp b/sycl/unittests/program_manager/Cleanup.cpp index 1bcbfa7676255..40025f19abddf 100644 --- a/sycl/unittests/program_manager/Cleanup.cpp +++ b/sycl/unittests/program_manager/Cleanup.cpp @@ -73,13 +73,6 @@ class ProgramManagerExposed : public sycl::detail::ProgramManager { return m_EliminatedKernelArgMasks; } - KernelUsesAssertSet &getKernelUsesAssert() { return m_KernelUsesAssert; } - - std::unordered_map & - getKernelImplicitLocalArgPos() { - return m_KernelImplicitLocalArgPos; - } - std::unordered_map> & getHostPipes() { @@ -311,11 +304,6 @@ void checkAllInvolvedContainers(ProgramManagerExposed &PM, "Kernel name reference count " + CommentPostfix); EXPECT_EQ(PM.getEliminatedKernelArgMask().size(), ExpectedImgCount) << "Eliminated kernel arg mask " + CommentPostfix; - checkContainer(PM.getKernelUsesAssert(), ExpectedEntryCount, - generateRefNames(ImgIds, "Kernel"), - "KernelUsesAssert " + CommentPostfix); - EXPECT_EQ(PM.getKernelImplicitLocalArgPos().size(), ExpectedEntryCount) - << "Kernel implicit local arg pos " + CommentPostfix; if (!MultipleImgsPerEntryTestCase) { // FIXME expected to fail for now, device globals cleanup seems to be @@ -365,10 +353,6 @@ TEST(ImageRemoval, BaseContainers) { generateRefName("B", "HostPipe").c_str()); PM.addOrInitHostPipeEntry(PipeC::get_host_ptr(), generateRefName("C", "HostPipe").c_str()); - std::vector KernelNames = - generateRefNames({"A", "B", "C"}, "Kernel"); - for (const std::string &Name : KernelNames) - PM.getOrCreateDeviceKernelInfo(Name); checkAllInvolvedContainers(PM, ImagesToRemove.size() + ImagesToKeep.size(), {"A", "B", "C"}, "check failed before removal"); @@ -392,8 +376,6 @@ TEST(ImageRemoval, MultipleImagesPerEntry) { convertAndAddImages(PM, ImagesToRemoveSameEntries, NativeImagesForRemoval, TestBinaries); - std::string KernelName = generateRefName("A", "Kernel"); - PM.getOrCreateDeviceKernelInfo(KernelName); checkAllInvolvedContainers( PM, ImagesToRemoveSameEntries.size() + ImagesToKeepSameEntries.size(), /*ExpectedEntryCount*/ 1, {"A"}, "check failed before removal", From 1343d13e1a6232f304ebd5f3519a399972add77d Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Fri, 10 Oct 2025 06:55:25 -0700 Subject: [PATCH 02/15] [SYCL] Use 'kernel's info instance for more image origins --- sycl/source/detail/kernel_impl.cpp | 12 +++++++----- sycl/source/detail/kernel_impl.hpp | 10 ++++++---- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/sycl/source/detail/kernel_impl.cpp b/sycl/source/detail/kernel_impl.cpp index dfed6881a3ef8..a72013e174023 100644 --- a/sycl/source/detail/kernel_impl.cpp +++ b/sycl/source/detail/kernel_impl.cpp @@ -31,8 +31,8 @@ kernel_impl::kernel_impl(Managed &&Kernel, MCreatedFromSource(true), MKernelBundleImpl(KernelBundleImpl ? KernelBundleImpl->shared_from_this() : nullptr), - MIsInterop(true), MKernelArgMaskPtr{ArgMask}, - MInteropDeviceKernelInfo(createCompileTimeKernelInfo(getName())) { + MIsInterop(true), MKernelArgMaskPtr{ArgMask}, MOwnsDeviceKernelInfo(true), + MDeviceKernelInfo(createCompileTimeKernelInfo(getName())) { ur_context_handle_t UrContext = nullptr; // Using the adapter from the passed ContextImpl getAdapter().call( @@ -59,9 +59,11 @@ kernel_impl::kernel_impl(Managed &&Kernel, MKernelBundleImpl(KernelBundleImpl.shared_from_this()), MIsInterop(MDeviceImageImpl->getOriginMask() & ImageOriginInterop), MKernelArgMaskPtr{ArgMask}, MCacheMutex{CacheMutex}, - MInteropDeviceKernelInfo(MIsInterop - ? createCompileTimeKernelInfo(getName()) - : createCompileTimeKernelInfo()) { + MOwnsDeviceKernelInfo(MDeviceImageImpl->getOriginMask() & + ~ImageOriginSYCLOffline), + MDeviceKernelInfo(MOwnsDeviceKernelInfo + ? createCompileTimeKernelInfo(getName()) + : createCompileTimeKernelInfo()) { // Enable USM indirect access for interop and non-sycl-jit source kernels. // sycl-jit kernels will enable this if needed through the regular kernel // path. diff --git a/sycl/source/detail/kernel_impl.hpp b/sycl/source/detail/kernel_impl.hpp index 7d83d4ecf68cc..2ee90dba8872f 100644 --- a/sycl/source/detail/kernel_impl.hpp +++ b/sycl/source/detail/kernel_impl.hpp @@ -240,8 +240,8 @@ class kernel_impl { std::string_view getName() const; DeviceKernelInfo &getDeviceKernelInfo() { - return MIsInterop - ? MInteropDeviceKernelInfo + return MOwnsDeviceKernelInfo + ? MDeviceKernelInfo : ProgramManager::getInstance().getOrCreateDeviceKernelInfo( KernelNameStrT(getName())); } @@ -259,9 +259,11 @@ class kernel_impl { std::mutex *MCacheMutex = nullptr; mutable std::string MName; - // It is used for the interop kernels only. + // Used for images that aren't obtained with standard SYCL offline + // compilation. // For regular kernel we get DeviceKernelInfo from the ProgramManager. - DeviceKernelInfo MInteropDeviceKernelInfo; + bool MOwnsDeviceKernelInfo = false; + DeviceKernelInfo MDeviceKernelInfo; bool isBuiltInKernel(device_impl &Device) const; void checkIfValidForNumArgsInfoQuery() const; From 31578850eea1c446022ea41d828798697fa22a7d Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Thu, 16 Oct 2025 08:28:34 -0700 Subject: [PATCH 03/15] Check kernel info ownership with mixed image origins --- sycl/source/detail/kernel_impl.cpp | 15 +++++++++++++-- sycl/source/detail/kernel_impl.hpp | 1 + .../detail/program_manager/program_manager.cpp | 7 +++++++ .../detail/program_manager/program_manager.hpp | 1 + 4 files changed, 22 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/kernel_impl.cpp b/sycl/source/detail/kernel_impl.cpp index a72013e174023..4034f227288c0 100644 --- a/sycl/source/detail/kernel_impl.cpp +++ b/sycl/source/detail/kernel_impl.cpp @@ -59,11 +59,11 @@ kernel_impl::kernel_impl(Managed &&Kernel, MKernelBundleImpl(KernelBundleImpl.shared_from_this()), MIsInterop(MDeviceImageImpl->getOriginMask() & ImageOriginInterop), MKernelArgMaskPtr{ArgMask}, MCacheMutex{CacheMutex}, - MOwnsDeviceKernelInfo(MDeviceImageImpl->getOriginMask() & - ~ImageOriginSYCLOffline), + MOwnsDeviceKernelInfo(checkOwnsDeviceKernelInfo()), MDeviceKernelInfo(MOwnsDeviceKernelInfo ? createCompileTimeKernelInfo(getName()) : createCompileTimeKernelInfo()) { + // Enable USM indirect access for interop and non-sycl-jit source kernels. // sycl-jit kernels will enable this if needed through the regular kernel // path. @@ -123,6 +123,17 @@ std::string_view kernel_impl::getName() const { return MName; } +bool kernel_impl::checkOwnsDeviceKernelInfo() { + // If the image originates from something other than standard offline + // compilation, this kernel needs to own its info structure. + // We could also have a mixed origin image, in which case the device kernel + // info might reside in program manager. + return MDeviceImageImpl->getOriginMask() != ImageOriginSYCLOffline && + (!(MDeviceImageImpl->getOriginMask() & ImageOriginSYCLOffline) || + !ProgramManager::getInstance().tryGetDeviceKernelInfo( + static_cast(getName()))); +} + bool kernel_impl::isBuiltInKernel(device_impl &Device) const { auto BuiltInKernels = Device.get_info(); if (BuiltInKernels.empty()) diff --git a/sycl/source/detail/kernel_impl.hpp b/sycl/source/detail/kernel_impl.hpp index 8219b5b86ba3e..952e4a0b9e477 100644 --- a/sycl/source/detail/kernel_impl.hpp +++ b/sycl/source/detail/kernel_impl.hpp @@ -239,6 +239,7 @@ class kernel_impl { std::mutex *getCacheMutex() const { return MCacheMutex; } std::string_view getName() const; + bool checkOwnsDeviceKernelInfo(); DeviceKernelInfo &getDeviceKernelInfo() { return MOwnsDeviceKernelInfo ? MDeviceKernelInfo diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index e73c4b3c70f3e..1f29189930135 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1833,6 +1833,13 @@ ProgramManager::getDeviceKernelInfo(KernelNameStrRefT KernelName) { return It->second; } +DeviceKernelInfo * +ProgramManager::tryGetDeviceKernelInfo(KernelNameStrRefT KernelName) { + std::lock_guard Guard(m_DeviceKernelInfoMapMutex); + auto It = m_DeviceKernelInfoMap.find(KernelName); + return It != m_DeviceKernelInfoMap.end() ? &It->second : nullptr; +} + static bool isBfloat16DeviceLibImage(sycl_device_binary RawImg, uint32_t *LibVersion = nullptr) { sycl_device_binary_property_set ImgPS; diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index c7fcd73b8ae43..0753866321c12 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -372,6 +372,7 @@ class ProgramManager { DeviceKernelInfo &getDeviceKernelInfo(const CompileTimeKernelInfoTy &Info); DeviceKernelInfo &getDeviceKernelInfo(KernelNameStrRefT KernelName); + DeviceKernelInfo *tryGetDeviceKernelInfo(KernelNameStrRefT KernelName); std::set getRawDeviceImages(const std::vector &KernelIDs); From 68df2eb7a57e72d208cd93c91fd17e20095e2f30 Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Thu, 16 Oct 2025 08:31:49 -0700 Subject: [PATCH 04/15] Adjust kernel_from_file test --- sycl/test-e2e/Config/kernel_from_file.cpp | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/sycl/test-e2e/Config/kernel_from_file.cpp b/sycl/test-e2e/Config/kernel_from_file.cpp index c5ae870000ce9..1f64745ec36d7 100644 --- a/sycl/test-e2e/Config/kernel_from_file.cpp +++ b/sycl/test-e2e/Config/kernel_from_file.cpp @@ -1,11 +1,9 @@ // REQUIRES: target-spir -// FIXME Disabled fallback assert as it'll require either online linking or -// explicit offline linking step here // FIXME separate compilation requires -fno-sycl-dead-args-optimization // As we are doing a separate device compilation here, we need to explicitly // add the device lib instrumentation (itt_compiler_wrapper) -// RUN: %clangxx -Wno-error=ignored-attributes -DSYCL_DISABLE_FALLBACK_ASSERT %cxx_std_optionc++17 -fsycl-device-only -fno-sycl-dead-args-optimization -Xclang -fsycl-int-header=%t.h %s -o %t.bc -Xclang -verify-ignore-unexpected=note,warning -Wno-sycl-strict +// RUN: %clangxx -Wno-error=ignored-attributes -DUSED_KERNEL -fno-sycl-dead-args-optimization %cxx_std_optionc++17 -fsycl-device-only -Xclang -fsycl-int-header=%t.h %s -o %t.bc -Xclang -verify-ignore-unexpected=note,warning -Wno-sycl-strict // >> ---- unbundle compiler wrapper and asan device objects // RUN: clang-offload-bundler -type=o -targets=sycl-spir64-unknown-unknown -input=%sycl_static_libs_dir/libsycl-itt-compiler-wrappers%obj_ext -output=%t_compiler_wrappers.bc -unbundle // RUN: %if linux %{ clang-offload-bundler -type=o -targets=sycl-spir64-unknown-unknown -input=%sycl_static_libs_dir/libsycl-asan%obj_ext -output=%t_asan.bc -unbundle %} @@ -13,7 +11,9 @@ // RUN: %if linux %{ llvm-link -o=%t_app.bc %t.bc %t_compiler_wrappers.bc %t_asan.bc %} %else %{ llvm-link -o=%t_app.bc %t.bc %t_compiler_wrappers.bc %} // >> ---- translate to SPIR-V // RUN: llvm-spirv -o %t.spv %t_app.bc -// RUN: %clangxx -Wno-error=ignored-attributes %sycl_include -DSYCL_DISABLE_FALLBACK_ASSERT %cxx_std_optionc++17 %include_option %t.h %s -o %t.out %sycl_options -Xclang -verify-ignore-unexpected=note,warning %if preview-mode %{-Wno-unused-command-line-argument%} +// Need to perform full compilation here since the SYCL runtime uses image +// properties from the fat binary. +// RUN: %{build} -fno-sycl-dead-args-optimization -o %t.out // RUN: env SYCL_USE_KERNEL_SPV=%t.spv %{run} %t.out #include @@ -31,10 +31,15 @@ int main(int argc, char **argv) { event e = myQueue.submit([&](handler &cgh) { auto ptr = buf.get_access(cgh); - cgh.single_task([=]() { ptr[0]++; }); + cgh.single_task([=]() { +#ifdef USED_KERNEL + ptr[0]++; +#else + ptr[0]--; +#endif + }); }); e.wait_and_throw(); - } catch (sycl::exception const &e) { std::cerr << "SYCL exception caught:\n"; std::cerr << e.what() << "\n"; From 0e1e67ef0fe8170558684d132e303d3acb8955aa Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Mon, 20 Oct 2025 07:22:51 -0700 Subject: [PATCH 05/15] Update unit tests to contain image binaries --- sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp | 6 ++++++ .../Extensions/CommandGraph/CommonReferenceSemantics.cpp | 5 ++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp b/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp index dedd4ebbcb407..c5d417a7d2026 100644 --- a/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp +++ b/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp @@ -15,6 +15,12 @@ class Kernel3; MOCK_INTEGRATION_HEADER(Kernel1) MOCK_INTEGRATION_HEADER(Kernel2) MOCK_INTEGRATION_HEADER(Kernel3) +static sycl::unittest::MockDeviceImage CommandGraphImgs[3] = { + sycl::unittest::generateDefaultImage({"Kernel1"}), + sycl::unittest::generateDefaultImage({"Kernel2"}), + sycl::unittest::generateDefaultImage({"Kernel3"})}; +static sycl::unittest::MockDeviceImageArray<3> CommandGraphImgArray{ + CommandGraphImgs}; using namespace sycl; using namespace sycl::ext::oneapi; diff --git a/sycl/unittests/Extensions/CommandGraph/CommonReferenceSemantics.cpp b/sycl/unittests/Extensions/CommandGraph/CommonReferenceSemantics.cpp index d6fd4d1ec5e4b..e0b7e1da44e9e 100644 --- a/sycl/unittests/Extensions/CommandGraph/CommonReferenceSemantics.cpp +++ b/sycl/unittests/Extensions/CommandGraph/CommonReferenceSemantics.cpp @@ -14,7 +14,10 @@ using namespace sycl::ext::oneapi; class MockKernel; MOCK_INTEGRATION_HEADER(MockKernel) - +static sycl::unittest::MockDeviceImage MockKernelImg = + sycl::unittest::generateDefaultImage({"MockKernel"}); +static sycl::unittest::MockDeviceImageArray<1> MockKernelImgArray{ + &MockKernelImg}; /** * Checks that the operators and constructors of graph related classes meet the * common reference semantics. From 3d952d5e427533f78e52bb12090571be71b5197b Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Thu, 6 Nov 2025 07:09:38 -0800 Subject: [PATCH 06/15] Fix compilation --- sycl/source/detail/scheduler/commands.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 498a3cee61728..effb74b29fbba 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2469,9 +2469,8 @@ static ur_result_t SetKernelParamsAndLaunch( applyFuncOnFilteredArgs(EliminatedArgMask, Args, setFunc); } - std::optional ImplicitLocalArg = - ProgramManager::getInstance().kernelImplicitLocalArgPos( - DeviceKernelInfo.Name); + const std::optional &ImplicitLocalArg = + DeviceKernelInfo.getImplicitLocalArgPos(); // Set the implicit local memory buffer to support // get_work_group_scratch_memory. This is for backend not supporting // CUDA-style local memory setting. Note that we may have -1 as a position, From a7a31e8d016f72b44ae1198944cbba7e4084285f Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Thu, 6 Nov 2025 08:03:12 -0800 Subject: [PATCH 07/15] Appease clang-format --- sycl/source/detail/scheduler/commands.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index effb74b29fbba..90aabf70f909b 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2470,7 +2470,7 @@ static ur_result_t SetKernelParamsAndLaunch( } const std::optional &ImplicitLocalArg = - DeviceKernelInfo.getImplicitLocalArgPos(); + DeviceKernelInfo.getImplicitLocalArgPos(); // Set the implicit local memory buffer to support // get_work_group_scratch_memory. This is for backend not supporting // CUDA-style local memory setting. Note that we may have -1 as a position, From 1a663bac0b6802798b51ff6096cfdb3bbcecf48b Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Wed, 3 Dec 2025 07:48:43 -0800 Subject: [PATCH 08/15] Apply clang-format --- sycl/source/detail/device_kernel_info.cpp | 3 +-- sycl/source/detail/device_kernel_info.hpp | 1 + sycl/source/detail/kernel_impl.cpp | 3 +-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/sycl/source/detail/device_kernel_info.cpp b/sycl/source/detail/device_kernel_info.cpp index 42e4cbe58b886..a256870a1058d 100644 --- a/sycl/source/detail/device_kernel_info.cpp +++ b/sycl/source/detail/device_kernel_info.cpp @@ -13,8 +13,7 @@ inline namespace _V1 { namespace detail { DeviceKernelInfo::DeviceKernelInfo(const CompileTimeKernelInfoTy &Info) - : CompileTimeKernelInfoTy(Info) { -} + : CompileTimeKernelInfoTy(Info) {} template inline constexpr bool operator==(const CompileTimeKernelInfoTy &LHS, diff --git a/sycl/source/detail/device_kernel_info.hpp b/sycl/source/detail/device_kernel_info.hpp index 5017d642c1c05..15ceaaedd7a9d 100644 --- a/sycl/source/detail/device_kernel_info.hpp +++ b/sycl/source/detail/device_kernel_info.hpp @@ -101,6 +101,7 @@ class DeviceKernelInfo : public CompileTimeKernelInfoTy { } void setImplicitLocalArgPos(int Pos); + private: bool isCompileTimeInfoSet() const { return KernelSize != 0; } diff --git a/sycl/source/detail/kernel_impl.cpp b/sycl/source/detail/kernel_impl.cpp index e30fd4baf47e3..484291366ee3d 100644 --- a/sycl/source/detail/kernel_impl.cpp +++ b/sycl/source/detail/kernel_impl.cpp @@ -130,8 +130,7 @@ bool kernel_impl::checkOwnsDeviceKernelInfo() { // info might reside in program manager. return MDeviceImageImpl->getOriginMask() != ImageOriginSYCLOffline && (!(MDeviceImageImpl->getOriginMask() & ImageOriginSYCLOffline) || - !ProgramManager::getInstance().tryGetDeviceKernelInfo( - getName())); + !ProgramManager::getInstance().tryGetDeviceKernelInfo(getName())); } bool kernel_impl::isBuiltInKernel(device_impl &Device) const { From ac56e0ad495cbc30de95b75910c994b5194ef339 Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Wed, 3 Dec 2025 12:15:22 -0800 Subject: [PATCH 09/15] Make implicit local arg member const --- sycl/source/detail/device_kernel_info.cpp | 11 +++--- sycl/source/detail/device_kernel_info.hpp | 8 ++--- sycl/source/detail/kernel_impl.cpp | 12 ++----- .../program_manager/program_manager.cpp | 36 +++++++++---------- .../program_manager/program_manager.hpp | 2 -- 5 files changed, 28 insertions(+), 41 deletions(-) diff --git a/sycl/source/detail/device_kernel_info.cpp b/sycl/source/detail/device_kernel_info.cpp index a256870a1058d..b0cfa2b1bd70c 100644 --- a/sycl/source/detail/device_kernel_info.cpp +++ b/sycl/source/detail/device_kernel_info.cpp @@ -12,8 +12,10 @@ namespace sycl { inline namespace _V1 { namespace detail { -DeviceKernelInfo::DeviceKernelInfo(const CompileTimeKernelInfoTy &Info) - : CompileTimeKernelInfoTy(Info) {} +DeviceKernelInfo::DeviceKernelInfo(std::string_view Name, + std::optional ImplicitLocalArgPos) + : CompileTimeKernelInfoTy{Name}, MImplicitLocalArgPos{ImplicitLocalArgPos} { +} template inline constexpr bool operator==(const CompileTimeKernelInfoTy &LHS, @@ -42,11 +44,6 @@ void DeviceKernelInfo::setCompileTimeInfoIfNeeded( assert(isCompileTimeInfoSet()); assert(Info == *this); } - -void DeviceKernelInfo::setImplicitLocalArgPos(int Pos) { - assert(!MImplicitLocalArgPos.has_value() || MImplicitLocalArgPos == Pos); - MImplicitLocalArgPos = Pos; -} } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/detail/device_kernel_info.hpp b/sycl/source/detail/device_kernel_info.hpp index 15ceaaedd7a9d..d74b5d63bbd42 100644 --- a/sycl/source/detail/device_kernel_info.hpp +++ b/sycl/source/detail/device_kernel_info.hpp @@ -89,9 +89,9 @@ struct FastKernelSubcacheT { // into this structure and get rid of the other KernelName -> * maps. class DeviceKernelInfo : public CompileTimeKernelInfoTy { public: - DeviceKernelInfo(const CompileTimeKernelInfoTy &Info); + DeviceKernelInfo(std::string_view Name, + std::optional ImplicitLocalArgPos = {}); - void init(std::string_view KernelName); void setCompileTimeInfoIfNeeded(const CompileTimeKernelInfoTy &Info); FastKernelSubcacheT &getKernelSubcache() { return MFastKernelSubcache; } @@ -100,13 +100,11 @@ class DeviceKernelInfo : public CompileTimeKernelInfoTy { return MImplicitLocalArgPos; } - void setImplicitLocalArgPos(int Pos); - private: bool isCompileTimeInfoSet() const { return KernelSize != 0; } FastKernelSubcacheT MFastKernelSubcache; - std::optional MImplicitLocalArgPos; + const std::optional MImplicitLocalArgPos; }; } // namespace detail diff --git a/sycl/source/detail/kernel_impl.cpp b/sycl/source/detail/kernel_impl.cpp index 484291366ee3d..86122dd32b4f5 100644 --- a/sycl/source/detail/kernel_impl.cpp +++ b/sycl/source/detail/kernel_impl.cpp @@ -16,11 +16,6 @@ namespace sycl { inline namespace _V1 { namespace detail { -static CompileTimeKernelInfoTy -createCompileTimeKernelInfo(std::string_view KernelName = {}) { - return CompileTimeKernelInfoTy{KernelName}; -} - kernel_impl::kernel_impl(Managed &&Kernel, context_impl &Context, kernel_bundle_impl *KernelBundleImpl, @@ -32,7 +27,7 @@ kernel_impl::kernel_impl(Managed &&Kernel, MKernelBundleImpl(KernelBundleImpl ? KernelBundleImpl->shared_from_this() : nullptr), MIsInterop(true), MKernelArgMaskPtr{ArgMask}, MOwnsDeviceKernelInfo(true), - MDeviceKernelInfo(createCompileTimeKernelInfo(getName())) { + MDeviceKernelInfo(getName()) { ur_context_handle_t UrContext = nullptr; // Using the adapter from the passed ContextImpl getAdapter().call( @@ -60,9 +55,8 @@ kernel_impl::kernel_impl(Managed &&Kernel, MIsInterop(MDeviceImageImpl->getOriginMask() & ImageOriginInterop), MKernelArgMaskPtr{ArgMask}, MCacheMutex{CacheMutex}, MOwnsDeviceKernelInfo(checkOwnsDeviceKernelInfo()), - MDeviceKernelInfo(MOwnsDeviceKernelInfo - ? createCompileTimeKernelInfo(getName()) - : createCompileTimeKernelInfo()) { + MDeviceKernelInfo(MOwnsDeviceKernelInfo ? getName() + : std::string_view()) { // Enable USM indirect access for interop and non-sycl-jit source kernels. // sycl-jit kernels will enable this if needed through the regular kernel diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 9a2bac97c6d39..3054c6d891793 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1788,18 +1788,6 @@ Managed ProgramManager::build( return LinkedProg; } -void ProgramManager::cacheKernelImplicitLocalArg( - const RTDeviceBinaryImage &Img) { - const RTDeviceBinaryImage::PropertyRange &ImplicitLocalArgRange = - Img.getImplicitLocalArg(); - if (ImplicitLocalArgRange.isAvailable()) - for (auto Prop : ImplicitLocalArgRange) { - auto It = m_DeviceKernelInfoMap.find(Prop->Name); - assert(It != m_DeviceKernelInfoMap.end()); - It->second.setImplicitLocalArgPos(DeviceBinaryProperty(Prop).asUint32()); - } -} - DeviceKernelInfo & ProgramManager::getDeviceKernelInfo(const CompileTimeKernelInfoTy &Info) { std::lock_guard Guard(m_DeviceKernelInfoMapMutex); @@ -2000,6 +1988,16 @@ void ProgramManager::addImage(sycl_device_binary RawImg, m_BinImg2KernelIDs[Img.get()]; KernelIDs.reset(new std::vector); + std::unordered_map ImplicitLocalArgPositions; + const RTDeviceBinaryImage::PropertyRange &ImplicitLocalArgRange = + Img->getImplicitLocalArg(); + if (ImplicitLocalArgRange.isAvailable()) + for (auto Prop : ImplicitLocalArgRange) { + auto Result = ImplicitLocalArgPositions.try_emplace( + Prop->Name, DeviceBinaryProperty(Prop).asUint32()); + assert(Result.second && "Duplicate implicit arg property"); + } + for (sycl_offload_entry EntriesIt = EntriesB; EntriesIt != EntriesE; EntriesIt = EntriesIt->Increment()) { @@ -2024,10 +2022,14 @@ void ProgramManager::addImage(sycl_device_binary RawImg, m_KernelIDs2BinImage.insert(std::make_pair(It->second, Img.get())); KernelIDs->push_back(It->second); - CompileTimeKernelInfoTy DefaultCompileTimeInfo{std::string_view(name)}; - m_DeviceKernelInfoMap.try_emplace(std::string_view(name), - DefaultCompileTimeInfo); - + std::optional ImplicitLocalArgPos; + auto ImplicitLocalArgPosIt = ImplicitLocalArgPositions.find(name); + if (ImplicitLocalArgPosIt != ImplicitLocalArgPositions.end()) + ImplicitLocalArgPos = ImplicitLocalArgPosIt->second; + auto Result = + m_DeviceKernelInfoMap.try_emplace(name, name, ImplicitLocalArgPos); + assert(ImplicitLocalArgPos == Result->first.getImplicitLocalArgPos() && + "Conflicting values of implicit local arg positions"); // Keep track of image to kernel name reference count for cleanup. m_KernelNameRefCount[name]++; } @@ -2048,8 +2050,6 @@ void ProgramManager::addImage(sycl_device_binary RawImg, } } - cacheKernelImplicitLocalArg(*Img); - // Sort kernel ids for faster search std::sort(KernelIDs->begin(), KernelIDs->end(), LessByHash{}); diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index 09b8d1b0b0f29..24b78f84516bb 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -393,8 +393,6 @@ class ProgramManager { SanitizerType kernelUsesSanitizer() const { return m_SanitizerFoundInImage; } - void cacheKernelImplicitLocalArg(const RTDeviceBinaryImage &Img); - DeviceKernelInfo &getDeviceKernelInfo(const CompileTimeKernelInfoTy &Info); DeviceKernelInfo &getDeviceKernelInfo(std::string_view KernelName); DeviceKernelInfo *tryGetDeviceKernelInfo(std::string_view KernelName); From 8e4fe989234c1db978b5ee60ee1fa02d2a0ebbda Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Thu, 4 Dec 2025 04:14:53 -0800 Subject: [PATCH 10/15] Fix build error --- sycl/source/detail/program_manager/program_manager.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 3054c6d891793..e9cd549ec5606 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -2028,7 +2028,8 @@ void ProgramManager::addImage(sycl_device_binary RawImg, ImplicitLocalArgPos = ImplicitLocalArgPosIt->second; auto Result = m_DeviceKernelInfoMap.try_emplace(name, name, ImplicitLocalArgPos); - assert(ImplicitLocalArgPos == Result->first.getImplicitLocalArgPos() && + assert(ImplicitLocalArgPos == + Result.first->second.getImplicitLocalArgPos() && "Conflicting values of implicit local arg positions"); // Keep track of image to kernel name reference count for cleanup. m_KernelNameRefCount[name]++; From 59f8a66d5d68b85e605b59e7a39d0b0c33cccf8a Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Mon, 8 Dec 2025 06:34:29 -0800 Subject: [PATCH 11/15] Revert "Fix build error" This reverts commit 8e4fe989234c1db978b5ee60ee1fa02d2a0ebbda. --- sycl/source/detail/program_manager/program_manager.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index e9cd549ec5606..3054c6d891793 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -2028,8 +2028,7 @@ void ProgramManager::addImage(sycl_device_binary RawImg, ImplicitLocalArgPos = ImplicitLocalArgPosIt->second; auto Result = m_DeviceKernelInfoMap.try_emplace(name, name, ImplicitLocalArgPos); - assert(ImplicitLocalArgPos == - Result.first->second.getImplicitLocalArgPos() && + assert(ImplicitLocalArgPos == Result->first.getImplicitLocalArgPos() && "Conflicting values of implicit local arg positions"); // Keep track of image to kernel name reference count for cleanup. m_KernelNameRefCount[name]++; From 83260d5fdd49286e8f3a5310b86fd32227d3985b Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Mon, 8 Dec 2025 06:34:34 -0800 Subject: [PATCH 12/15] Revert "Make implicit local arg member const" This reverts commit ac56e0ad495cbc30de95b75910c994b5194ef339. --- sycl/source/detail/device_kernel_info.cpp | 11 +++--- sycl/source/detail/device_kernel_info.hpp | 8 +++-- sycl/source/detail/kernel_impl.cpp | 12 +++++-- .../program_manager/program_manager.cpp | 36 +++++++++---------- .../program_manager/program_manager.hpp | 2 ++ 5 files changed, 41 insertions(+), 28 deletions(-) diff --git a/sycl/source/detail/device_kernel_info.cpp b/sycl/source/detail/device_kernel_info.cpp index b0cfa2b1bd70c..a256870a1058d 100644 --- a/sycl/source/detail/device_kernel_info.cpp +++ b/sycl/source/detail/device_kernel_info.cpp @@ -12,10 +12,8 @@ namespace sycl { inline namespace _V1 { namespace detail { -DeviceKernelInfo::DeviceKernelInfo(std::string_view Name, - std::optional ImplicitLocalArgPos) - : CompileTimeKernelInfoTy{Name}, MImplicitLocalArgPos{ImplicitLocalArgPos} { -} +DeviceKernelInfo::DeviceKernelInfo(const CompileTimeKernelInfoTy &Info) + : CompileTimeKernelInfoTy(Info) {} template inline constexpr bool operator==(const CompileTimeKernelInfoTy &LHS, @@ -44,6 +42,11 @@ void DeviceKernelInfo::setCompileTimeInfoIfNeeded( assert(isCompileTimeInfoSet()); assert(Info == *this); } + +void DeviceKernelInfo::setImplicitLocalArgPos(int Pos) { + assert(!MImplicitLocalArgPos.has_value() || MImplicitLocalArgPos == Pos); + MImplicitLocalArgPos = Pos; +} } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/detail/device_kernel_info.hpp b/sycl/source/detail/device_kernel_info.hpp index d74b5d63bbd42..15ceaaedd7a9d 100644 --- a/sycl/source/detail/device_kernel_info.hpp +++ b/sycl/source/detail/device_kernel_info.hpp @@ -89,9 +89,9 @@ struct FastKernelSubcacheT { // into this structure and get rid of the other KernelName -> * maps. class DeviceKernelInfo : public CompileTimeKernelInfoTy { public: - DeviceKernelInfo(std::string_view Name, - std::optional ImplicitLocalArgPos = {}); + DeviceKernelInfo(const CompileTimeKernelInfoTy &Info); + void init(std::string_view KernelName); void setCompileTimeInfoIfNeeded(const CompileTimeKernelInfoTy &Info); FastKernelSubcacheT &getKernelSubcache() { return MFastKernelSubcache; } @@ -100,11 +100,13 @@ class DeviceKernelInfo : public CompileTimeKernelInfoTy { return MImplicitLocalArgPos; } + void setImplicitLocalArgPos(int Pos); + private: bool isCompileTimeInfoSet() const { return KernelSize != 0; } FastKernelSubcacheT MFastKernelSubcache; - const std::optional MImplicitLocalArgPos; + std::optional MImplicitLocalArgPos; }; } // namespace detail diff --git a/sycl/source/detail/kernel_impl.cpp b/sycl/source/detail/kernel_impl.cpp index 86122dd32b4f5..484291366ee3d 100644 --- a/sycl/source/detail/kernel_impl.cpp +++ b/sycl/source/detail/kernel_impl.cpp @@ -16,6 +16,11 @@ namespace sycl { inline namespace _V1 { namespace detail { +static CompileTimeKernelInfoTy +createCompileTimeKernelInfo(std::string_view KernelName = {}) { + return CompileTimeKernelInfoTy{KernelName}; +} + kernel_impl::kernel_impl(Managed &&Kernel, context_impl &Context, kernel_bundle_impl *KernelBundleImpl, @@ -27,7 +32,7 @@ kernel_impl::kernel_impl(Managed &&Kernel, MKernelBundleImpl(KernelBundleImpl ? KernelBundleImpl->shared_from_this() : nullptr), MIsInterop(true), MKernelArgMaskPtr{ArgMask}, MOwnsDeviceKernelInfo(true), - MDeviceKernelInfo(getName()) { + MDeviceKernelInfo(createCompileTimeKernelInfo(getName())) { ur_context_handle_t UrContext = nullptr; // Using the adapter from the passed ContextImpl getAdapter().call( @@ -55,8 +60,9 @@ kernel_impl::kernel_impl(Managed &&Kernel, MIsInterop(MDeviceImageImpl->getOriginMask() & ImageOriginInterop), MKernelArgMaskPtr{ArgMask}, MCacheMutex{CacheMutex}, MOwnsDeviceKernelInfo(checkOwnsDeviceKernelInfo()), - MDeviceKernelInfo(MOwnsDeviceKernelInfo ? getName() - : std::string_view()) { + MDeviceKernelInfo(MOwnsDeviceKernelInfo + ? createCompileTimeKernelInfo(getName()) + : createCompileTimeKernelInfo()) { // Enable USM indirect access for interop and non-sycl-jit source kernels. // sycl-jit kernels will enable this if needed through the regular kernel diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 3054c6d891793..9a2bac97c6d39 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1788,6 +1788,18 @@ Managed ProgramManager::build( return LinkedProg; } +void ProgramManager::cacheKernelImplicitLocalArg( + const RTDeviceBinaryImage &Img) { + const RTDeviceBinaryImage::PropertyRange &ImplicitLocalArgRange = + Img.getImplicitLocalArg(); + if (ImplicitLocalArgRange.isAvailable()) + for (auto Prop : ImplicitLocalArgRange) { + auto It = m_DeviceKernelInfoMap.find(Prop->Name); + assert(It != m_DeviceKernelInfoMap.end()); + It->second.setImplicitLocalArgPos(DeviceBinaryProperty(Prop).asUint32()); + } +} + DeviceKernelInfo & ProgramManager::getDeviceKernelInfo(const CompileTimeKernelInfoTy &Info) { std::lock_guard Guard(m_DeviceKernelInfoMapMutex); @@ -1988,16 +2000,6 @@ void ProgramManager::addImage(sycl_device_binary RawImg, m_BinImg2KernelIDs[Img.get()]; KernelIDs.reset(new std::vector); - std::unordered_map ImplicitLocalArgPositions; - const RTDeviceBinaryImage::PropertyRange &ImplicitLocalArgRange = - Img->getImplicitLocalArg(); - if (ImplicitLocalArgRange.isAvailable()) - for (auto Prop : ImplicitLocalArgRange) { - auto Result = ImplicitLocalArgPositions.try_emplace( - Prop->Name, DeviceBinaryProperty(Prop).asUint32()); - assert(Result.second && "Duplicate implicit arg property"); - } - for (sycl_offload_entry EntriesIt = EntriesB; EntriesIt != EntriesE; EntriesIt = EntriesIt->Increment()) { @@ -2022,14 +2024,10 @@ void ProgramManager::addImage(sycl_device_binary RawImg, m_KernelIDs2BinImage.insert(std::make_pair(It->second, Img.get())); KernelIDs->push_back(It->second); - std::optional ImplicitLocalArgPos; - auto ImplicitLocalArgPosIt = ImplicitLocalArgPositions.find(name); - if (ImplicitLocalArgPosIt != ImplicitLocalArgPositions.end()) - ImplicitLocalArgPos = ImplicitLocalArgPosIt->second; - auto Result = - m_DeviceKernelInfoMap.try_emplace(name, name, ImplicitLocalArgPos); - assert(ImplicitLocalArgPos == Result->first.getImplicitLocalArgPos() && - "Conflicting values of implicit local arg positions"); + CompileTimeKernelInfoTy DefaultCompileTimeInfo{std::string_view(name)}; + m_DeviceKernelInfoMap.try_emplace(std::string_view(name), + DefaultCompileTimeInfo); + // Keep track of image to kernel name reference count for cleanup. m_KernelNameRefCount[name]++; } @@ -2050,6 +2048,8 @@ void ProgramManager::addImage(sycl_device_binary RawImg, } } + cacheKernelImplicitLocalArg(*Img); + // Sort kernel ids for faster search std::sort(KernelIDs->begin(), KernelIDs->end(), LessByHash{}); diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index 24b78f84516bb..09b8d1b0b0f29 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -393,6 +393,8 @@ class ProgramManager { SanitizerType kernelUsesSanitizer() const { return m_SanitizerFoundInImage; } + void cacheKernelImplicitLocalArg(const RTDeviceBinaryImage &Img); + DeviceKernelInfo &getDeviceKernelInfo(const CompileTimeKernelInfoTy &Info); DeviceKernelInfo &getDeviceKernelInfo(std::string_view KernelName); DeviceKernelInfo *tryGetDeviceKernelInfo(std::string_view KernelName); From 00ee337d848da4c86290ea2786e1ac2420d162d1 Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Mon, 8 Dec 2025 06:37:19 -0800 Subject: [PATCH 13/15] Add clarifying comment for implicitlocalargpos --- sycl/source/detail/device_kernel_info.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sycl/source/detail/device_kernel_info.hpp b/sycl/source/detail/device_kernel_info.hpp index 15ceaaedd7a9d..aebeabdefd11c 100644 --- a/sycl/source/detail/device_kernel_info.hpp +++ b/sycl/source/detail/device_kernel_info.hpp @@ -100,6 +100,8 @@ class DeviceKernelInfo : public CompileTimeKernelInfoTy { return MImplicitLocalArgPos; } + // Implicit local argument position is used only for some backends, + // so this value may need to be updated when more images are added. void setImplicitLocalArgPos(int Pos); private: From dbb4cc9b671006464381f27b1b172fba453efa56 Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Mon, 8 Dec 2025 06:39:24 -0800 Subject: [PATCH 14/15] Minor update --- sycl/source/detail/device_kernel_info.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/device_kernel_info.hpp b/sycl/source/detail/device_kernel_info.hpp index aebeabdefd11c..710cab687bb32 100644 --- a/sycl/source/detail/device_kernel_info.hpp +++ b/sycl/source/detail/device_kernel_info.hpp @@ -100,8 +100,8 @@ class DeviceKernelInfo : public CompileTimeKernelInfoTy { return MImplicitLocalArgPos; } - // Implicit local argument position is used only for some backends, - // so this value may need to be updated when more images are added. + // Implicit local argument position is used only for some backends, so this + // funciton allows setting it as more images are added. void setImplicitLocalArgPos(int Pos); private: From b6784dba9dde0be242edc839bf1b9bce0eb61c3a Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Mon, 15 Dec 2025 05:32:49 -0800 Subject: [PATCH 15/15] Minor comment edits --- sycl/source/detail/device_kernel_info.hpp | 2 +- sycl/test-e2e/Config/kernel_from_file.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/device_kernel_info.hpp b/sycl/source/detail/device_kernel_info.hpp index 710cab687bb32..f06d03c126d11 100644 --- a/sycl/source/detail/device_kernel_info.hpp +++ b/sycl/source/detail/device_kernel_info.hpp @@ -101,7 +101,7 @@ class DeviceKernelInfo : public CompileTimeKernelInfoTy { } // Implicit local argument position is used only for some backends, so this - // funciton allows setting it as more images are added. + // function allows setting it as more images are added. void setImplicitLocalArgPos(int Pos); private: diff --git a/sycl/test-e2e/Config/kernel_from_file.cpp b/sycl/test-e2e/Config/kernel_from_file.cpp index 1f64745ec36d7..8d7b435d0bcc7 100644 --- a/sycl/test-e2e/Config/kernel_from_file.cpp +++ b/sycl/test-e2e/Config/kernel_from_file.cpp @@ -12,7 +12,7 @@ // >> ---- translate to SPIR-V // RUN: llvm-spirv -o %t.spv %t_app.bc // Need to perform full compilation here since the SYCL runtime uses image -// properties from the fat binary. +// properties from the multi-architecture binary. // RUN: %{build} -fno-sycl-dead-args-optimization -o %t.out // RUN: env SYCL_USE_KERNEL_SPV=%t.spv %{run} %t.out