-
Notifications
You must be signed in to change notification settings - Fork 188
[CIR][CUDA][HIP] Set internal linkage for device variable shadows #2041
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -54,6 +54,18 @@ class CIRGenNVCUDARuntime : public CIRGenCUDARuntime { | |
|
|
||
| // Map a kernel handle to the kernel stub. | ||
| llvm::DenseMap<mlir::Operation *, mlir::Operation *> KernelStubs; | ||
| struct VarInfo { | ||
| cir::GlobalOp var; | ||
| const VarDecl *declaration; | ||
| DeviceVarFlags flags; | ||
| }; | ||
|
|
||
| llvm::SmallVector<VarInfo, 16> deviceVars; | ||
|
|
||
| /// Keeps track of variable containing handle of GPU binary. Populated by | ||
| /// ModuleCtorFunction() and used to create corresponding cleanup calls in | ||
| /// ModuleDtorFunction() | ||
| llvm::GlobalVariable *gpuBinaryHandle = nullptr; | ||
|
Comment on lines
+65
to
+68
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't see this being used at CodeGen. We handle the "gpuBinaryHandle" during "lowering". Why do you think we need this here?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good catch, this is merely an artifact from bringing the skeleton from OG, Will remove. |
||
|
|
||
| // Mangle context for device. | ||
| std::unique_ptr<MangleContext> deviceMC; | ||
|
|
@@ -72,6 +84,7 @@ class CIRGenNVCUDARuntime : public CIRGenCUDARuntime { | |
|
|
||
| void emitDeviceStub(CIRGenFunction &cgf, cir::FuncOp fn, | ||
| FunctionArgList &args) override; | ||
| void handleVarRegistration(const VarDecl *vd, cir::GlobalOp var) override; | ||
|
|
||
| mlir::Operation *getKernelHandle(cir::FuncOp fn, GlobalDecl GD) override; | ||
|
|
||
|
|
@@ -86,6 +99,15 @@ class CIRGenNVCUDARuntime : public CIRGenCUDARuntime { | |
| /// Returns function or variable name on device side even if the current | ||
| /// compilation is for host. | ||
| std::string getDeviceSideName(const NamedDecl *nd) override; | ||
|
|
||
| void registerDeviceVar(const VarDecl *vd, cir::GlobalOp &var, bool isExtern, | ||
| bool isConstant) { | ||
| deviceVars.push_back({var, | ||
| vd, | ||
| {DeviceVarFlags::Variable, isExtern, isConstant, | ||
| vd->hasAttr<HIPManagedAttr>(), | ||
| /*Normalized*/ false, 0}}); | ||
| } | ||
| }; | ||
|
|
||
| } // namespace | ||
|
|
@@ -401,3 +423,35 @@ void CIRGenNVCUDARuntime::internalizeDeviceSideVar( | |
| d->getType()->isCUDADeviceBuiltinTextureType()) | ||
| llvm_unreachable("NYI"); | ||
| } | ||
|
|
||
| void CIRGenNVCUDARuntime::handleVarRegistration(const VarDecl *declaration, | ||
| cir::GlobalOp globalVariable) { | ||
| if (declaration->hasAttr<CUDADeviceAttr>() || | ||
| declaration->hasAttr<CUDAConstantAttr>()) { | ||
| // Shadow variables and their properties must be registered with CUDA | ||
| // runtime. Skip Extern global variables, which will be registered in | ||
| // the TU where they are defined. | ||
| // | ||
| // Don't register a C++17 inline variable. The local symbol can be | ||
| // discarded and referencing a discarded local symbol from outside the | ||
| // comdat (__cuda_register_globals) is disallowed by the ELF spec. | ||
| // | ||
| // HIP managed variables need to be always recorded in device and host | ||
| // compilations for transformation. | ||
| // | ||
| // HIP managed variables and variables in CUDADeviceVarODRUsedByHost are | ||
| // added to llvm.compiler-used, therefore they are safe to be registered. | ||
| if ((!declaration->hasExternalStorage() && !declaration->isInline()) || | ||
| cgm.getASTContext().CUDADeviceVarODRUsedByHost.contains(declaration) || | ||
| declaration->hasAttr<HIPManagedAttr>()) { | ||
| registerDeviceVar(declaration, globalVariable, | ||
| !declaration->hasDefinition(), | ||
| declaration->hasAttr<CUDAConstantAttr>()); | ||
| } | ||
| } else if (declaration->getType()->isCUDADeviceBuiltinSurfaceType() || | ||
| declaration->getType()->isCUDADeviceBuiltinTextureType()) { | ||
| // Builtin surfaces and textures and their template arguments are | ||
| // also registered with CUDA runtime. | ||
| llvm_unreachable("Surface and Texture registration NYI"); | ||
| } | ||
| } | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see you are mixing CUDA and HIP tests here. This is ok, but we had historically split them between CUDA/HIP directories.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good Point, I'll make sure to split both things from now on. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,4 @@ | ||
| #include "cuda.h" | ||
| #include "../Inputs/cuda.h" | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We prefer on not having a relative path here |
||
|
|
||
| // RUN: echo "sample fatbin" > %t.fatbin | ||
| // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \ | ||
|
|
@@ -33,7 +33,7 @@ | |
| // LLVM-HOST: i32 1212764230, i32 1, ptr @__hip_fatbin_str, ptr null | ||
| // LLVM-HOST: }, section ".hipFatBinSegment" | ||
| // LLVM-HOST: @_Z2fnv = constant ptr @_Z17__device_stub__fnv, align 8 | ||
| // LLVM-HOST: @a = global i32 undef, align 4 | ||
| // LLVM-HOST: @a = internal global i32 undef, align 4 | ||
| // LLVM-HOST: @llvm.global_ctors = {{.*}}ptr @__hip_module_ctor | ||
|
|
||
| // CIR-HOST: cir.func internal private @__hip_module_dtor() { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why do you need this? Does this exist in OG?