aboutsummaryrefslogtreecommitdiffstats
path: root/lib/CodeGen/CGCUDANV.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/CodeGen/CGCUDANV.cpp')
-rw-r--r--lib/CodeGen/CGCUDANV.cpp23
1 files changed, 14 insertions, 9 deletions
diff --git a/lib/CodeGen/CGCUDANV.cpp b/lib/CodeGen/CGCUDANV.cpp
index 4d4038dae9cf..5c5cbaff0252 100644
--- a/lib/CodeGen/CGCUDANV.cpp
+++ b/lib/CodeGen/CGCUDANV.cpp
@@ -93,7 +93,7 @@ private:
GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::None);
}
if (Alignment)
- GV->setAlignment(Alignment);
+ GV->setAlignment(llvm::Align(Alignment));
return llvm::ConstantExpr::getGetElementPtr(ConstStr.getElementType(),
ConstStr.getPointer(), Zeros);
@@ -236,7 +236,8 @@ void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF,
EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl});
if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(),
- CudaFeature::CUDA_USES_NEW_LAUNCH))
+ CudaFeature::CUDA_USES_NEW_LAUNCH) ||
+ CGF.getLangOpts().HIPUseNewLaunchAPI)
emitDeviceStubBodyNew(CGF, Args);
else
emitDeviceStubBodyLegacy(CGF, Args);
@@ -264,14 +265,18 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end");
- // Lookup cudaLaunchKernel function.
+ // Lookup cudaLaunchKernel/hipLaunchKernel function.
// cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim,
// void **args, size_t sharedMem,
// cudaStream_t stream);
+ // hipError_t hipLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim,
+ // void **args, size_t sharedMem,
+ // hipStream_t stream);
TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl();
DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl);
+ auto LaunchKernelName = addPrefixToName("LaunchKernel");
IdentifierInfo &cudaLaunchKernelII =
- CGM.getContext().Idents.get("cudaLaunchKernel");
+ CGM.getContext().Idents.get(LaunchKernelName);
FunctionDecl *cudaLaunchKernelFD = nullptr;
for (const auto &Result : DC->lookup(&cudaLaunchKernelII)) {
if (FunctionDecl *FD = dyn_cast<FunctionDecl>(Result))
@@ -280,7 +285,7 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
if (cudaLaunchKernelFD == nullptr) {
CGM.Error(CGF.CurFuncDecl->getLocation(),
- "Can't find declaration for cudaLaunchKernel()");
+ "Can't find declaration for " + LaunchKernelName);
return;
}
// Create temporary dim3 grid_dim, block_dim.
@@ -301,7 +306,7 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
/*ShmemSize=*/ShmemSize.getType(),
/*Stream=*/Stream.getType()},
/*isVarArg=*/false),
- "__cudaPopCallConfiguration");
+ addUnderscoredPrefixToName("PopCallConfiguration"));
CGF.EmitRuntimeCallOrInvoke(cudaPopConfigFn,
{GridDim.getPointer(), BlockDim.getPointer(),
@@ -329,7 +334,7 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
const CGFunctionInfo &FI =
CGM.getTypes().arrangeFunctionDeclaration(cudaLaunchKernelFD);
llvm::FunctionCallee cudaLaunchKernelFn =
- CGM.CreateRuntimeFunction(FTy, "cudaLaunchKernel");
+ CGM.CreateRuntimeFunction(FTy, LaunchKernelName);
CGF.EmitCall(FI, CGCallee::forDirect(cudaLaunchKernelFn), ReturnValueSlot(),
LaunchKernelArgs);
CGF.EmitBranch(EndBlock);
@@ -623,7 +628,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
Linkage,
/*Initializer=*/llvm::ConstantPointerNull::get(VoidPtrPtrTy),
"__hip_gpubin_handle");
- GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getQuantity());
+ GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getAsAlign());
// Prevent the weak symbol in different shared libraries being merged.
if (Linkage != llvm::GlobalValue::InternalLinkage)
GpuBinaryHandle->setVisibility(llvm::GlobalValue::HiddenVisibility);
@@ -664,7 +669,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
GpuBinaryHandle = new llvm::GlobalVariable(
TheModule, VoidPtrPtrTy, false, llvm::GlobalValue::InternalLinkage,
llvm::ConstantPointerNull::get(VoidPtrPtrTy), "__cuda_gpubin_handle");
- GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getQuantity());
+ GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getAsAlign());
CtorBuilder.CreateAlignedStore(RegisterFatbinCall, GpuBinaryHandle,
CGM.getPointerAlign());