aboutsummaryrefslogtreecommitdiffstats
path: root/lib/CodeGen/CGOpenMPRuntime.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/CodeGen/CGOpenMPRuntime.cpp')
-rw-r--r--lib/CodeGen/CGOpenMPRuntime.cpp1214
1 files changed, 894 insertions, 320 deletions
diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp
index 27e7175da841..2a13a2a58156 100644
--- a/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -752,6 +752,11 @@ enum OpenMPRTLFunction {
// arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
// *arg_types);
OMPRTL__tgt_target_data_update_nowait,
+ // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
+ OMPRTL__tgt_mapper_num_components,
+ // Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void
+ // *base, void *begin, int64_t size, int64_t type);
+ OMPRTL__tgt_push_mapper_component,
};
/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
@@ -1259,6 +1264,52 @@ CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
loadOffloadInfoMetadata();
}
+bool CGOpenMPRuntime::tryEmitDeclareVariant(const GlobalDecl &NewGD,
+ const GlobalDecl &OldGD,
+ llvm::GlobalValue *OrigAddr,
+ bool IsForDefinition) {
+ // Emit at least a definition for the aliasee if the the address of the
+ // original function is requested.
+ if (IsForDefinition || OrigAddr)
+ (void)CGM.GetAddrOfGlobal(NewGD);
+ StringRef NewMangledName = CGM.getMangledName(NewGD);
+ llvm::GlobalValue *Addr = CGM.GetGlobalValue(NewMangledName);
+ if (Addr && !Addr->isDeclaration()) {
+ const auto *D = cast<FunctionDecl>(OldGD.getDecl());
+ const CGFunctionInfo &FI = CGM.getTypes().arrangeGlobalDeclaration(OldGD);
+ llvm::Type *DeclTy = CGM.getTypes().GetFunctionType(FI);
+
+ // Create a reference to the named value. This ensures that it is emitted
+ // if a deferred decl.
+ llvm::GlobalValue::LinkageTypes LT = CGM.getFunctionLinkage(OldGD);
+
+ // Create the new alias itself, but don't set a name yet.
+ auto *GA =
+ llvm::GlobalAlias::create(DeclTy, 0, LT, "", Addr, &CGM.getModule());
+
+ if (OrigAddr) {
+ assert(OrigAddr->isDeclaration() && "Expected declaration");
+
+ GA->takeName(OrigAddr);
+ OrigAddr->replaceAllUsesWith(
+ llvm::ConstantExpr::getBitCast(GA, OrigAddr->getType()));
+ OrigAddr->eraseFromParent();
+ } else {
+ GA->setName(CGM.getMangledName(OldGD));
+ }
+
+ // Set attributes which are particular to an alias; this is a
+ // specialization of the attributes which may be set on a global function.
+ if (D->hasAttr<WeakAttr>() || D->hasAttr<WeakRefAttr>() ||
+ D->isWeakImported())
+ GA->setLinkage(llvm::Function::WeakAnyLinkage);
+
+ CGM.SetCommonAttributes(OldGD, GA);
+ return true;
+ }
+ return false;
+}
+
void CGOpenMPRuntime::clear() {
InternalVars.clear();
// Clean non-target variable declarations possibly used only in debug info.
@@ -1272,6 +1323,14 @@ void CGOpenMPRuntime::clear() {
continue;
GV->eraseFromParent();
}
+ // Emit aliases for the deferred aliasees.
+ for (const auto &Pair : DeferredVariantFunction) {
+ StringRef MangledName = CGM.getMangledName(Pair.second.second);
+ llvm::GlobalValue *Addr = CGM.GetGlobalValue(MangledName);
+ // If not able to emit alias, just emit original declaration.
+ (void)tryEmitDeclareVariant(Pair.second.first, Pair.second.second, Addr,
+ /*IsForDefinition=*/false);
+ }
}
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
@@ -1638,18 +1697,23 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
return ThreadID;
}
// If exceptions are enabled, do not use parameter to avoid possible crash.
- if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
- !CGF.getLangOpts().CXXExceptions ||
- CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
- if (auto *OMPRegionInfo =
- dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
- if (OMPRegionInfo->getThreadIDVariable()) {
- // Check if this an outlined function with thread id passed as argument.
- LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
+ if (auto *OMPRegionInfo =
+ dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
+ if (OMPRegionInfo->getThreadIDVariable()) {
+ // Check if this an outlined function with thread id passed as argument.
+ LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
+ llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
+ if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
+ !CGF.getLangOpts().CXXExceptions ||
+ CGF.Builder.GetInsertBlock() == TopBlock ||
+ !isa<llvm::Instruction>(LVal.getPointer()) ||
+ cast<llvm::Instruction>(LVal.getPointer())->getParent() == TopBlock ||
+ cast<llvm::Instruction>(LVal.getPointer())->getParent() ==
+ CGF.Builder.GetInsertBlock()) {
ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
// If value loaded in entry block, cache it and use it everywhere in
// function.
- if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
+ if (CGF.Builder.GetInsertBlock() == TopBlock) {
auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
Elem.second.ThreadID = ThreadID;
}
@@ -1686,6 +1750,12 @@ void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
UDRMap.erase(D);
FunctionUDRMap.erase(CGF.CurFn);
}
+ auto I = FunctionUDMMap.find(CGF.CurFn);
+ if (I != FunctionUDMMap.end()) {
+ for(auto *D : I->second)
+ UDMMap.erase(D);
+ FunctionUDMMap.erase(I);
+ }
}
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
@@ -2459,6 +2529,24 @@ llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
break;
}
+ case OMPRTL__tgt_mapper_num_components: {
+ // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
+ llvm::Type *TypeParams[] = {CGM.VoidPtrTy};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components");
+ break;
+ }
+ case OMPRTL__tgt_push_mapper_component: {
+ // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void
+ // *base, void *begin, int64_t size, int64_t type);
+ llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy,
+ CGM.Int64Ty, CGM.Int64Ty};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component");
+ break;
+ }
}
assert(RTLFn && "Unable to find OpenMP runtime function");
return RTLFn;
@@ -2552,6 +2640,32 @@ CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
return CGM.CreateRuntimeFunction(FnTy, Name);
}
+/// Obtain information that uniquely identifies a target entry. This
+/// consists of the file and device IDs as well as line number associated with
+/// the relevant entry source location.
+static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
+ unsigned &DeviceID, unsigned &FileID,
+ unsigned &LineNum) {
+ SourceManager &SM = C.getSourceManager();
+
+ // The loc should be always valid and have a file ID (the user cannot use
+ // #pragma directives in macros)
+
+ assert(Loc.isValid() && "Source location is expected to be always valid.");
+
+ PresumedLoc PLoc = SM.getPresumedLoc(Loc);
+ assert(PLoc.isValid() && "Source location is expected to be always valid.");
+
+ llvm::sys::fs::UniqueID ID;
+ if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
+ SM.getDiagnostics().Report(diag::err_cannot_open_file)
+ << PLoc.getFilename() << EC.message();
+
+ DeviceID = ID.getDevice();
+ FileID = ID.getFile();
+ LineNum = PLoc.getLine();
+}
+
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
if (CGM.getLangOpts().OpenMPSimd)
return Address::invalid();
@@ -2563,19 +2677,27 @@ Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
SmallString<64> PtrName;
{
llvm::raw_svector_ostream OS(PtrName);
- OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_ref_ptr";
+ OS << CGM.getMangledName(GlobalDecl(VD));
+ if (!VD->isExternallyVisible()) {
+ unsigned DeviceID, FileID, Line;
+ getTargetEntryUniqueInfo(CGM.getContext(),
+ VD->getCanonicalDecl()->getBeginLoc(),
+ DeviceID, FileID, Line);
+ OS << llvm::format("_%x", FileID);
+ }
+ OS << "_decl_tgt_ref_ptr";
}
llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
if (!Ptr) {
QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
PtrName);
- if (!CGM.getLangOpts().OpenMPIsDevice) {
- auto *GV = cast<llvm::GlobalVariable>(Ptr);
- GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
+
+ auto *GV = cast<llvm::GlobalVariable>(Ptr);
+ GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
+
+ if (!CGM.getLangOpts().OpenMPIsDevice)
GV->setInitializer(CGM.GetAddrOfGlobal(VD));
- }
- CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ptr));
registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
}
return Address(Ptr, CGM.getContext().getDeclAlign(VD));
@@ -2749,35 +2871,12 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
return nullptr;
}
-/// Obtain information that uniquely identifies a target entry. This
-/// consists of the file and device IDs as well as line number associated with
-/// the relevant entry source location.
-static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
- unsigned &DeviceID, unsigned &FileID,
- unsigned &LineNum) {
- SourceManager &SM = C.getSourceManager();
-
- // The loc should be always valid and have a file ID (the user cannot use
- // #pragma directives in macros)
-
- assert(Loc.isValid() && "Source location is expected to be always valid.");
-
- PresumedLoc PLoc = SM.getPresumedLoc(Loc);
- assert(PLoc.isValid() && "Source location is expected to be always valid.");
-
- llvm::sys::fs::UniqueID ID;
- if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
- SM.getDiagnostics().Report(diag::err_cannot_open_file)
- << PLoc.getFilename() << EC.message();
-
- DeviceID = ID.getDevice();
- FileID = ID.getFile();
- LineNum = PLoc.getLine();
-}
-
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
llvm::GlobalVariable *Addr,
bool PerformInit) {
+ if (CGM.getLangOpts().OMPTargetTriples.empty() &&
+ !CGM.getLangOpts().OpenMPIsDevice)
+ return false;
Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
@@ -2981,14 +3080,16 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
CGF.EmitRuntimeCall(
RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
- // OutlinedFn(&GTid, &zero, CapturedStruct);
- Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
- /*Name*/ ".zero.addr");
- CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
+ // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
+ Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
+ Address ZeroAddrBound =
+ CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
+ /*Name=*/".bound.zero.addr");
+ CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
// ThreadId for serialized parallels is 0.
- OutlinedFnArgs.push_back(ZeroAddr.getPointer());
- OutlinedFnArgs.push_back(ZeroAddr.getPointer());
+ OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
+ OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
@@ -3283,9 +3384,9 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
// <copy_func>, did_it);
if (DidIt.isValid()) {
llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
- QualType CopyprivateArrayTy =
- C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
- /*IndexTypeQuals=*/0);
+ QualType CopyprivateArrayTy = C.getConstantArrayType(
+ C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
+ /*IndexTypeQuals=*/0);
// Create a list of all private variables for copyprivate.
Address CopyprivateList =
CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
@@ -3472,7 +3573,7 @@ bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
return Schedule != OMP_sch_static;
}
-static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
+static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
OpenMPScheduleClauseModifier M1,
OpenMPScheduleClauseModifier M2) {
int Modifier = 0;
@@ -3506,6 +3607,18 @@ static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
case OMPC_SCHEDULE_MODIFIER_unknown:
break;
}
+ // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
+ // If the static schedule kind is specified or if the ordered clause is
+ // specified, and if the nonmonotonic modifier is not specified, the effect is
+ // as if the monotonic modifier is specified. Otherwise, unless the monotonic
+ // modifier is specified, the effect is as if the nonmonotonic modifier is
+ // specified.
+ if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
+ if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
+ Schedule == OMP_sch_static_balanced_chunked ||
+ Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static))
+ Modifier = OMP_sch_modifier_nonmonotonic;
+ }
return Schedule | Modifier;
}
@@ -3530,13 +3643,14 @@ void CGOpenMPRuntime::emitForDispatchInit(
llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
: CGF.Builder.getIntN(IVSize, 1);
llvm::Value *Args[] = {
- emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
+ emitUpdateLocation(CGF, Loc),
+ getThreadID(CGF, Loc),
CGF.Builder.getInt32(addMonoNonMonoModifier(
- Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
- DispatchValues.LB, // Lower
- DispatchValues.UB, // Upper
- CGF.Builder.getIntN(IVSize, 1), // Stride
- Chunk // Chunk
+ CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
+ DispatchValues.LB, // Lower
+ DispatchValues.UB, // Upper
+ CGF.Builder.getIntN(IVSize, 1), // Stride
+ Chunk // Chunk
};
CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}
@@ -3578,7 +3692,7 @@ static void emitForStaticInitCall(
llvm::Value *Args[] = {
UpdateLocation,
ThreadId,
- CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1,
+ CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
M2)), // Schedule type
Values.IL.getPointer(), // &isLastIter
Values.LB.getPointer(), // &LB
@@ -3899,157 +4013,6 @@ void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
Action(E.getKey(), E.getValue());
}
-llvm::Function *
-CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
- // If we don't have entries or if we are emitting code for the device, we
- // don't need to do anything.
- if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
- return nullptr;
-
- llvm::Module &M = CGM.getModule();
- ASTContext &C = CGM.getContext();
-
- // Get list of devices we care about
- const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples;
-
- // We should be creating an offloading descriptor only if there are devices
- // specified.
- assert(!Devices.empty() && "No OpenMP offloading devices??");
-
- // Create the external variables that will point to the begin and end of the
- // host entries section. These will be defined by the linker.
- llvm::Type *OffloadEntryTy =
- CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
- std::string EntriesBeginName = getName({"omp_offloading", "entries_begin"});
- auto *HostEntriesBegin = new llvm::GlobalVariable(
- M, OffloadEntryTy, /*isConstant=*/true,
- llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
- EntriesBeginName);
- std::string EntriesEndName = getName({"omp_offloading", "entries_end"});
- auto *HostEntriesEnd =
- new llvm::GlobalVariable(M, OffloadEntryTy, /*isConstant=*/true,
- llvm::GlobalValue::ExternalLinkage,
- /*Initializer=*/nullptr, EntriesEndName);
-
- // Create all device images
- auto *DeviceImageTy = cast<llvm::StructType>(
- CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
- ConstantInitBuilder DeviceImagesBuilder(CGM);
- ConstantArrayBuilder DeviceImagesEntries =
- DeviceImagesBuilder.beginArray(DeviceImageTy);
-
- for (const llvm::Triple &Device : Devices) {
- StringRef T = Device.getTriple();
- std::string BeginName = getName({"omp_offloading", "img_start", ""});
- auto *ImgBegin = new llvm::GlobalVariable(
- M, CGM.Int8Ty, /*isConstant=*/true,
- llvm::GlobalValue::ExternalWeakLinkage,
- /*Initializer=*/nullptr, Twine(BeginName).concat(T));
- std::string EndName = getName({"omp_offloading", "img_end", ""});
- auto *ImgEnd = new llvm::GlobalVariable(
- M, CGM.Int8Ty, /*isConstant=*/true,
- llvm::GlobalValue::ExternalWeakLinkage,
- /*Initializer=*/nullptr, Twine(EndName).concat(T));
-
- llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin,
- HostEntriesEnd};
- createConstantGlobalStructAndAddToParent(CGM, getTgtDeviceImageQTy(), Data,
- DeviceImagesEntries);
- }
-
- // Create device images global array.
- std::string ImagesName = getName({"omp_offloading", "device_images"});
- llvm::GlobalVariable *DeviceImages =
- DeviceImagesEntries.finishAndCreateGlobal(ImagesName,
- CGM.getPointerAlign(),
- /*isConstant=*/true);
- DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
-
- // This is a Zero array to be used in the creation of the constant expressions
- llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
- llvm::Constant::getNullValue(CGM.Int32Ty)};
-
- // Create the target region descriptor.
- llvm::Constant *Data[] = {
- llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()),
- llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
- DeviceImages, Index),
- HostEntriesBegin, HostEntriesEnd};
- std::string Descriptor = getName({"omp_offloading", "descriptor"});
- llvm::GlobalVariable *Desc = createGlobalStruct(
- CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true, Data, Descriptor);
-
- // Emit code to register or unregister the descriptor at execution
- // startup or closing, respectively.
-
- llvm::Function *UnRegFn;
- {
- FunctionArgList Args;
- ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other);
- Args.push_back(&DummyPtr);
-
- CodeGenFunction CGF(CGM);
- // Disable debug info for global (de-)initializer because they are not part
- // of some particular construct.
- CGF.disableDebugInfo();
- const auto &FI =
- CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
- llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
- std::string UnregName = getName({"omp_offloading", "descriptor_unreg"});
- UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI);
- CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args);
- CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
- Desc);
- CGF.FinishFunction();
- }
- llvm::Function *RegFn;
- {
- CodeGenFunction CGF(CGM);
- // Disable debug info for global (de-)initializer because they are not part
- // of some particular construct.
- CGF.disableDebugInfo();
- const auto &FI = CGM.getTypes().arrangeNullaryFunction();
- llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
-
- // Encode offload target triples into the registration function name. It
- // will serve as a comdat key for the registration/unregistration code for
- // this particular combination of offloading targets.
- SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U);
- RegFnNameParts[0] = "omp_offloading";
- RegFnNameParts[1] = "descriptor_reg";
- llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2),
- [](const llvm::Triple &T) -> const std::string& {
- return T.getTriple();
- });
- llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end());
- std::string Descriptor = getName(RegFnNameParts);
- RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI);
- CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList());
- CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc);
- // Create a variable to drive the registration and unregistration of the
- // descriptor, so we can reuse the logic that emits Ctors and Dtors.
- ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(),
- SourceLocation(), nullptr, C.CharTy,
- ImplicitParamDecl::Other);
- CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
- CGF.FinishFunction();
- }
- if (CGM.supportsCOMDAT()) {
- // It is sufficient to call registration function only once, so create a
- // COMDAT group for registration/unregistration functions and associated
- // data. That would reduce startup time and code size. Registration
- // function serves as a COMDAT group key.
- llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName());
- RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
- RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility);
- RegFn->setComdat(ComdatKey);
- UnRegFn->setComdat(ComdatKey);
- DeviceImages->setComdat(ComdatKey);
- Desc->setComdat(ComdatKey);
- }
- return RegFn;
-}
-
void CGOpenMPRuntime::createOffloadEntry(
llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
llvm::GlobalValue::LinkageTypes Linkage) {
@@ -4077,8 +4040,7 @@ void CGOpenMPRuntime::createOffloadEntry(
Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
// The entry has to be created in the section the linker expects it to be.
- std::string Section = getName({"omp_offloading", "entries"});
- Entry->setSection(Section);
+ Entry->setSection("omp_offloading_entries");
}
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
@@ -4091,13 +4053,16 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
// Right now we only generate metadata for function that contain target
// regions.
- // If we do not have entries, we don't need to do anything.
- if (OffloadEntriesInfoManager.empty())
+ // If we are in simd mode or there are no entries, we don't need to do
+ // anything.
+ if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
return;
llvm::Module &M = CGM.getModule();
llvm::LLVMContext &C = M.getContext();
- SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
+ SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
+ SourceLocation, StringRef>,
+ 16>
OrderedEntries(OffloadEntriesInfoManager.size());
llvm::SmallVector<StringRef, 16> ParentFunctions(
OffloadEntriesInfoManager.size());
@@ -4115,7 +4080,8 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
// Create function that emits metadata for each target region entry;
auto &&TargetRegionMetadataEmitter =
- [&C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, &GetMDString](
+ [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
+ &GetMDString](
unsigned DeviceID, unsigned FileID, StringRef ParentName,
unsigned Line,
const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
@@ -4133,8 +4099,19 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
GetMDInt(FileID), GetMDString(ParentName),
GetMDInt(Line), GetMDInt(E.getOrder())};
+ SourceLocation Loc;
+ for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
+ E = CGM.getContext().getSourceManager().fileinfo_end();
+ I != E; ++I) {
+ if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
+ I->getFirst()->getUniqueID().getFile() == FileID) {
+ Loc = CGM.getContext().getSourceManager().translateFileLineCol(
+ I->getFirst(), Line, 1);
+ break;
+ }
+ }
// Save this entry in the right position of the ordered entries array.
- OrderedEntries[E.getOrder()] = &E;
+ OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
ParentFunctions[E.getOrder()] = ParentName;
// Add metadata to the named metadata node.
@@ -4162,7 +4139,8 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
// Save this entry in the right position of the ordered entries array.
- OrderedEntries[E.getOrder()] = &E;
+ OrderedEntries[E.getOrder()] =
+ std::make_tuple(&E, SourceLocation(), MangledName);
// Add metadata to the named metadata node.
MD->addOperand(llvm::MDNode::get(C, Ops));
@@ -4171,11 +4149,11 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
DeviceGlobalVarMetadataEmitter);
- for (const auto *E : OrderedEntries) {
- assert(E && "All ordered entries must exist!");
+ for (const auto &E : OrderedEntries) {
+ assert(std::get<0>(E) && "All ordered entries must exist!");
if (const auto *CE =
dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
- E)) {
+ std::get<0>(E))) {
if (!CE->getID() || !CE->getAddress()) {
// Do not blame the entry if the parent funtion is not emitted.
StringRef FnName = ParentFunctions[CE->getOrder()];
@@ -4183,16 +4161,16 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
continue;
unsigned DiagID = CGM.getDiags().getCustomDiagID(
DiagnosticsEngine::Error,
- "Offloading entry for target region is incorrect: either the "
+ "Offloading entry for target region in %0 is incorrect: either the "
"address or the ID is invalid.");
- CGM.getDiags().Report(DiagID);
+ CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
continue;
}
createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
- } else if (const auto *CE =
- dyn_cast<OffloadEntriesInfoManagerTy::
- OffloadEntryInfoDeviceGlobalVar>(E)) {
+ } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
+ OffloadEntryInfoDeviceGlobalVar>(
+ std::get<0>(E))) {
OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
CE->getFlags());
@@ -4203,10 +4181,10 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
continue;
if (!CE->getAddress()) {
unsigned DiagID = CGM.getDiags().getCustomDiagID(
- DiagnosticsEngine::Error,
- "Offloading entry for declare target variable is incorrect: the "
- "address is invalid.");
- CGM.getDiags().Report(DiagID);
+ DiagnosticsEngine::Error, "Offloading entry for declare target "
+ "variable %0 is incorrect: the "
+ "address is invalid.");
+ CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
continue;
}
// The vaiable has no definition - no need to add the entry.
@@ -5242,7 +5220,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
// Define type kmp_depend_info[<Dependences.size()>];
QualType KmpDependInfoArrayTy = C.getConstantArrayType(
KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
- ArrayType::Normal, /*IndexTypeQuals=*/0);
+ nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
// kmp_depend_info[<Dependences.size()>] deps;
DependenciesArray =
CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
@@ -5763,7 +5741,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
}
llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
QualType ReductionArrayTy =
- C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
+ C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
/*IndexTypeQuals=*/0);
Address ReductionList =
CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
@@ -6235,7 +6213,7 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
unsigned Size = Data.ReductionVars.size();
llvm::APInt ArraySize(/*numBits=*/64, Size);
QualType ArrayRDType = C.getConstantArrayType(
- RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0);
+ RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
// kmp_task_red_input_t .rd_input.[Size];
Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
@@ -6720,12 +6698,16 @@ emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
case OMPD_teams_distribute_parallel_for_simd:
case OMPD_target_update:
case OMPD_declare_simd:
+ case OMPD_declare_variant:
case OMPD_declare_target:
case OMPD_end_declare_target:
case OMPD_declare_reduction:
case OMPD_declare_mapper:
case OMPD_taskloop:
case OMPD_taskloop_simd:
+ case OMPD_master_taskloop:
+ case OMPD_master_taskloop_simd:
+ case OMPD_parallel_master_taskloop:
case OMPD_requires:
case OMPD_unknown:
break;
@@ -7025,12 +7007,16 @@ emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
case OMPD_teams_distribute_parallel_for_simd:
case OMPD_target_update:
case OMPD_declare_simd:
+ case OMPD_declare_variant:
case OMPD_declare_target:
case OMPD_end_declare_target:
case OMPD_declare_reduction:
case OMPD_declare_mapper:
case OMPD_taskloop:
case OMPD_taskloop_simd:
+ case OMPD_master_taskloop:
+ case OMPD_master_taskloop_simd:
+ case OMPD_parallel_master_taskloop:
case OMPD_requires:
case OMPD_unknown:
break;
@@ -7079,12 +7065,24 @@ public:
OMP_MAP_LITERAL = 0x100,
/// Implicit map
OMP_MAP_IMPLICIT = 0x200,
+ /// Close is a hint to the runtime to allocate memory close to
+ /// the target device.
+ OMP_MAP_CLOSE = 0x400,
/// The 16 MSBs of the flags indicate whether the entry is member of some
/// struct/class.
OMP_MAP_MEMBER_OF = 0xffff000000000000,
LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
};
+ /// Get the offset of the OMP_MAP_MEMBER_OF field.
+ static unsigned getFlagMemberOffset() {
+ unsigned Offset = 0;
+ for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
+ Remain = Remain >> 1)
+ Offset++;
+ return Offset;
+ }
+
/// Class that associates information with a base pointer to be passed to the
/// runtime library.
class BasePointerInfo {
@@ -7148,8 +7146,11 @@ private:
: IE(IE), VD(VD) {}
};
- /// Directive from where the map clauses were extracted.
- const OMPExecutableDirective &CurDir;
+ /// The target directive from where the mappable clauses were extracted. It
+ /// is either a executable directive or a user-defined mapper directive.
+ llvm::PointerUnion<const OMPExecutableDirective *,
+ const OMPDeclareMapperDecl *>
+ CurDir;
/// Function the directive is being generated for.
CodeGenFunction &CGF;
@@ -7181,9 +7182,11 @@ private:
OAE->getBase()->IgnoreParenImpCasts())
.getCanonicalType();
- // If there is no length associated with the expression, that means we
- // are using the whole length of the base.
- if (!OAE->getLength() && OAE->getColonLoc().isValid())
+ // If there is no length associated with the expression and lower bound is
+ // not specified too, that means we are using the whole length of the
+ // base.
+ if (!OAE->getLength() && OAE->getColonLoc().isValid() &&
+ !OAE->getLowerBound())
return CGF.getTypeSize(BaseTy);
llvm::Value *ElemSize;
@@ -7197,13 +7200,30 @@ private:
// If we don't have a length at this point, that is because we have an
// array section with a single element.
- if (!OAE->getLength())
+ if (!OAE->getLength() && OAE->getColonLoc().isInvalid())
return ElemSize;
- llvm::Value *LengthVal = CGF.EmitScalarExpr(OAE->getLength());
- LengthVal =
- CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false);
- return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
+ if (const Expr *LenExpr = OAE->getLength()) {
+ llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
+ LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
+ CGF.getContext().getSizeType(),
+ LenExpr->getExprLoc());
+ return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
+ }
+ assert(!OAE->getLength() && OAE->getColonLoc().isValid() &&
+ OAE->getLowerBound() && "expected array_section[lb:].");
+ // Size = sizetype - lb * elemtype;
+ llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
+ llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
+ LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
+ CGF.getContext().getSizeType(),
+ OAE->getLowerBound()->getExprLoc());
+ LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
+ llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
+ llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
+ LengthVal = CGF.Builder.CreateSelect(
+ Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
+ return LengthVal;
}
return CGF.getTypeSize(ExprTy);
}
@@ -7247,6 +7267,9 @@ private:
if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
!= MapModifiers.end())
Bits |= OMP_MAP_ALWAYS;
+ if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
+ != MapModifiers.end())
+ Bits |= OMP_MAP_CLOSE;
return Bits;
}
@@ -7675,10 +7698,10 @@ private:
if (!IsExpressionFirstInfo) {
// If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
- // then we reset the TO/FROM/ALWAYS/DELETE flags.
+ // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
if (IsPointer)
Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
- OMP_MAP_DELETE);
+ OMP_MAP_DELETE | OMP_MAP_CLOSE);
if (ShouldBeMemberOf) {
// Set placeholder value MEMBER_OF=FFFF to indicate that the flag
@@ -7752,9 +7775,9 @@ private:
}
static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
- // Member of is given by the 16 MSB of the flag, so rotate by 48 bits.
+ // Rotate by getFlagMemberOffset() bits.
return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
- << 48);
+ << getFlagMemberOffset());
}
static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
@@ -7834,7 +7857,7 @@ private:
public:
MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
- : CurDir(Dir), CGF(CGF) {
+ : CurDir(&Dir), CGF(CGF) {
// Extract firstprivate clause information.
for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
for (const auto *D : C->varlists())
@@ -7846,6 +7869,10 @@ public:
DevPointersMap[L.first].push_back(L.second);
}
+ /// Constructor for the declare mapper directive.
+ MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
+ : CurDir(&Dir), CGF(CGF) {}
+
/// Generate code for the combined entry if we have a partially mapped struct
/// and take care of the mapping flags of the arguments corresponding to
/// individual struct members.
@@ -7907,18 +7934,20 @@ public:
IsImplicit);
};
- // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
- for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
+ assert(CurDir.is<const OMPExecutableDirective *>() &&
+ "Expect a executable directive");
+ const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
+ for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
for (const auto &L : C->component_lists()) {
InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
/*ReturnDevicePointer=*/false, C->isImplicit());
}
- for (const auto *C : this->CurDir.getClausesOfKind<OMPToClause>())
+ for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
for (const auto &L : C->component_lists()) {
InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
/*ReturnDevicePointer=*/false, C->isImplicit());
}
- for (const auto *C : this->CurDir.getClausesOfKind<OMPFromClause>())
+ for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
for (const auto &L : C->component_lists()) {
InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
/*ReturnDevicePointer=*/false, C->isImplicit());
@@ -7933,9 +7962,8 @@ public:
llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
DeferredInfo;
- // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
for (const auto *C :
- this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>()) {
+ CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
for (const auto &L : C->component_lists()) {
assert(!L.second.empty() && "Not expecting empty list of components!");
const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
@@ -7964,7 +7992,6 @@ public:
// We didn't find any match in our map information - generate a zero
// size array section - if the pointer is a struct member we defer this
// action until the whole struct has been processed.
- // FIXME: MSVC 2013 seems to require this-> to find member CGF.
if (isa<MemberExpr>(IE)) {
// Insert the pointer into Info to be processed by
// generateInfoForComponentList. Because it is a member pointer
@@ -7977,11 +8004,11 @@ public:
/*ReturnDevicePointer=*/false, C->isImplicit());
DeferredInfo[nullptr].emplace_back(IE, VD);
} else {
- llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
- this->CGF.EmitLValue(IE), IE->getExprLoc());
+ llvm::Value *Ptr =
+ CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
BasePointers.emplace_back(Ptr, VD);
Pointers.push_back(Ptr);
- Sizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
+ Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
}
}
@@ -8005,11 +8032,10 @@ public:
// Remember the current base pointer index.
unsigned CurrentBasePointersIdx = CurBasePointers.size();
- // FIXME: MSVC 2013 seems to require this-> to find the member method.
- this->generateInfoForComponentList(
- L.MapType, L.MapModifiers, L.Components, CurBasePointers,
- CurPointers, CurSizes, CurTypes, PartialStruct,
- IsFirstComponentList, L.IsImplicit);
+ generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
+ CurBasePointers, CurPointers, CurSizes,
+ CurTypes, PartialStruct,
+ IsFirstComponentList, L.IsImplicit);
// If this entry relates with a device pointer, set the relevant
// declaration and add the 'return pointer' flag.
@@ -8061,6 +8087,78 @@ public:
}
}
+ /// Generate all the base pointers, section pointers, sizes and map types for
+ /// the extracted map clauses of user-defined mapper.
+ void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers,
+ MapValuesArrayTy &Pointers,
+ MapValuesArrayTy &Sizes,
+ MapFlagsArrayTy &Types) const {
+ assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
+ "Expect a declare mapper directive");
+ const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
+ // We have to process the component lists that relate with the same
+ // declaration in a single chunk so that we can generate the map flags
+ // correctly. Therefore, we organize all lists in a map.
+ llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
+
+ // Helper function to fill the information map for the different supported
+ // clauses.
+ auto &&InfoGen = [&Info](
+ const ValueDecl *D,
+ OMPClauseMappableExprCommon::MappableExprComponentListRef L,
+ OpenMPMapClauseKind MapType,
+ ArrayRef<OpenMPMapModifierKind> MapModifiers,
+ bool ReturnDevicePointer, bool IsImplicit) {
+ const ValueDecl *VD =
+ D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
+ Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
+ IsImplicit);
+ };
+
+ for (const auto *C : CurMapperDir->clauselists()) {
+ const auto *MC = cast<OMPMapClause>(C);
+ for (const auto &L : MC->component_lists()) {
+ InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
+ /*ReturnDevicePointer=*/false, MC->isImplicit());
+ }
+ }
+
+ for (const auto &M : Info) {
+ // We need to know when we generate information for the first component
+ // associated with a capture, because the mapping flags depend on it.
+ bool IsFirstComponentList = true;
+
+ // Temporary versions of arrays
+ MapBaseValuesArrayTy CurBasePointers;
+ MapValuesArrayTy CurPointers;
+ MapValuesArrayTy CurSizes;
+ MapFlagsArrayTy CurTypes;
+ StructRangeInfoTy PartialStruct;
+
+ for (const MapInfo &L : M.second) {
+ assert(!L.Components.empty() &&
+ "Not expecting declaration with no component lists.");
+ generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
+ CurBasePointers, CurPointers, CurSizes,
+ CurTypes, PartialStruct,
+ IsFirstComponentList, L.IsImplicit);
+ IsFirstComponentList = false;
+ }
+
+ // If there is an entry in PartialStruct it means we have a struct with
+ // individual members mapped. Emit an extra combined entry.
+ if (PartialStruct.Base.isValid())
+ emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
+ PartialStruct);
+
+ // We need to append the results of this capture to what we already have.
+ BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
+ Pointers.append(CurPointers.begin(), CurPointers.end());
+ Sizes.append(CurSizes.begin(), CurSizes.end());
+ Types.append(CurTypes.begin(), CurTypes.end());
+ }
+ }
+
/// Emit capture info for lambdas for variables captured by reference.
void generateInfoForLambdaCaptures(
const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
@@ -8184,8 +8282,10 @@ public:
std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
SmallVector<MapData, 4> DeclComponentLists;
- // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
- for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
+ assert(CurDir.is<const OMPExecutableDirective *>() &&
+ "Expect a executable directive");
+ const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
+ for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
for (const auto &L : C->decl_component_lists(VD)) {
assert(L.first == VD &&
"We got information for the wrong declaration??");
@@ -8333,9 +8433,12 @@ public:
MapValuesArrayTy &Pointers,
MapValuesArrayTy &Sizes,
MapFlagsArrayTy &Types) const {
+ assert(CurDir.is<const OMPExecutableDirective *>() &&
+ "Expect a executable directive");
+ const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
// Map other list items in the map clause which are not captured variables
// but "declare target link" global variables.
- for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
+ for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
for (const auto &L : C->component_lists()) {
if (!L.first)
continue;
@@ -8472,9 +8575,9 @@ emitOffloadingArrays(CodeGenFunction &CGF,
}
llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
- QualType PointerArrayType =
- Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
- /*IndexTypeQuals=*/0);
+ QualType PointerArrayType = Ctx.getConstantArrayType(
+ Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
+ /*IndexTypeQuals=*/0);
Info.BasePointersArray =
CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
@@ -8487,9 +8590,9 @@ emitOffloadingArrays(CodeGenFunction &CGF,
QualType Int64Ty =
Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
if (hasRuntimeEvaluationCaptureSize) {
- QualType SizeArrayType =
- Ctx.getConstantArrayType(Int64Ty, PointerNumAP, ArrayType::Normal,
- /*IndexTypeQuals=*/0);
+ QualType SizeArrayType = Ctx.getConstantArrayType(
+ Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
+ /*IndexTypeQuals=*/0);
Info.SizesArray =
CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
} else {
@@ -8562,6 +8665,7 @@ emitOffloadingArrays(CodeGenFunction &CGF,
}
}
}
+
/// Emit the arguments to be passed to the runtime library based on the
/// arrays of pointers, sizes and map types.
static void emitOffloadingArraysArgument(
@@ -8677,12 +8781,16 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
case OMPD_teams_distribute_parallel_for_simd:
case OMPD_target_update:
case OMPD_declare_simd:
+ case OMPD_declare_variant:
case OMPD_declare_target:
case OMPD_end_declare_target:
case OMPD_declare_reduction:
case OMPD_declare_mapper:
case OMPD_taskloop:
case OMPD_taskloop_simd:
+ case OMPD_master_taskloop:
+ case OMPD_master_taskloop_simd:
+ case OMPD_parallel_master_taskloop:
case OMPD_requires:
case OMPD_unknown:
llvm_unreachable("Unexpected directive.");
@@ -8692,10 +8800,343 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
return nullptr;
}
+/// Emit the user-defined mapper function. The code generation follows the
+/// pattern in the example below.
+/// \code
+/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
+/// void *base, void *begin,
+/// int64_t size, int64_t type) {
+/// // Allocate space for an array section first.
+/// if (size > 1 && !maptype.IsDelete)
+/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
+/// size*sizeof(Ty), clearToFrom(type));
+/// // Map members.
+/// for (unsigned i = 0; i < size; i++) {
+/// // For each component specified by this mapper:
+/// for (auto c : all_components) {
+/// if (c.hasMapper())
+/// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
+/// c.arg_type);
+/// else
+/// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
+/// c.arg_begin, c.arg_size, c.arg_type);
+/// }
+/// }
+/// // Delete the array section.
+/// if (size > 1 && maptype.IsDelete)
+/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
+/// size*sizeof(Ty), clearToFrom(type));
+/// }
+/// \endcode
+void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
+ CodeGenFunction *CGF) {
+ if (UDMMap.count(D) > 0)
+ return;
+ ASTContext &C = CGM.getContext();
+ QualType Ty = D->getType();
+ QualType PtrTy = C.getPointerType(Ty).withRestrict();
+ QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
+ auto *MapperVarDecl =
+ cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
+ SourceLocation Loc = D->getLocation();
+ CharUnits ElementSize = C.getTypeSizeInChars(Ty);
+
+ // Prepare mapper function arguments and attributes.
+ ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+ C.VoidPtrTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
+ ImplicitParamDecl::Other);
+ ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+ C.VoidPtrTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
+ ImplicitParamDecl::Other);
+ ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
+ ImplicitParamDecl::Other);
+ FunctionArgList Args;
+ Args.push_back(&HandleArg);
+ Args.push_back(&BaseArg);
+ Args.push_back(&BeginArg);
+ Args.push_back(&SizeArg);
+ Args.push_back(&TypeArg);
+ const CGFunctionInfo &FnInfo =
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+ llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
+ SmallString<64> TyStr;
+ llvm::raw_svector_ostream Out(TyStr);
+ CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
+ std::string Name = getName({"omp_mapper", TyStr, D->getName()});
+ auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
+ Name, &CGM.getModule());
+ CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
+ Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
+ // Start the mapper function code generation.
+ CodeGenFunction MapperCGF(CGM);
+ MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
+ // Compute the starting and end addreses of array elements.
+ llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
+ MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
+ C.getPointerType(Int64Ty), Loc);
+ llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
+ MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
+ CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
+ llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
+ llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
+ MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
+ C.getPointerType(Int64Ty), Loc);
+ // Prepare common arguments for array initiation and deletion.
+ llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
+ MapperCGF.GetAddrOfLocalVar(&HandleArg),
+ /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
+ llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
+ MapperCGF.GetAddrOfLocalVar(&BaseArg),
+ /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
+ llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
+ MapperCGF.GetAddrOfLocalVar(&BeginArg),
+ /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
+
+ // Emit array initiation if this is an array section and \p MapType indicates
+ // that memory allocation is required.
+ llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
+ emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
+ ElementSize, HeadBB, /*IsInit=*/true);
+
+ // Emit a for loop to iterate through SizeArg of elements and map all of them.
+
+ // Emit the loop header block.
+ MapperCGF.EmitBlock(HeadBB);
+ llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
+ llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
+ // Evaluate whether the initial condition is satisfied.
+ llvm::Value *IsEmpty =
+ MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
+ MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
+ llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
+
+ // Emit the loop body block.
+ MapperCGF.EmitBlock(BodyBB);
+ llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
+ PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
+ PtrPHI->addIncoming(PtrBegin, EntryBB);
+ Address PtrCurrent =
+ Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
+ .getAlignment()
+ .alignmentOfArrayElement(ElementSize));
+ // Privatize the declared variable of mapper to be the current array element.
+ CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
+ Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
+ return MapperCGF
+ .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
+ .getAddress();
+ });
+ (void)Scope.Privatize();
+
+ // Get map clause information. Fill up the arrays with all mapped variables.
+ MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
+ MappableExprsHandler::MapValuesArrayTy Pointers;
+ MappableExprsHandler::MapValuesArrayTy Sizes;
+ MappableExprsHandler::MapFlagsArrayTy MapTypes;
+ MappableExprsHandler MEHandler(*D, MapperCGF);
+ MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes);
+
+ // Call the runtime API __tgt_mapper_num_components to get the number of
+ // pre-existing components.
+ llvm::Value *OffloadingArgs[] = {Handle};
+ llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
+ createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs);
+ llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
+ PreviousSize,
+ MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
+
+ // Fill up the runtime mapper handle for all components.
+ for (unsigned I = 0; I < BasePointers.size(); ++I) {
+ llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
+ *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
+ llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
+ Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
+ llvm::Value *CurSizeArg = Sizes[I];
+
+ // Extract the MEMBER_OF field from the map type.
+ llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
+ MapperCGF.EmitBlock(MemberBB);
+ llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]);
+ llvm::Value *Member = MapperCGF.Builder.CreateAnd(
+ OriMapType,
+ MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
+ llvm::BasicBlock *MemberCombineBB =
+ MapperCGF.createBasicBlock("omp.member.combine");
+ llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
+ llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
+ MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
+ // Add the number of pre-existing components to the MEMBER_OF field if it
+ // is valid.
+ MapperCGF.EmitBlock(MemberCombineBB);
+ llvm::Value *CombinedMember =
+ MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
+ // Do nothing if it is not a member of previous components.
+ MapperCGF.EmitBlock(TypeBB);
+ llvm::PHINode *MemberMapType =
+ MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
+ MemberMapType->addIncoming(OriMapType, MemberBB);
+ MemberMapType->addIncoming(CombinedMember, MemberCombineBB);
+
+ // Combine the map type inherited from user-defined mapper with that
+ // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
+ // bits of the \a MapType, which is the input argument of the mapper
+ // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
+ // bits of MemberMapType.
+ // [OpenMP 5.0], 1.2.6. map-type decay.
+ // | alloc | to | from | tofrom | release | delete
+ // ----------------------------------------------------------
+ // alloc | alloc | alloc | alloc | alloc | release | delete
+ // to | alloc | to | alloc | to | release | delete
+ // from | alloc | alloc | from | from | release | delete
+ // tofrom | alloc | to | from | tofrom | release | delete
+ llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
+ MapType,
+ MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
+ MappableExprsHandler::OMP_MAP_FROM));
+ llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
+ llvm::BasicBlock *AllocElseBB =
+ MapperCGF.createBasicBlock("omp.type.alloc.else");
+ llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
+ llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
+ llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
+ llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
+ llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
+ MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
+ // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
+ MapperCGF.EmitBlock(AllocBB);
+ llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
+ MemberMapType,
+ MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
+ MappableExprsHandler::OMP_MAP_FROM)));
+ MapperCGF.Builder.CreateBr(EndBB);
+ MapperCGF.EmitBlock(AllocElseBB);
+ llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
+ LeftToFrom,
+ MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
+ MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
+ // In case of to, clear OMP_MAP_FROM.
+ MapperCGF.EmitBlock(ToBB);
+ llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
+ MemberMapType,
+ MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
+ MapperCGF.Builder.CreateBr(EndBB);
+ MapperCGF.EmitBlock(ToElseBB);
+ llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
+ LeftToFrom,
+ MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
+ MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
+ // In case of from, clear OMP_MAP_TO.
+ MapperCGF.EmitBlock(FromBB);
+ llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
+ MemberMapType,
+ MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
+ // In case of tofrom, do nothing.
+ MapperCGF.EmitBlock(EndBB);
+ llvm::PHINode *CurMapType =
+ MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
+ CurMapType->addIncoming(AllocMapType, AllocBB);
+ CurMapType->addIncoming(ToMapType, ToBB);
+ CurMapType->addIncoming(FromMapType, FromBB);
+ CurMapType->addIncoming(MemberMapType, ToElseBB);
+
+ // TODO: call the corresponding mapper function if a user-defined mapper is
+ // associated with this map clause.
+ // Call the runtime API __tgt_push_mapper_component to fill up the runtime
+ // data structure.
+ llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
+ CurSizeArg, CurMapType};
+ MapperCGF.EmitRuntimeCall(
+ createRuntimeFunction(OMPRTL__tgt_push_mapper_component),
+ OffloadingArgs);
+ }
+
+ // Update the pointer to point to the next element that needs to be mapped,
+ // and check whether we have mapped all elements.
+ llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
+ PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
+ PtrPHI->addIncoming(PtrNext, BodyBB);
+ llvm::Value *IsDone =
+ MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
+ llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
+ MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
+
+ MapperCGF.EmitBlock(ExitBB);
+ // Emit array deletion if this is an array section and \p MapType indicates
+ // that deletion is required.
+ emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
+ ElementSize, DoneBB, /*IsInit=*/false);
+
+ // Emit the function exit block.
+ MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
+ MapperCGF.FinishFunction();
+ UDMMap.try_emplace(D, Fn);
+ if (CGF) {
+ auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
+ Decls.second.push_back(D);
+ }
+}
+
+/// Emit the array initialization or deletion portion for user-defined mapper
+/// code generation. First, it evaluates whether an array section is mapped and
+/// whether the \a MapType instructs to delete this section. If \a IsInit is
+/// true, and \a MapType indicates to not delete this array, array
+/// initialization code is generated. If \a IsInit is false, and \a MapType
+/// indicates to not this array, array deletion code is generated.
+void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
+ CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
+ llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
+ CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
+ StringRef Prefix = IsInit ? ".init" : ".del";
+
+ // Evaluate if this is an array section.
+ llvm::BasicBlock *IsDeleteBB =
+ MapperCGF.createBasicBlock("omp.array" + Prefix + ".evaldelete");
+ llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.array" + Prefix);
+ llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
+ Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
+ MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);
+
+ // Evaluate if we are going to delete this section.
+ MapperCGF.EmitBlock(IsDeleteBB);
+ llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
+ MapType,
+ MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
+ llvm::Value *DeleteCond;
+ if (IsInit) {
+ DeleteCond = MapperCGF.Builder.CreateIsNull(
+ DeleteBit, "omp.array" + Prefix + ".delete");
+ } else {
+ DeleteCond = MapperCGF.Builder.CreateIsNotNull(
+ DeleteBit, "omp.array" + Prefix + ".delete");
+ }
+ MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);
+
+ MapperCGF.EmitBlock(BodyBB);
+ // Get the array size by multiplying element size and element number (i.e., \p
+ // Size).
+ llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
+ Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
+ // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
+ // memory allocation/deletion purpose only.
+ llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
+ MapType,
+ MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
+ MappableExprsHandler::OMP_MAP_FROM)));
+ // Call the runtime API __tgt_push_mapper_component to fill up the runtime
+ // data structure.
+ llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
+ MapperCGF.EmitRuntimeCall(
+ createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs);
+}
+
void CGOpenMPRuntime::emitTargetNumIterationsCall(
- CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device,
- const llvm::function_ref<llvm::Value *(
- CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter) {
+ CodeGenFunction &CGF, const OMPExecutableDirective &D,
+ llvm::Value *DeviceID,
+ llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
+ const OMPLoopDirective &D)>
+ SizeEmitter) {
OpenMPDirectiveKind Kind = D.getDirectiveKind();
const OMPExecutableDirective *TD = &D;
// Get nested teams distribute kind directive, if any.
@@ -8704,30 +9145,24 @@ void CGOpenMPRuntime::emitTargetNumIterationsCall(
if (!TD)
return;
const auto *LD = cast<OMPLoopDirective>(TD);
- auto &&CodeGen = [LD, &Device, &SizeEmitter, this](CodeGenFunction &CGF,
+ auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
PrePostActionTy &) {
- llvm::Value *NumIterations = SizeEmitter(CGF, *LD);
-
- // Emit device ID if any.
- llvm::Value *DeviceID;
- if (Device)
- DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
- CGF.Int64Ty, /*isSigned=*/true);
- else
- DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
-
- llvm::Value *Args[] = {DeviceID, NumIterations};
- CGF.EmitRuntimeCall(
- createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
+ if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
+ llvm::Value *Args[] = {DeviceID, NumIterations};
+ CGF.EmitRuntimeCall(
+ createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
+ }
};
emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
}
-void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
- const OMPExecutableDirective &D,
- llvm::Function *OutlinedFn,
- llvm::Value *OutlinedFnID,
- const Expr *IfCond, const Expr *Device) {
+void CGOpenMPRuntime::emitTargetCall(
+ CodeGenFunction &CGF, const OMPExecutableDirective &D,
+ llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
+ const Expr *Device,
+ llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
+ const OMPLoopDirective &D)>
+ SizeEmitter) {
if (!CGF.HaveInsertPoint())
return;
@@ -8746,8 +9181,8 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
llvm::Value *MapTypesArray = nullptr;
// Fill up the pointer arrays and transfer execution to the device.
auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
- &MapTypesArray, &CS, RequiresOuterTask,
- &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) {
+ &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
+ SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
// On top of the arrays that were filled up, the target offloading call
// takes as arguments the device id as well as the host pointer. The host
// pointer is used by the runtime library to identify the current target
@@ -8779,6 +9214,9 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
+ // Emit tripcount for the target loop-based directive.
+ emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
+
bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
// The target region is an outlined function launched by the runtime
// via calls __tgt_target() or __tgt_target_teams().
@@ -9103,12 +9541,16 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
case OMPD_teams_distribute_parallel_for_simd:
case OMPD_target_update:
case OMPD_declare_simd:
+ case OMPD_declare_variant:
case OMPD_declare_target:
case OMPD_end_declare_target:
case OMPD_declare_reduction:
case OMPD_declare_mapper:
case OMPD_taskloop:
case OMPD_taskloop_simd:
+ case OMPD_master_taskloop:
+ case OMPD_master_taskloop_simd:
+ case OMPD_parallel_master_taskloop:
case OMPD_requires:
case OMPD_unknown:
llvm_unreachable("Unknown target directive for OpenMP device codegen.");
@@ -9137,14 +9579,28 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
// If emitting code for the host, we do not process FD here. Instead we do
// the normal code generation.
- if (!CGM.getLangOpts().OpenMPIsDevice)
+ if (!CGM.getLangOpts().OpenMPIsDevice) {
+ if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
+ Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
+ OMPDeclareTargetDeclAttr::getDeviceType(FD);
+ // Do not emit device_type(nohost) functions for the host.
+ if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
+ return true;
+ }
return false;
+ }
const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
StringRef Name = CGM.getMangledName(GD);
// Try to detect target regions in the function.
- if (const auto *FD = dyn_cast<FunctionDecl>(VD))
+ if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
scanForTargetRegionsFunctions(FD->getBody(), Name);
+ Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
+ OMPDeclareTargetDeclAttr::getDeviceType(FD);
+ // Do not emit device_type(nohost) functions for the host.
+ if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
+ return true;
+ }
// Do not to emit function if it is not marked as declare target.
return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
@@ -9221,6 +9677,9 @@ CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
llvm::Constant *Addr) {
+ if (CGM.getLangOpts().OMPTargetTriples.empty() &&
+ !CGM.getLangOpts().OpenMPIsDevice)
+ return;
llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
if (!Res) {
@@ -9433,17 +9892,6 @@ llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
return RequiresRegFn;
}
-llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
- // If we have offloading in the current module, we need to emit the entries
- // now and register the offloading descriptor.
- createOffloadEntriesAndInfoMetadata();
-
- // Create and register the offloading binary descriptors. This is the main
- // entity that captures all the information about offloading in the current
- // compilation unit.
- return createOffloadingBinaryDescriptorRegistration();
-}
-
void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
const OMPExecutableDirective &D,
SourceLocation Loc,
@@ -9711,12 +10159,16 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
case OMPD_teams_distribute_parallel_for:
case OMPD_teams_distribute_parallel_for_simd:
case OMPD_declare_simd:
+ case OMPD_declare_variant:
case OMPD_declare_target:
case OMPD_end_declare_target:
case OMPD_declare_reduction:
case OMPD_declare_mapper:
case OMPD_taskloop:
case OMPD_taskloop_simd:
+ case OMPD_master_taskloop:
+ case OMPD_master_taskloop_simd:
+ case OMPD_parallel_master_taskloop:
case OMPD_target:
case OMPD_target_simd:
case OMPD_target_teams_distribute:
@@ -10377,7 +10829,7 @@ void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
}
llvm::APInt Size(/*numBits=*/32, NumIterations.size());
QualType ArrayTy =
- C.getConstantArrayType(KmpDimTy, Size, ArrayType::Normal, 0);
+ C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
CGF.EmitNullInitialization(DimsAddr, ArrayTy);
@@ -10428,7 +10880,7 @@ void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
QualType ArrayTy = CGM.getContext().getConstantArrayType(
- Int64Ty, Size, ArrayType::Normal, 0);
+ Int64Ty, Size, nullptr, ArrayType::Normal, 0);
Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
const Expr *CounterVal = C->getLoopData(I);
@@ -10566,6 +11018,131 @@ Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
return Address(Addr, Align);
}
+/// Checks current context and returns true if it matches the context selector.
+template <OMPDeclareVariantAttr::CtxSelectorSetType CtxSet,
+ OMPDeclareVariantAttr::CtxSelectorType Ctx>
+static bool checkContext(const OMPDeclareVariantAttr *A) {
+ assert(CtxSet != OMPDeclareVariantAttr::CtxSetUnknown &&
+ Ctx != OMPDeclareVariantAttr::CtxUnknown &&
+ "Unknown context selector or context selector set.");
+ return false;
+}
+
+/// Checks for implementation={vendor(<vendor>)} context selector.
+/// \returns true iff <vendor>="llvm", false otherwise.
+template <>
+bool checkContext<OMPDeclareVariantAttr::CtxSetImplementation,
+ OMPDeclareVariantAttr::CtxVendor>(
+ const OMPDeclareVariantAttr *A) {
+ return llvm::all_of(A->implVendors(),
+ [](StringRef S) { return !S.compare_lower("llvm"); });
+}
+
+static bool greaterCtxScore(ASTContext &Ctx, const Expr *LHS, const Expr *RHS) {
+ // If both scores are unknown, choose the very first one.
+ if (!LHS && !RHS)
+ return true;
+ // If only one is known, return this one.
+ if (LHS && !RHS)
+ return true;
+ if (!LHS && RHS)
+ return false;
+ llvm::APSInt LHSVal = LHS->EvaluateKnownConstInt(Ctx);
+ llvm::APSInt RHSVal = RHS->EvaluateKnownConstInt(Ctx);
+ return llvm::APSInt::compareValues(LHSVal, RHSVal) >= 0;
+}
+
+namespace {
+/// Comparator for the priority queue for context selector.
+class OMPDeclareVariantAttrComparer
+ : public std::greater<const OMPDeclareVariantAttr *> {
+private:
+ ASTContext &Ctx;
+
+public:
+ OMPDeclareVariantAttrComparer(ASTContext &Ctx) : Ctx(Ctx) {}
+ bool operator()(const OMPDeclareVariantAttr *LHS,
+ const OMPDeclareVariantAttr *RHS) const {
+ const Expr *LHSExpr = nullptr;
+ const Expr *RHSExpr = nullptr;
+ if (LHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified)
+ LHSExpr = LHS->getScore();
+ if (RHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified)
+ RHSExpr = RHS->getScore();
+ return greaterCtxScore(Ctx, LHSExpr, RHSExpr);
+ }
+};
+} // anonymous namespace
+
+/// Finds the variant function that matches current context with its context
+/// selector.
+static const FunctionDecl *getDeclareVariantFunction(ASTContext &Ctx,
+ const FunctionDecl *FD) {
+ if (!FD->hasAttrs() || !FD->hasAttr<OMPDeclareVariantAttr>())
+ return FD;
+ // Iterate through all DeclareVariant attributes and check context selectors.
+ auto &&Comparer = [&Ctx](const OMPDeclareVariantAttr *LHS,
+ const OMPDeclareVariantAttr *RHS) {
+ const Expr *LHSExpr = nullptr;
+ const Expr *RHSExpr = nullptr;
+ if (LHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified)
+ LHSExpr = LHS->getScore();
+ if (RHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified)
+ RHSExpr = RHS->getScore();
+ return greaterCtxScore(Ctx, LHSExpr, RHSExpr);
+ };
+ const OMPDeclareVariantAttr *TopMostAttr = nullptr;
+ for (const auto *A : FD->specific_attrs<OMPDeclareVariantAttr>()) {
+ const OMPDeclareVariantAttr *SelectedAttr = nullptr;
+ switch (A->getCtxSelectorSet()) {
+ case OMPDeclareVariantAttr::CtxSetImplementation:
+ switch (A->getCtxSelector()) {
+ case OMPDeclareVariantAttr::CtxVendor:
+ if (checkContext<OMPDeclareVariantAttr::CtxSetImplementation,
+ OMPDeclareVariantAttr::CtxVendor>(A))
+ SelectedAttr = A;
+ break;
+ case OMPDeclareVariantAttr::CtxUnknown:
+ llvm_unreachable(
+ "Unknown context selector in implementation selector set.");
+ }
+ break;
+ case OMPDeclareVariantAttr::CtxSetUnknown:
+ llvm_unreachable("Unknown context selector set.");
+ }
+ // If the attribute matches the context, find the attribute with the highest
+ // score.
+ if (SelectedAttr && (!TopMostAttr || !Comparer(TopMostAttr, SelectedAttr)))
+ TopMostAttr = SelectedAttr;
+ }
+ if (!TopMostAttr)
+ return FD;
+ return cast<FunctionDecl>(
+ cast<DeclRefExpr>(TopMostAttr->getVariantFuncRef()->IgnoreParenImpCasts())
+ ->getDecl());
+}
+
+bool CGOpenMPRuntime::emitDeclareVariant(GlobalDecl GD, bool IsForDefinition) {
+ const auto *D = cast<FunctionDecl>(GD.getDecl());
+ // If the original function is defined already, use its definition.
+ StringRef MangledName = CGM.getMangledName(GD);
+ llvm::GlobalValue *Orig = CGM.GetGlobalValue(MangledName);
+ if (Orig && !Orig->isDeclaration())
+ return false;
+ const FunctionDecl *NewFD = getDeclareVariantFunction(CGM.getContext(), D);
+ // Emit original function if it does not have declare variant attribute or the
+ // context does not match.
+ if (NewFD == D)
+ return false;
+ GlobalDecl NewGD = GD.getWithDecl(NewFD);
+ if (tryEmitDeclareVariant(NewGD, GD, Orig, IsForDefinition)) {
+ DeferredVariantFunction.erase(D);
+ return true;
+ }
+ DeferredVariantFunction.insert(std::make_pair(D, std::make_pair(NewGD, GD)));
+ return true;
+}
+
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
@@ -10786,12 +11363,13 @@ void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
llvm_unreachable("Not supported in SIMD-only mode");
}
-void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF,
- const OMPExecutableDirective &D,
- llvm::Function *OutlinedFn,
- llvm::Value *OutlinedFnID,
- const Expr *IfCond,
- const Expr *Device) {
+void CGOpenMPSIMDRuntime::emitTargetCall(
+ CodeGenFunction &CGF, const OMPExecutableDirective &D,
+ llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
+ const Expr *Device,
+ llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
+ const OMPLoopDirective &D)>
+ SizeEmitter) {
llvm_unreachable("Not supported in SIMD-only mode");
}
@@ -10807,10 +11385,6 @@ bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
return false;
}
-llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() {
- return nullptr;
-}
-
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
const OMPExecutableDirective &D,
SourceLocation Loc,