about | summary | refs | log | tree | commit | diff | stats
path: root/lib/CodeGen
diff options
context:
space:
mode:
Diffstat (limited to 'lib/CodeGen')
-rw-r--r-- lib/CodeGen/BackendUtil.cpp 85
-rw-r--r-- lib/CodeGen/CGAtomic.cpp 9
-rw-r--r-- lib/CodeGen/CGBlocks.cpp 5
-rw-r--r-- lib/CodeGen/CGBuiltin.cpp 518
-rw-r--r-- lib/CodeGen/CGCUDANV.cpp 23
-rw-r--r-- lib/CodeGen/CGCXX.cpp 6
-rw-r--r-- lib/CodeGen/CGCXXABI.cpp 4
-rw-r--r-- lib/CodeGen/CGCXXABI.h 2
-rw-r--r-- lib/CodeGen/CGCall.cpp 84
-rw-r--r-- lib/CodeGen/CGClass.cpp 51
-rw-r--r-- lib/CodeGen/CGCleanup.cpp 11
-rw-r--r-- lib/CodeGen/CGDebugInfo.cpp 97
-rw-r--r-- lib/CodeGen/CGDecl.cpp 39
-rw-r--r-- lib/CodeGen/CGDeclCXX.cpp 15
-rw-r--r-- lib/CodeGen/CGException.cpp 8
-rw-r--r-- lib/CodeGen/CGExpr.cpp 90
-rw-r--r-- lib/CodeGen/CGExprAgg.cpp 26
-rw-r--r-- lib/CodeGen/CGExprCXX.cpp 45
-rw-r--r-- lib/CodeGen/CGExprComplex.cpp 4
-rw-r--r-- lib/CodeGen/CGExprConstant.cpp 10
-rw-r--r-- lib/CodeGen/CGExprScalar.cpp 223
-rw-r--r-- lib/CodeGen/CGLoopInfo.cpp 60
-rw-r--r-- lib/CodeGen/CGLoopInfo.h 12
-rw-r--r-- lib/CodeGen/CGNonTrivialStruct.cpp 2
-rw-r--r-- lib/CodeGen/CGObjC.cpp 4
-rw-r--r-- lib/CodeGen/CGObjCGNU.cpp 13
-rw-r--r-- lib/CodeGen/CGObjCMac.cpp 47
-rw-r--r-- lib/CodeGen/CGOpenMPRuntime.cpp 1214
-rw-r--r-- lib/CodeGen/CGOpenMPRuntime.h 106
-rw-r--r-- lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp 128
-rw-r--r-- lib/CodeGen/CGOpenMPRuntimeNVPTX.h 12
-rw-r--r-- lib/CodeGen/CGStmt.cpp 72
-rw-r--r-- lib/CodeGen/CGStmtOpenMP.cpp 160
-rw-r--r-- lib/CodeGen/CGVTables.cpp 92
-rw-r--r-- lib/CodeGen/CodeGenAction.cpp 41
-rw-r--r-- lib/CodeGen/CodeGenFunction.cpp 56
-rw-r--r-- lib/CodeGen/CodeGenFunction.h 15
-rw-r--r-- lib/CodeGen/CodeGenModule.cpp 233
-rw-r--r-- lib/CodeGen/CodeGenModule.h 15
-rw-r--r-- lib/CodeGen/CodeGenPGO.cpp 2
-rw-r--r-- lib/CodeGen/CodeGenPGO.h 4
-rw-r--r-- lib/CodeGen/CodeGenTypes.cpp 25
-rw-r--r-- lib/CodeGen/ConstantInitBuilder.cpp 2
-rw-r--r-- lib/CodeGen/CoverageMappingGen.cpp 27
-rw-r--r-- lib/CodeGen/CoverageMappingGen.h 8
-rw-r--r-- lib/CodeGen/EHScopeStack.h 4
-rw-r--r-- lib/CodeGen/ItaniumCXXABI.cpp 246
-rw-r--r-- lib/CodeGen/MicrosoftCXXABI.cpp 17
-rw-r--r-- lib/CodeGen/ModuleBuilder.cpp 15
-rw-r--r-- lib/CodeGen/ObjectFilePCHContainerOperations.cpp 12
-rw-r--r-- lib/CodeGen/TargetInfo.cpp 362
51 files changed, 3183 insertions, 1178 deletions
diff --git a/lib/CodeGen/BackendUtil.cpp b/lib/CodeGen/BackendUtil.cpp
index 497652e85b47..75a54d8f3c8a 100644
--- a/lib/CodeGen/BackendUtil.cpp
+++ b/lib/CodeGen/BackendUtil.cpp
@@ -37,6 +37,7 @@
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Passes/PassPlugin.h"
+#include "llvm/Passes/StandardInstrumentations.h"
#include "llvm/Support/BuryPointer.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -60,6 +61,7 @@
#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
#include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
+#include "llvm/Transforms/Instrumentation/SanitizerCoverage.h"
#include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
#include "llvm/Transforms/ObjCARC.h"
#include "llvm/Transforms/Scalar.h"
@@ -117,8 +119,8 @@ class EmitAssemblyHelper {
std::unique_ptr<llvm::ToolOutputFile> openOutputFile(StringRef Path) {
std::error_code EC;
- auto F = llvm::make_unique<llvm::ToolOutputFile>(Path, EC,
- llvm::sys::fs::F_None);
+ auto F = std::make_unique<llvm::ToolOutputFile>(Path, EC,
+ llvm::sys::fs::OF_None);
if (EC) {
Diags.Report(diag::err_fe_unable_to_open_output) << Path << EC.message();
F.reset();
@@ -195,11 +197,8 @@ static void addBoundsCheckingPass(const PassManagerBuilder &Builder,
PM.add(createBoundsCheckingLegacyPass());
}
-static void addSanitizerCoveragePass(const PassManagerBuilder &Builder,
- legacy::PassManagerBase &PM) {
- const PassManagerBuilderWrapper &BuilderWrapper =
- static_cast<const PassManagerBuilderWrapper&>(Builder);
- const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts();
+static SanitizerCoverageOptions
+getSancovOptsFromCGOpts(const CodeGenOptions &CGOpts) {
SanitizerCoverageOptions Opts;
Opts.CoverageType =
static_cast<SanitizerCoverageOptions::Type>(CGOpts.SanitizeCoverageType);
@@ -215,7 +214,16 @@ static void addSanitizerCoveragePass(const PassManagerBuilder &Builder,
Opts.Inline8bitCounters = CGOpts.SanitizeCoverageInline8bitCounters;
Opts.PCTable = CGOpts.SanitizeCoveragePCTable;
Opts.StackDepth = CGOpts.SanitizeCoverageStackDepth;
- PM.add(createSanitizerCoverageModulePass(Opts));
+ return Opts;
+}
+
+static void addSanitizerCoveragePass(const PassManagerBuilder &Builder,
+ legacy::PassManagerBase &PM) {
+ const PassManagerBuilderWrapper &BuilderWrapper =
+ static_cast<const PassManagerBuilderWrapper &>(Builder);
+ const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts();
+ auto Opts = getSancovOptsFromCGOpts(CGOpts);
+ PM.add(createModuleSanitizerCoverageLegacyPassPass(Opts));
}
// Check if ASan should use GC-friendly instrumentation for globals.
@@ -231,9 +239,13 @@ static bool asanUseGlobalsGC(const Triple &T, const CodeGenOptions &CGOpts) {
return true;
case Triple::ELF:
return CGOpts.DataSections && !CGOpts.DisableIntegratedAS;
- default:
- return false;
+ case Triple::XCOFF:
+ llvm::report_fatal_error("ASan not implemented for XCOFF.");
+ case Triple::Wasm:
+ case Triple::UnknownObjectFormat:
+ break;
}
+ return false;
}
static void addAddressSanitizerPasses(const PassManagerBuilder &Builder,
@@ -456,6 +468,8 @@ static void initTargetOptions(llvm::TargetOptions &Options,
Options.ExceptionModel = llvm::ExceptionHandling::WinEH;
if (LangOpts.DWARFExceptions)
Options.ExceptionModel = llvm::ExceptionHandling::DwarfCFI;
+ if (LangOpts.WasmExceptions)
+ Options.ExceptionModel = llvm::ExceptionHandling::Wasm;
Options.NoInfsFPMath = CodeGenOpts.NoInfsFPMath;
Options.NoNaNsFPMath = CodeGenOpts.NoNaNsFPMath;
@@ -481,6 +495,7 @@ static void initTargetOptions(llvm::TargetOptions &Options,
CodeGenOpts.IncrementalLinkerCompatible;
Options.MCOptions.MCPIECopyRelocations = CodeGenOpts.PIECopyRelocations;
Options.MCOptions.MCFatalWarnings = CodeGenOpts.FatalWarnings;
+ Options.MCOptions.MCNoWarn = CodeGenOpts.NoWarn;
Options.MCOptions.AsmVerbose = CodeGenOpts.AsmVerbose;
Options.MCOptions.PreserveAsmComments = CodeGenOpts.PreserveAsmComments;
Options.MCOptions.ABIName = TargetOpts.ABI;
@@ -848,7 +863,7 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
if (!TheModule->getModuleFlag("ThinLTO"))
TheModule->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0));
TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit",
- CodeGenOpts.EnableSplitLTOUnit);
+ uint32_t(1));
}
PerModulePasses.add(createBitcodeWriterPass(
@@ -880,6 +895,7 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
{
PrettyStackTraceString CrashInfo("Per-function optimization");
+ llvm::TimeTraceScope TimeScope("PerFunctionPasses", StringRef(""));
PerFunctionPasses.doInitialization();
for (Function &F : *TheModule)
@@ -890,11 +906,13 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
{
PrettyStackTraceString CrashInfo("Per-module optimization passes");
+ llvm::TimeTraceScope TimeScope("PerModulePasses", StringRef(""));
PerModulePasses.run(*TheModule);
}
{
PrettyStackTraceString CrashInfo("Code generation");
+ llvm::TimeTraceScope TimeScope("CodeGenPasses", StringRef(""));
CodeGenPasses.run(*TheModule);
}
@@ -956,6 +974,7 @@ static void addSanitizersAtO0(ModulePassManager &MPM,
}
if (LangOpts.Sanitize.has(SanitizerKind::Memory)) {
+ MPM.addPass(MemorySanitizerPass({}));
MPM.addPass(createModuleToFunctionPassAdaptor(MemorySanitizerPass({})));
}
@@ -965,6 +984,7 @@ static void addSanitizersAtO0(ModulePassManager &MPM,
}
if (LangOpts.Sanitize.has(SanitizerKind::Thread)) {
+ MPM.addPass(ThreadSanitizerPass());
MPM.addPass(createModuleToFunctionPassAdaptor(ThreadSanitizerPass()));
}
}
@@ -1050,7 +1070,10 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
PTO.LoopVectorization = CodeGenOpts.VectorizeLoop;
PTO.SLPVectorization = CodeGenOpts.VectorizeSLP;
- PassBuilder PB(TM.get(), PTO, PGOOpt);
+ PassInstrumentationCallbacks PIC;
+ StandardInstrumentations SI;
+ SI.registerCallbacks(PIC);
+ PassBuilder PB(TM.get(), PTO, PGOOpt, &PIC);
// Attempt to load pass plugins and register their callbacks with PB.
for (auto &PluginFN : CodeGenOpts.PassPlugins) {
@@ -1077,7 +1100,6 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
std::unique_ptr<TargetLibraryInfoImpl> TLII(
createTLII(TargetTriple, CodeGenOpts));
FAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); });
- MAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); });
// Register all the basic analyses with the managers.
PB.registerModuleAnalyses(MAM);
@@ -1105,6 +1127,16 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
// code generation.
MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/false));
+ // At -O0, we can still do PGO. Add all the requested passes for
+ // instrumentation PGO, if requested.
+ if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
+ PGOOpt->Action == PGOOptions::IRUse))
+ PB.addPGOInstrPassesForO0(
+ MPM, CodeGenOpts.DebugPassManager,
+ /* RunProfileGen */ (PGOOpt->Action == PGOOptions::IRInstr),
+ /* IsCS */ false, PGOOpt->ProfileFile,
+ PGOOpt->ProfileRemappingFile);
+
// At -O0 we directly run necessary sanitizer passes.
if (LangOpts.Sanitize.has(SanitizerKind::LocalBounds))
MPM.addPass(createModuleToFunctionPassAdaptor(BoundsCheckingPass()));
@@ -1132,16 +1164,23 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
[](FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) {
FPM.addPass(BoundsCheckingPass());
});
- if (LangOpts.Sanitize.has(SanitizerKind::Memory))
+ if (LangOpts.Sanitize.has(SanitizerKind::Memory)) {
+ PB.registerPipelineStartEPCallback([](ModulePassManager &MPM) {
+ MPM.addPass(MemorySanitizerPass({}));
+ });
PB.registerOptimizerLastEPCallback(
[](FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) {
FPM.addPass(MemorySanitizerPass({}));
});
- if (LangOpts.Sanitize.has(SanitizerKind::Thread))
+ }
+ if (LangOpts.Sanitize.has(SanitizerKind::Thread)) {
+ PB.registerPipelineStartEPCallback(
+ [](ModulePassManager &MPM) { MPM.addPass(ThreadSanitizerPass()); });
PB.registerOptimizerLastEPCallback(
[](FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) {
FPM.addPass(ThreadSanitizerPass());
});
+ }
if (LangOpts.Sanitize.has(SanitizerKind::Address)) {
PB.registerPipelineStartEPCallback([&](ModulePassManager &MPM) {
MPM.addPass(
@@ -1191,6 +1230,13 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
}
}
+ if (CodeGenOpts.SanitizeCoverageType ||
+ CodeGenOpts.SanitizeCoverageIndirectCalls ||
+ CodeGenOpts.SanitizeCoverageTraceCmp) {
+ auto SancovOpts = getSancovOptsFromCGOpts(CodeGenOpts);
+ MPM.addPass(ModuleSanitizerCoveragePass(SancovOpts));
+ }
+
if (LangOpts.Sanitize.has(SanitizerKind::HWAddress)) {
bool Recover = CodeGenOpts.SanitizeRecover.has(SanitizerKind::HWAddress);
MPM.addPass(HWAddressSanitizerPass(
@@ -1201,8 +1247,9 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
/*CompileKernel=*/true, /*Recover=*/true));
}
- if (CodeGenOpts.OptimizationLevel == 0)
+ if (CodeGenOpts.OptimizationLevel == 0) {
addSanitizersAtO0(MPM, TargetTriple, LangOpts, CodeGenOpts);
+ }
}
// FIXME: We still use the legacy pass manager to do code generation. We
@@ -1239,7 +1286,7 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
if (!TheModule->getModuleFlag("ThinLTO"))
TheModule->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0));
TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit",
- CodeGenOpts.EnableSplitLTOUnit);
+ uint32_t(1));
}
MPM.addPass(
BitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists, EmitLTOSummary));
@@ -1372,7 +1419,7 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M,
OwnedImports.push_back(std::move(*MBOrErr));
}
auto AddStream = [&](size_t Task) {
- return llvm::make_unique<lto::NativeObjectStream>(std::move(OS));
+ return std::make_unique<lto::NativeObjectStream>(std::move(OS));
};
lto::Config Conf;
if (CGOpts.SaveTempsFilePrefix != "") {
@@ -1484,7 +1531,7 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags,
// trying to read it. Also for some features, like CFI, we must skip
// the compilation as CombinedIndex does not contain all required
// information.
- EmptyModule = llvm::make_unique<llvm::Module>("empty", M->getContext());
+ EmptyModule = std::make_unique<llvm::Module>("empty", M->getContext());
EmptyModule->setTargetTriple(M->getTargetTriple());
M = EmptyModule.get();
}
diff --git a/lib/CodeGen/CGAtomic.cpp b/lib/CodeGen/CGAtomic.cpp
index a95cd12c2d64..505916350750 100644
--- a/lib/CodeGen/CGAtomic.cpp
+++ b/lib/CodeGen/CGAtomic.cpp
@@ -102,12 +102,13 @@ namespace {
llvm::APInt Size(
/*numBits=*/32,
C.toCharUnitsFromBits(AtomicSizeInBits).getQuantity());
- AtomicTy = C.getConstantArrayType(C.CharTy, Size, ArrayType::Normal,
- /*IndexTypeQuals=*/0);
+ AtomicTy =
+ C.getConstantArrayType(C.CharTy, Size, nullptr, ArrayType::Normal,
+ /*IndexTypeQuals=*/0);
}
AtomicAlign = ValueAlign = lvalue.getAlignment();
} else if (lvalue.isVectorElt()) {
- ValueTy = lvalue.getType()->getAs<VectorType>()->getElementType();
+ ValueTy = lvalue.getType()->castAs<VectorType>()->getElementType();
ValueSizeInBits = C.getTypeSize(ValueTy);
AtomicTy = lvalue.getType();
AtomicSizeInBits = C.getTypeSize(AtomicTy);
@@ -969,7 +970,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
auto CastToGenericAddrSpace = [&](llvm::Value *V, QualType PT) {
if (!E->isOpenCL())
return V;
- auto AS = PT->getAs<PointerType>()->getPointeeType().getAddressSpace();
+ auto AS = PT->castAs<PointerType>()->getPointeeType().getAddressSpace();
if (AS == LangAS::opencl_generic)
return V;
auto DestAS = getContext().getTargetAddressSpace(LangAS::opencl_generic);
diff --git a/lib/CodeGen/CGBlocks.cpp b/lib/CodeGen/CGBlocks.cpp
index c3ee7129d9d7..f90d9439af25 100644
--- a/lib/CodeGen/CGBlocks.cpp
+++ b/lib/CodeGen/CGBlocks.cpp
@@ -1253,8 +1253,7 @@ llvm::Type *CodeGenModule::getGenericBlockLiteralType() {
RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E,
ReturnValueSlot ReturnValue) {
- const BlockPointerType *BPT =
- E->getCallee()->getType()->getAs<BlockPointerType>();
+ const auto *BPT = E->getCallee()->getType()->castAs<BlockPointerType>();
llvm::Value *BlockPtr = EmitScalarExpr(E->getCallee());
llvm::Type *GenBlockTy = CGM.getGenericBlockLiteralType();
llvm::Value *Func = nullptr;
@@ -1802,7 +1801,7 @@ struct CallBlockRelease final : EHScopeStack::Cleanup {
bool CodeGenFunction::cxxDestructorCanThrow(QualType T) {
if (const auto *RD = T->getAsCXXRecordDecl())
if (const CXXDestructorDecl *DD = RD->getDestructor())
- return DD->getType()->getAs<FunctionProtoType>()->canThrow();
+ return DD->getType()->castAs<FunctionProtoType>()->canThrow();
return false;
}
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index a300bab49f9c..f9871b233149 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -843,10 +843,12 @@ static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind,
Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex";
Arg1Ty = CGF.Int8PtrTy;
if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) {
- Arg1 = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::sponentry));
+ Arg1 = CGF.Builder.CreateCall(
+ CGF.CGM.getIntrinsic(Intrinsic::sponentry, CGF.AllocaInt8PtrTy));
} else
- Arg1 = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::frameaddress),
- llvm::ConstantInt::get(CGF.Int32Ty, 0));
+ Arg1 = CGF.Builder.CreateCall(
+ CGF.CGM.getIntrinsic(Intrinsic::frameaddress, CGF.AllocaInt8PtrTy),
+ llvm::ConstantInt::get(CGF.Int32Ty, 0));
}
// Mark the call site and declaration with ReturnsTwice.
@@ -1394,9 +1396,8 @@ EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
static llvm::Value *dumpRecord(CodeGenFunction &CGF, QualType RType,
Value *&RecordPtr, CharUnits Align,
llvm::FunctionCallee Func, int Lvl) {
- const auto *RT = RType->getAs<RecordType>();
ASTContext &Context = CGF.getContext();
- RecordDecl *RD = RT->getDecl()->getDefinition();
+ RecordDecl *RD = RType->castAs<RecordType>()->getDecl()->getDefinition();
std::string Pad = std::string(Lvl * 4, ' ');
Value *GString =
@@ -1555,6 +1556,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BIceill:
case Builtin::BI__builtin_ceil:
case Builtin::BI__builtin_ceilf:
+ case Builtin::BI__builtin_ceilf16:
case Builtin::BI__builtin_ceill:
return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil));
@@ -1563,6 +1565,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BIcopysignl:
case Builtin::BI__builtin_copysign:
case Builtin::BI__builtin_copysignf:
+ case Builtin::BI__builtin_copysignf16:
case Builtin::BI__builtin_copysignl:
case Builtin::BI__builtin_copysignf128:
return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
@@ -1572,6 +1575,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BIcosl:
case Builtin::BI__builtin_cos:
case Builtin::BI__builtin_cosf:
+ case Builtin::BI__builtin_cosf16:
case Builtin::BI__builtin_cosl:
return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::cos));
@@ -1580,6 +1584,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BIexpl:
case Builtin::BI__builtin_exp:
case Builtin::BI__builtin_expf:
+ case Builtin::BI__builtin_expf16:
case Builtin::BI__builtin_expl:
return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp));
@@ -1588,6 +1593,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BIexp2l:
case Builtin::BI__builtin_exp2:
case Builtin::BI__builtin_exp2f:
+ case Builtin::BI__builtin_exp2f16:
case Builtin::BI__builtin_exp2l:
return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp2));
@@ -1596,6 +1602,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BIfabsl:
case Builtin::BI__builtin_fabs:
case Builtin::BI__builtin_fabsf:
+ case Builtin::BI__builtin_fabsf16:
case Builtin::BI__builtin_fabsl:
case Builtin::BI__builtin_fabsf128:
return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
@@ -1605,6 +1612,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BIfloorl:
case Builtin::BI__builtin_floor:
case Builtin::BI__builtin_floorf:
+ case Builtin::BI__builtin_floorf16:
case Builtin::BI__builtin_floorl:
return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor));
@@ -1613,6 +1621,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BIfmal:
case Builtin::BI__builtin_fma:
case Builtin::BI__builtin_fmaf:
+ case Builtin::BI__builtin_fmaf16:
case Builtin::BI__builtin_fmal:
return RValue::get(emitTernaryBuiltin(*this, E, Intrinsic::fma));
@@ -1621,6 +1630,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BIfmaxl:
case Builtin::BI__builtin_fmax:
case Builtin::BI__builtin_fmaxf:
+ case Builtin::BI__builtin_fmaxf16:
case Builtin::BI__builtin_fmaxl:
return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum));
@@ -1629,6 +1639,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BIfminl:
case Builtin::BI__builtin_fmin:
case Builtin::BI__builtin_fminf:
+ case Builtin::BI__builtin_fminf16:
case Builtin::BI__builtin_fminl:
return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum));
@@ -1639,6 +1650,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BIfmodl:
case Builtin::BI__builtin_fmod:
case Builtin::BI__builtin_fmodf:
+ case Builtin::BI__builtin_fmodf16:
case Builtin::BI__builtin_fmodl: {
Value *Arg1 = EmitScalarExpr(E->getArg(0));
Value *Arg2 = EmitScalarExpr(E->getArg(1));
@@ -1650,6 +1662,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BIlogl:
case Builtin::BI__builtin_log:
case Builtin::BI__builtin_logf:
+ case Builtin::BI__builtin_logf16:
case Builtin::BI__builtin_logl:
return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log));
@@ -1658,6 +1671,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BIlog10l:
case Builtin::BI__builtin_log10:
case Builtin::BI__builtin_log10f:
+ case Builtin::BI__builtin_log10f16:
case Builtin::BI__builtin_log10l:
return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log10));
@@ -1666,6 +1680,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BIlog2l:
case Builtin::BI__builtin_log2:
case Builtin::BI__builtin_log2f:
+ case Builtin::BI__builtin_log2f16:
case Builtin::BI__builtin_log2l:
return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log2));
@@ -1682,6 +1697,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BIpowl:
case Builtin::BI__builtin_pow:
case Builtin::BI__builtin_powf:
+ case Builtin::BI__builtin_powf16:
case Builtin::BI__builtin_powl:
return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::pow));
@@ -1690,6 +1706,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BIrintl:
case Builtin::BI__builtin_rint:
case Builtin::BI__builtin_rintf:
+ case Builtin::BI__builtin_rintf16:
case Builtin::BI__builtin_rintl:
return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint));
@@ -1698,6 +1715,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BIroundl:
case Builtin::BI__builtin_round:
case Builtin::BI__builtin_roundf:
+ case Builtin::BI__builtin_roundf16:
case Builtin::BI__builtin_roundl:
return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round));
@@ -1706,6 +1724,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BIsinl:
case Builtin::BI__builtin_sin:
case Builtin::BI__builtin_sinf:
+ case Builtin::BI__builtin_sinf16:
case Builtin::BI__builtin_sinl:
return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::sin));
@@ -1714,6 +1733,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BIsqrtl:
case Builtin::BI__builtin_sqrt:
case Builtin::BI__builtin_sqrtf:
+ case Builtin::BI__builtin_sqrtf16:
case Builtin::BI__builtin_sqrtl:
return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::sqrt));
@@ -1722,6 +1742,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BItruncl:
case Builtin::BI__builtin_trunc:
case Builtin::BI__builtin_truncf:
+ case Builtin::BI__builtin_truncf16:
case Builtin::BI__builtin_truncl:
return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc));
@@ -2026,11 +2047,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
- unsigned Alignment = (unsigned)AlignmentCI->getZExtValue();
+ if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
+ AlignmentCI = ConstantInt::get(AlignmentCI->getType(),
+ llvm::Value::MaximumAlignment);
EmitAlignmentAssumption(PtrValue, Ptr,
/*The expr loc is sufficient.*/ SourceLocation(),
- Alignment, OffsetValue);
+ AlignmentCI, OffsetValue);
return RValue::get(PtrValue);
}
case Builtin::BI__assume:
@@ -2077,10 +2100,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_constant_p: {
llvm::Type *ResultType = ConvertType(E->getType());
- if (CGM.getCodeGenOpts().OptimizationLevel == 0)
- // At -O0, we don't perform inlining, so we don't need to delay the
- // processing.
- return RValue::get(ConstantInt::get(ResultType, 0));
const Expr *Arg = E->getArg(0);
QualType ArgType = Arg->getType();
@@ -2131,7 +2150,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
llvm::ConstantInt::get(Int32Ty, 3);
Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
- Function *F = CGM.getIntrinsic(Intrinsic::prefetch);
+ Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
}
case Builtin::BI__builtin_readcyclecounter: {
@@ -2344,7 +2363,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
.toCharUnitsFromBits(TI.getSuitableAlign())
.getQuantity();
AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
- AI->setAlignment(SuitableAlignmentInBytes);
+ AI->setAlignment(MaybeAlign(SuitableAlignmentInBytes));
initializeAlloca(*this, AI, Size, SuitableAlignmentInBytes);
return RValue::get(AI);
}
@@ -2357,7 +2376,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
unsigned AlignmentInBytes =
CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getQuantity();
AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
- AI->setAlignment(AlignmentInBytes);
+ AI->setAlignment(MaybeAlign(AlignmentInBytes));
initializeAlloca(*this, AI, Size, AlignmentInBytes);
return RValue::get(AI);
}
@@ -2556,7 +2575,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_frame_address: {
Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
getContext().UnsignedIntTy);
- Function *F = CGM.getIntrinsic(Intrinsic::frameaddress);
+ Function *F = CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy);
return RValue::get(Builder.CreateCall(F, Depth));
}
case Builtin::BI__builtin_extract_return_addr: {
@@ -2637,9 +2656,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
Address Buf = EmitPointerWithAlignment(E->getArg(0));
// Store the frame pointer to the setjmp buffer.
- Value *FrameAddr =
- Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
- ConstantInt::get(Int32Ty, 0));
+ Value *FrameAddr = Builder.CreateCall(
+ CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy),
+ ConstantInt::get(Int32Ty, 0));
Builder.CreateStore(FrameAddr, Buf);
// Store the stack pointer to the setjmp buffer.
@@ -3673,13 +3692,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BIget_pipe_num_packets:
case Builtin::BIget_pipe_max_packets: {
const char *BaseName;
- const PipeType *PipeTy = E->getArg(0)->getType()->getAs<PipeType>();
+ const auto *PipeTy = E->getArg(0)->getType()->castAs<PipeType>();
if (BuiltinID == Builtin::BIget_pipe_num_packets)
BaseName = "__get_pipe_num_packets";
else
BaseName = "__get_pipe_max_packets";
- auto Name = std::string(BaseName) +
- std::string(PipeTy->isReadOnly() ? "_ro" : "_wo");
+ std::string Name = std::string(BaseName) +
+ std::string(PipeTy->isReadOnly() ? "_ro" : "_wo");
// Building the generic function prototype.
Value *Arg0 = EmitScalarExpr(E->getArg(0));
@@ -3769,7 +3788,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
-> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> {
llvm::APInt ArraySize(32, NumArgs - First);
QualType SizeArrayTy = getContext().getConstantArrayType(
- getContext().getSizeType(), ArraySize, ArrayType::Normal,
+ getContext().getSizeType(), ArraySize, nullptr, ArrayType::Normal,
/*IndexTypeQuals=*/0);
auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
llvm::Value *TmpPtr = Tmp.getPointer();
@@ -3977,6 +3996,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
break;
case Builtin::BI__builtin_canonicalize:
case Builtin::BI__builtin_canonicalizef:
+ case Builtin::BI__builtin_canonicalizef16:
case Builtin::BI__builtin_canonicalizel:
return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
@@ -4219,6 +4239,9 @@ static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_be:
return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch);
+ case llvm::Triple::bpfeb:
+ case llvm::Triple::bpfel:
+ return CGF->EmitBPFBuiltinExpr(BuiltinID, E);
case llvm::Triple::x86:
case llvm::Triple::x86_64:
return CGF->EmitX86BuiltinExpr(BuiltinID, E);
@@ -6019,7 +6042,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
// Locality is not supported on ARM target
Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
- Function *F = CGM.getIntrinsic(Intrinsic::prefetch);
+ Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
return Builder.CreateCall(F, {Address, RW, Locality, IsData});
}
@@ -6958,7 +6981,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
// FIXME: We need AArch64 specific LLVM intrinsic if we want to specify
// PLDL3STRM or PLDL2STRM.
- Function *F = CGM.getIntrinsic(Intrinsic::prefetch);
+ Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
return Builder.CreateCall(F, {Address, RW, Locality, IsData});
}
@@ -7293,12 +7316,13 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
if (BuiltinID == AArch64::BI_AddressOfReturnAddress) {
- llvm::Function *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress);
+ llvm::Function *F =
+ CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
return Builder.CreateCall(F);
}
if (BuiltinID == AArch64::BI__builtin_sponentry) {
- llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry);
+ llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
return Builder.CreateCall(F);
}
@@ -8011,6 +8035,151 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
"vgetq_lane");
}
+ case AArch64::BI_BitScanForward:
+ case AArch64::BI_BitScanForward64:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
+ case AArch64::BI_BitScanReverse:
+ case AArch64::BI_BitScanReverse64:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
+ case AArch64::BI_InterlockedAnd64:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
+ case AArch64::BI_InterlockedExchange64:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
+ case AArch64::BI_InterlockedExchangeAdd64:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
+ case AArch64::BI_InterlockedExchangeSub64:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
+ case AArch64::BI_InterlockedOr64:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
+ case AArch64::BI_InterlockedXor64:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
+ case AArch64::BI_InterlockedDecrement64:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
+ case AArch64::BI_InterlockedIncrement64:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
+ case AArch64::BI_InterlockedExchangeAdd8_acq:
+ case AArch64::BI_InterlockedExchangeAdd16_acq:
+ case AArch64::BI_InterlockedExchangeAdd_acq:
+ case AArch64::BI_InterlockedExchangeAdd64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_acq, E);
+ case AArch64::BI_InterlockedExchangeAdd8_rel:
+ case AArch64::BI_InterlockedExchangeAdd16_rel:
+ case AArch64::BI_InterlockedExchangeAdd_rel:
+ case AArch64::BI_InterlockedExchangeAdd64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_rel, E);
+ case AArch64::BI_InterlockedExchangeAdd8_nf:
+ case AArch64::BI_InterlockedExchangeAdd16_nf:
+ case AArch64::BI_InterlockedExchangeAdd_nf:
+ case AArch64::BI_InterlockedExchangeAdd64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_nf, E);
+ case AArch64::BI_InterlockedExchange8_acq:
+ case AArch64::BI_InterlockedExchange16_acq:
+ case AArch64::BI_InterlockedExchange_acq:
+ case AArch64::BI_InterlockedExchange64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_acq, E);
+ case AArch64::BI_InterlockedExchange8_rel:
+ case AArch64::BI_InterlockedExchange16_rel:
+ case AArch64::BI_InterlockedExchange_rel:
+ case AArch64::BI_InterlockedExchange64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_rel, E);
+ case AArch64::BI_InterlockedExchange8_nf:
+ case AArch64::BI_InterlockedExchange16_nf:
+ case AArch64::BI_InterlockedExchange_nf:
+ case AArch64::BI_InterlockedExchange64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_nf, E);
+ case AArch64::BI_InterlockedCompareExchange8_acq:
+ case AArch64::BI_InterlockedCompareExchange16_acq:
+ case AArch64::BI_InterlockedCompareExchange_acq:
+ case AArch64::BI_InterlockedCompareExchange64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_acq, E);
+ case AArch64::BI_InterlockedCompareExchange8_rel:
+ case AArch64::BI_InterlockedCompareExchange16_rel:
+ case AArch64::BI_InterlockedCompareExchange_rel:
+ case AArch64::BI_InterlockedCompareExchange64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_rel, E);
+ case AArch64::BI_InterlockedCompareExchange8_nf:
+ case AArch64::BI_InterlockedCompareExchange16_nf:
+ case AArch64::BI_InterlockedCompareExchange_nf:
+ case AArch64::BI_InterlockedCompareExchange64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_nf, E);
+ case AArch64::BI_InterlockedOr8_acq:
+ case AArch64::BI_InterlockedOr16_acq:
+ case AArch64::BI_InterlockedOr_acq:
+ case AArch64::BI_InterlockedOr64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_acq, E);
+ case AArch64::BI_InterlockedOr8_rel:
+ case AArch64::BI_InterlockedOr16_rel:
+ case AArch64::BI_InterlockedOr_rel:
+ case AArch64::BI_InterlockedOr64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_rel, E);
+ case AArch64::BI_InterlockedOr8_nf:
+ case AArch64::BI_InterlockedOr16_nf:
+ case AArch64::BI_InterlockedOr_nf:
+ case AArch64::BI_InterlockedOr64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_nf, E);
+ case AArch64::BI_InterlockedXor8_acq:
+ case AArch64::BI_InterlockedXor16_acq:
+ case AArch64::BI_InterlockedXor_acq:
+ case AArch64::BI_InterlockedXor64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_acq, E);
+ case AArch64::BI_InterlockedXor8_rel:
+ case AArch64::BI_InterlockedXor16_rel:
+ case AArch64::BI_InterlockedXor_rel:
+ case AArch64::BI_InterlockedXor64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_rel, E);
+ case AArch64::BI_InterlockedXor8_nf:
+ case AArch64::BI_InterlockedXor16_nf:
+ case AArch64::BI_InterlockedXor_nf:
+ case AArch64::BI_InterlockedXor64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_nf, E);
+ case AArch64::BI_InterlockedAnd8_acq:
+ case AArch64::BI_InterlockedAnd16_acq:
+ case AArch64::BI_InterlockedAnd_acq:
+ case AArch64::BI_InterlockedAnd64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_acq, E);
+ case AArch64::BI_InterlockedAnd8_rel:
+ case AArch64::BI_InterlockedAnd16_rel:
+ case AArch64::BI_InterlockedAnd_rel:
+ case AArch64::BI_InterlockedAnd64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_rel, E);
+ case AArch64::BI_InterlockedAnd8_nf:
+ case AArch64::BI_InterlockedAnd16_nf:
+ case AArch64::BI_InterlockedAnd_nf:
+ case AArch64::BI_InterlockedAnd64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_nf, E);
+ case AArch64::BI_InterlockedIncrement16_acq:
+ case AArch64::BI_InterlockedIncrement_acq:
+ case AArch64::BI_InterlockedIncrement64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_acq, E);
+ case AArch64::BI_InterlockedIncrement16_rel:
+ case AArch64::BI_InterlockedIncrement_rel:
+ case AArch64::BI_InterlockedIncrement64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_rel, E);
+ case AArch64::BI_InterlockedIncrement16_nf:
+ case AArch64::BI_InterlockedIncrement_nf:
+ case AArch64::BI_InterlockedIncrement64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_nf, E);
+ case AArch64::BI_InterlockedDecrement16_acq:
+ case AArch64::BI_InterlockedDecrement_acq:
+ case AArch64::BI_InterlockedDecrement64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_acq, E);
+ case AArch64::BI_InterlockedDecrement16_rel:
+ case AArch64::BI_InterlockedDecrement_rel:
+ case AArch64::BI_InterlockedDecrement64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_rel, E);
+ case AArch64::BI_InterlockedDecrement16_nf:
+ case AArch64::BI_InterlockedDecrement_nf:
+ case AArch64::BI_InterlockedDecrement64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_nf, E);
+
+ case AArch64::BI_InterlockedAdd: {
+ Value *Arg0 = EmitScalarExpr(E->getArg(0));
+ Value *Arg1 = EmitScalarExpr(E->getArg(1));
+ AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
+ AtomicRMWInst::Add, Arg0, Arg1,
+ llvm::AtomicOrdering::SequentiallyConsistent);
+ return Builder.CreateAdd(RMWI, Arg1);
+ }
}
llvm::VectorType *VTy = GetNeonType(this, Type);
@@ -9128,152 +9297,38 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Int = Intrinsic::aarch64_neon_suqadd;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
}
- case AArch64::BI_BitScanForward:
- case AArch64::BI_BitScanForward64:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
- case AArch64::BI_BitScanReverse:
- case AArch64::BI_BitScanReverse64:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
- case AArch64::BI_InterlockedAnd64:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
- case AArch64::BI_InterlockedExchange64:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
- case AArch64::BI_InterlockedExchangeAdd64:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
- case AArch64::BI_InterlockedExchangeSub64:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
- case AArch64::BI_InterlockedOr64:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
- case AArch64::BI_InterlockedXor64:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
- case AArch64::BI_InterlockedDecrement64:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
- case AArch64::BI_InterlockedIncrement64:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
- case AArch64::BI_InterlockedExchangeAdd8_acq:
- case AArch64::BI_InterlockedExchangeAdd16_acq:
- case AArch64::BI_InterlockedExchangeAdd_acq:
- case AArch64::BI_InterlockedExchangeAdd64_acq:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_acq, E);
- case AArch64::BI_InterlockedExchangeAdd8_rel:
- case AArch64::BI_InterlockedExchangeAdd16_rel:
- case AArch64::BI_InterlockedExchangeAdd_rel:
- case AArch64::BI_InterlockedExchangeAdd64_rel:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_rel, E);
- case AArch64::BI_InterlockedExchangeAdd8_nf:
- case AArch64::BI_InterlockedExchangeAdd16_nf:
- case AArch64::BI_InterlockedExchangeAdd_nf:
- case AArch64::BI_InterlockedExchangeAdd64_nf:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_nf, E);
- case AArch64::BI_InterlockedExchange8_acq:
- case AArch64::BI_InterlockedExchange16_acq:
- case AArch64::BI_InterlockedExchange_acq:
- case AArch64::BI_InterlockedExchange64_acq:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_acq, E);
- case AArch64::BI_InterlockedExchange8_rel:
- case AArch64::BI_InterlockedExchange16_rel:
- case AArch64::BI_InterlockedExchange_rel:
- case AArch64::BI_InterlockedExchange64_rel:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_rel, E);
- case AArch64::BI_InterlockedExchange8_nf:
- case AArch64::BI_InterlockedExchange16_nf:
- case AArch64::BI_InterlockedExchange_nf:
- case AArch64::BI_InterlockedExchange64_nf:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_nf, E);
- case AArch64::BI_InterlockedCompareExchange8_acq:
- case AArch64::BI_InterlockedCompareExchange16_acq:
- case AArch64::BI_InterlockedCompareExchange_acq:
- case AArch64::BI_InterlockedCompareExchange64_acq:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_acq, E);
- case AArch64::BI_InterlockedCompareExchange8_rel:
- case AArch64::BI_InterlockedCompareExchange16_rel:
- case AArch64::BI_InterlockedCompareExchange_rel:
- case AArch64::BI_InterlockedCompareExchange64_rel:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_rel, E);
- case AArch64::BI_InterlockedCompareExchange8_nf:
- case AArch64::BI_InterlockedCompareExchange16_nf:
- case AArch64::BI_InterlockedCompareExchange_nf:
- case AArch64::BI_InterlockedCompareExchange64_nf:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_nf, E);
- case AArch64::BI_InterlockedOr8_acq:
- case AArch64::BI_InterlockedOr16_acq:
- case AArch64::BI_InterlockedOr_acq:
- case AArch64::BI_InterlockedOr64_acq:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_acq, E);
- case AArch64::BI_InterlockedOr8_rel:
- case AArch64::BI_InterlockedOr16_rel:
- case AArch64::BI_InterlockedOr_rel:
- case AArch64::BI_InterlockedOr64_rel:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_rel, E);
- case AArch64::BI_InterlockedOr8_nf:
- case AArch64::BI_InterlockedOr16_nf:
- case AArch64::BI_InterlockedOr_nf:
- case AArch64::BI_InterlockedOr64_nf:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_nf, E);
- case AArch64::BI_InterlockedXor8_acq:
- case AArch64::BI_InterlockedXor16_acq:
- case AArch64::BI_InterlockedXor_acq:
- case AArch64::BI_InterlockedXor64_acq:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_acq, E);
- case AArch64::BI_InterlockedXor8_rel:
- case AArch64::BI_InterlockedXor16_rel:
- case AArch64::BI_InterlockedXor_rel:
- case AArch64::BI_InterlockedXor64_rel:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_rel, E);
- case AArch64::BI_InterlockedXor8_nf:
- case AArch64::BI_InterlockedXor16_nf:
- case AArch64::BI_InterlockedXor_nf:
- case AArch64::BI_InterlockedXor64_nf:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_nf, E);
- case AArch64::BI_InterlockedAnd8_acq:
- case AArch64::BI_InterlockedAnd16_acq:
- case AArch64::BI_InterlockedAnd_acq:
- case AArch64::BI_InterlockedAnd64_acq:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_acq, E);
- case AArch64::BI_InterlockedAnd8_rel:
- case AArch64::BI_InterlockedAnd16_rel:
- case AArch64::BI_InterlockedAnd_rel:
- case AArch64::BI_InterlockedAnd64_rel:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_rel, E);
- case AArch64::BI_InterlockedAnd8_nf:
- case AArch64::BI_InterlockedAnd16_nf:
- case AArch64::BI_InterlockedAnd_nf:
- case AArch64::BI_InterlockedAnd64_nf:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_nf, E);
- case AArch64::BI_InterlockedIncrement16_acq:
- case AArch64::BI_InterlockedIncrement_acq:
- case AArch64::BI_InterlockedIncrement64_acq:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_acq, E);
- case AArch64::BI_InterlockedIncrement16_rel:
- case AArch64::BI_InterlockedIncrement_rel:
- case AArch64::BI_InterlockedIncrement64_rel:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_rel, E);
- case AArch64::BI_InterlockedIncrement16_nf:
- case AArch64::BI_InterlockedIncrement_nf:
- case AArch64::BI_InterlockedIncrement64_nf:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_nf, E);
- case AArch64::BI_InterlockedDecrement16_acq:
- case AArch64::BI_InterlockedDecrement_acq:
- case AArch64::BI_InterlockedDecrement64_acq:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_acq, E);
- case AArch64::BI_InterlockedDecrement16_rel:
- case AArch64::BI_InterlockedDecrement_rel:
- case AArch64::BI_InterlockedDecrement64_rel:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_rel, E);
- case AArch64::BI_InterlockedDecrement16_nf:
- case AArch64::BI_InterlockedDecrement_nf:
- case AArch64::BI_InterlockedDecrement64_nf:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_nf, E);
-
- case AArch64::BI_InterlockedAdd: {
- Value *Arg0 = EmitScalarExpr(E->getArg(0));
- Value *Arg1 = EmitScalarExpr(E->getArg(1));
- AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
- AtomicRMWInst::Add, Arg0, Arg1,
- llvm::AtomicOrdering::SequentiallyConsistent);
- return Builder.CreateAdd(RMWI, Arg1);
}
+}
+
+Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
+ const CallExpr *E) {
+ assert(BuiltinID == BPF::BI__builtin_preserve_field_info &&
+ "unexpected BPF builtin");
+
+ const Expr *Arg = E->getArg(0);
+ bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;
+
+ if (!getDebugInfo()) {
+ CGM.Error(E->getExprLoc(), "using builtin_preserve_field_info() without -g");
+ return IsBitField ? EmitLValue(Arg).getBitFieldPointer()
+ : EmitLValue(Arg).getPointer();
}
+
+ // Enable underlying preserve_*_access_index() generation.
+ bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
+ IsInPreservedAIRegion = true;
+ Value *FieldAddr = IsBitField ? EmitLValue(Arg).getBitFieldPointer()
+ : EmitLValue(Arg).getPointer();
+ IsInPreservedAIRegion = OldIsInPreservedAIRegion;
+
+ ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
+ Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());
+
+ // Build the IR for the preserve_field_info intrinsic.
+ llvm::Function *FnGetFieldInfo = llvm::Intrinsic::getDeclaration(
+ &CGM.getModule(), llvm::Intrinsic::bpf_preserve_field_info,
+ {FieldAddr->getType()});
+ return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
}
llvm::Value *CodeGenFunction::
@@ -10034,7 +10089,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1);
Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3);
Value *Data = ConstantInt::get(Int32Ty, 1);
- Function *F = CGM.getIntrinsic(Intrinsic::prefetch);
+ Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
return Builder.CreateCall(F, {Address, RW, Locality, Data});
}
case X86::BI_mm_clflush: {
@@ -11169,7 +11224,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
// Unaligned nontemporal store of the scalar value.
StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC);
SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
- SI->setAlignment(1);
+ SI->setAlignment(llvm::Align::None());
return SI;
}
// Rotate is a special case of funnel shift - 1st 2 args are the same.
@@ -12113,7 +12168,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
}
case X86::BI_AddressOfReturnAddress: {
- Function *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress);
+ Function *F =
+ CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
return Builder.CreateCall(F);
}
case X86::BI__stosb: {
@@ -13924,6 +13980,15 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_size, ResultType);
return Builder.CreateCall(Callee);
}
+ case WebAssembly::BI__builtin_wasm_tls_align: {
+ llvm::Type *ResultType = ConvertType(E->getType());
+ Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_align, ResultType);
+ return Builder.CreateCall(Callee);
+ }
+ case WebAssembly::BI__builtin_wasm_tls_base: {
+ Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_base);
+ return Builder.CreateCall(Callee);
+ }
case WebAssembly::BI__builtin_wasm_throw: {
Value *Tag = EmitScalarExpr(E->getArg(0));
Value *Obj = EmitScalarExpr(E->getArg(1));
@@ -13954,6 +14019,26 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_notify);
return Builder.CreateCall(Callee, {Addr, Count});
}
+ case WebAssembly::BI__builtin_wasm_trunc_s_i32_f32:
+ case WebAssembly::BI__builtin_wasm_trunc_s_i32_f64:
+ case WebAssembly::BI__builtin_wasm_trunc_s_i64_f32:
+ case WebAssembly::BI__builtin_wasm_trunc_s_i64_f64: {
+ Value *Src = EmitScalarExpr(E->getArg(0));
+ llvm::Type *ResT = ConvertType(E->getType());
+ Function *Callee =
+ CGM.getIntrinsic(Intrinsic::wasm_trunc_signed, {ResT, Src->getType()});
+ return Builder.CreateCall(Callee, {Src});
+ }
+ case WebAssembly::BI__builtin_wasm_trunc_u_i32_f32:
+ case WebAssembly::BI__builtin_wasm_trunc_u_i32_f64:
+ case WebAssembly::BI__builtin_wasm_trunc_u_i64_f32:
+ case WebAssembly::BI__builtin_wasm_trunc_u_i64_f64: {
+ Value *Src = EmitScalarExpr(E->getArg(0));
+ llvm::Type *ResT = ConvertType(E->getType());
+ Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_unsigned,
+ {ResT, Src->getType()});
+ return Builder.CreateCall(Callee, {Src});
+ }
case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32:
case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:
case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:
@@ -13998,6 +14083,12 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
ConvertType(E->getType()));
return Builder.CreateCall(Callee, {LHS, RHS});
}
+ case WebAssembly::BI__builtin_wasm_swizzle_v8x16: {
+ Value *Src = EmitScalarExpr(E->getArg(0));
+ Value *Indices = EmitScalarExpr(E->getArg(1));
+ Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_swizzle);
+ return Builder.CreateCall(Callee, {Src, Indices});
+ }
case WebAssembly::BI__builtin_wasm_extract_lane_s_i8x16:
case WebAssembly::BI__builtin_wasm_extract_lane_u_i8x16:
case WebAssembly::BI__builtin_wasm_extract_lane_s_i16x8:
@@ -14139,7 +14230,86 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
Function *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType());
return Builder.CreateCall(Callee, {Vec});
}
-
+ case WebAssembly::BI__builtin_wasm_qfma_f32x4:
+ case WebAssembly::BI__builtin_wasm_qfms_f32x4:
+ case WebAssembly::BI__builtin_wasm_qfma_f64x2:
+ case WebAssembly::BI__builtin_wasm_qfms_f64x2: {
+ Value *A = EmitScalarExpr(E->getArg(0));
+ Value *B = EmitScalarExpr(E->getArg(1));
+ Value *C = EmitScalarExpr(E->getArg(2));
+ unsigned IntNo;
+ switch (BuiltinID) {
+ case WebAssembly::BI__builtin_wasm_qfma_f32x4:
+ case WebAssembly::BI__builtin_wasm_qfma_f64x2:
+ IntNo = Intrinsic::wasm_qfma;
+ break;
+ case WebAssembly::BI__builtin_wasm_qfms_f32x4:
+ case WebAssembly::BI__builtin_wasm_qfms_f64x2:
+ IntNo = Intrinsic::wasm_qfms;
+ break;
+ default:
+ llvm_unreachable("unexpected builtin ID");
+ }
+ Function *Callee = CGM.getIntrinsic(IntNo, A->getType());
+ return Builder.CreateCall(Callee, {A, B, C});
+ }
+ case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
+ case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
+ case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
+ case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4: {
+ Value *Low = EmitScalarExpr(E->getArg(0));
+ Value *High = EmitScalarExpr(E->getArg(1));
+ unsigned IntNo;
+ switch (BuiltinID) {
+ case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
+ case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
+ IntNo = Intrinsic::wasm_narrow_signed;
+ break;
+ case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
+ case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4:
+ IntNo = Intrinsic::wasm_narrow_unsigned;
+ break;
+ default:
+ llvm_unreachable("unexpected builtin ID");
+ }
+ Function *Callee =
+ CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Low->getType()});
+ return Builder.CreateCall(Callee, {Low, High});
+ }
+ case WebAssembly::BI__builtin_wasm_widen_low_s_i16x8_i8x16:
+ case WebAssembly::BI__builtin_wasm_widen_high_s_i16x8_i8x16:
+ case WebAssembly::BI__builtin_wasm_widen_low_u_i16x8_i8x16:
+ case WebAssembly::BI__builtin_wasm_widen_high_u_i16x8_i8x16:
+ case WebAssembly::BI__builtin_wasm_widen_low_s_i32x4_i16x8:
+ case WebAssembly::BI__builtin_wasm_widen_high_s_i32x4_i16x8:
+ case WebAssembly::BI__builtin_wasm_widen_low_u_i32x4_i16x8:
+ case WebAssembly::BI__builtin_wasm_widen_high_u_i32x4_i16x8: {
+ Value *Vec = EmitScalarExpr(E->getArg(0));
+ unsigned IntNo;
+ switch (BuiltinID) {
+ case WebAssembly::BI__builtin_wasm_widen_low_s_i16x8_i8x16:
+ case WebAssembly::BI__builtin_wasm_widen_low_s_i32x4_i16x8:
+ IntNo = Intrinsic::wasm_widen_low_signed;
+ break;
+ case WebAssembly::BI__builtin_wasm_widen_high_s_i16x8_i8x16:
+ case WebAssembly::BI__builtin_wasm_widen_high_s_i32x4_i16x8:
+ IntNo = Intrinsic::wasm_widen_high_signed;
+ break;
+ case WebAssembly::BI__builtin_wasm_widen_low_u_i16x8_i8x16:
+ case WebAssembly::BI__builtin_wasm_widen_low_u_i32x4_i16x8:
+ IntNo = Intrinsic::wasm_widen_low_unsigned;
+ break;
+ case WebAssembly::BI__builtin_wasm_widen_high_u_i16x8_i8x16:
+ case WebAssembly::BI__builtin_wasm_widen_high_u_i32x4_i16x8:
+ IntNo = Intrinsic::wasm_widen_high_unsigned;
+ break;
+ default:
+ llvm_unreachable("unexpected builtin ID");
+ }
+ Function *Callee =
+ CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Vec->getType()});
+ return Builder.CreateCall(Callee, Vec);
+ }
default:
return nullptr;
}
diff --git a/lib/CodeGen/CGCUDANV.cpp b/lib/CodeGen/CGCUDANV.cpp
index 4d4038dae9cf..5c5cbaff0252 100644
--- a/lib/CodeGen/CGCUDANV.cpp
+++ b/lib/CodeGen/CGCUDANV.cpp
@@ -93,7 +93,7 @@ private:
GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::None);
}
if (Alignment)
- GV->setAlignment(Alignment);
+ GV->setAlignment(llvm::Align(Alignment));
return llvm::ConstantExpr::getGetElementPtr(ConstStr.getElementType(),
ConstStr.getPointer(), Zeros);
@@ -236,7 +236,8 @@ void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF,
EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl});
if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(),
- CudaFeature::CUDA_USES_NEW_LAUNCH))
+ CudaFeature::CUDA_USES_NEW_LAUNCH) ||
+ CGF.getLangOpts().HIPUseNewLaunchAPI)
emitDeviceStubBodyNew(CGF, Args);
else
emitDeviceStubBodyLegacy(CGF, Args);
@@ -264,14 +265,18 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end");
- // Lookup cudaLaunchKernel function.
+ // Lookup cudaLaunchKernel/hipLaunchKernel function.
// cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim,
// void **args, size_t sharedMem,
// cudaStream_t stream);
+ // hipError_t hipLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim,
+ // void **args, size_t sharedMem,
+ // hipStream_t stream);
TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl();
DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl);
+ auto LaunchKernelName = addPrefixToName("LaunchKernel");
IdentifierInfo &cudaLaunchKernelII =
- CGM.getContext().Idents.get("cudaLaunchKernel");
+ CGM.getContext().Idents.get(LaunchKernelName);
FunctionDecl *cudaLaunchKernelFD = nullptr;
for (const auto &Result : DC->lookup(&cudaLaunchKernelII)) {
if (FunctionDecl *FD = dyn_cast<FunctionDecl>(Result))
@@ -280,7 +285,7 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
if (cudaLaunchKernelFD == nullptr) {
CGM.Error(CGF.CurFuncDecl->getLocation(),
- "Can't find declaration for cudaLaunchKernel()");
+ "Can't find declaration for " + LaunchKernelName);
return;
}
// Create temporary dim3 grid_dim, block_dim.
@@ -301,7 +306,7 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
/*ShmemSize=*/ShmemSize.getType(),
/*Stream=*/Stream.getType()},
/*isVarArg=*/false),
- "__cudaPopCallConfiguration");
+ addUnderscoredPrefixToName("PopCallConfiguration"));
CGF.EmitRuntimeCallOrInvoke(cudaPopConfigFn,
{GridDim.getPointer(), BlockDim.getPointer(),
@@ -329,7 +334,7 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
const CGFunctionInfo &FI =
CGM.getTypes().arrangeFunctionDeclaration(cudaLaunchKernelFD);
llvm::FunctionCallee cudaLaunchKernelFn =
- CGM.CreateRuntimeFunction(FTy, "cudaLaunchKernel");
+ CGM.CreateRuntimeFunction(FTy, LaunchKernelName);
CGF.EmitCall(FI, CGCallee::forDirect(cudaLaunchKernelFn), ReturnValueSlot(),
LaunchKernelArgs);
CGF.EmitBranch(EndBlock);
@@ -623,7 +628,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
Linkage,
/*Initializer=*/llvm::ConstantPointerNull::get(VoidPtrPtrTy),
"__hip_gpubin_handle");
- GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getQuantity());
+ GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getAsAlign());
// Prevent the weak symbol in different shared libraries being merged.
if (Linkage != llvm::GlobalValue::InternalLinkage)
GpuBinaryHandle->setVisibility(llvm::GlobalValue::HiddenVisibility);
@@ -664,7 +669,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
GpuBinaryHandle = new llvm::GlobalVariable(
TheModule, VoidPtrPtrTy, false, llvm::GlobalValue::InternalLinkage,
llvm::ConstantPointerNull::get(VoidPtrPtrTy), "__cuda_gpubin_handle");
- GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getQuantity());
+ GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getAsAlign());
CtorBuilder.CreateAlignedStore(RegisterFatbinCall, GpuBinaryHandle,
CGM.getPointerAlign());
diff --git a/lib/CodeGen/CGCXX.cpp b/lib/CodeGen/CGCXX.cpp
index 6d903a0d09e2..7e5fe0fd6b1d 100644
--- a/lib/CodeGen/CGCXX.cpp
+++ b/lib/CodeGen/CGCXX.cpp
@@ -80,7 +80,7 @@ bool CodeGenModule::TryEmitBaseDestructorAsAlias(const CXXDestructorDecl *D) {
// Skip base classes with trivial destructors.
const auto *Base =
- cast<CXXRecordDecl>(I.getType()->getAs<RecordType>()->getDecl());
+ cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl());
if (Base->hasTrivialDestructor()) continue;
// If we've already found a base class with a non-trivial
@@ -104,8 +104,8 @@ bool CodeGenModule::TryEmitBaseDestructorAsAlias(const CXXDestructorDecl *D) {
// Give up if the calling conventions don't match. We could update the call,
// but it is probably not worth it.
const CXXDestructorDecl *BaseD = UniqueBase->getDestructor();
- if (BaseD->getType()->getAs<FunctionType>()->getCallConv() !=
- D->getType()->getAs<FunctionType>()->getCallConv())
+ if (BaseD->getType()->castAs<FunctionType>()->getCallConv() !=
+ D->getType()->castAs<FunctionType>()->getCallConv())
return true;
GlobalDecl AliasDecl(D, Dtor_Base);
diff --git a/lib/CodeGen/CGCXXABI.cpp b/lib/CodeGen/CGCXXABI.cpp
index 041c0f8959fd..23dae2b61d04 100644
--- a/lib/CodeGen/CGCXXABI.cpp
+++ b/lib/CodeGen/CGCXXABI.cpp
@@ -46,8 +46,8 @@ CGCallee CGCXXABI::EmitLoadOfMemberFunctionPointer(
ThisPtrForCall = This.getPointer();
const FunctionProtoType *FPT =
MPT->getPointeeType()->getAs<FunctionProtoType>();
- const CXXRecordDecl *RD =
- cast<CXXRecordDecl>(MPT->getClass()->getAs<RecordType>()->getDecl());
+ const auto *RD =
+ cast<CXXRecordDecl>(MPT->getClass()->castAs<RecordType>()->getDecl());
llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(
CGM.getTypes().arrangeCXXMethodType(RD, FPT, /*FD=*/nullptr));
llvm::Constant *FnPtr = llvm::Constant::getNullValue(FTy->getPointerTo());
diff --git a/lib/CodeGen/CGCXXABI.h b/lib/CodeGen/CGCXXABI.h
index 3a9c3b347439..bff49be7a3c4 100644
--- a/lib/CodeGen/CGCXXABI.h
+++ b/lib/CodeGen/CGCXXABI.h
@@ -577,7 +577,7 @@ public:
// Determine if references to thread_local global variables can be made
// directly or require access through a thread wrapper function.
- virtual bool usesThreadWrapperFunction() const = 0;
+ virtual bool usesThreadWrapperFunction(const VarDecl *VD) const = 0;
/// Emit a reference to a non-local thread_local variable (including
/// triggering the initialization of all thread_local variables in its
diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp
index cf8024550eee..b74f6f942426 100644
--- a/lib/CodeGen/CGCall.cpp
+++ b/lib/CodeGen/CGCall.cpp
@@ -903,7 +903,7 @@ struct NoExpansion : TypeExpansion {
static std::unique_ptr<TypeExpansion>
getTypeExpansion(QualType Ty, const ASTContext &Context) {
if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty)) {
- return llvm::make_unique<ConstantArrayExpansion>(
+ return std::make_unique<ConstantArrayExpansion>(
AT->getElementType(), AT->getSize().getZExtValue());
}
if (const RecordType *RT = Ty->getAs<RecordType>()) {
@@ -947,13 +947,13 @@ getTypeExpansion(QualType Ty, const ASTContext &Context) {
Fields.push_back(FD);
}
}
- return llvm::make_unique<RecordExpansion>(std::move(Bases),
+ return std::make_unique<RecordExpansion>(std::move(Bases),
std::move(Fields));
}
if (const ComplexType *CT = Ty->getAs<ComplexType>()) {
- return llvm::make_unique<ComplexExpansion>(CT->getElementType());
+ return std::make_unique<ComplexExpansion>(CT->getElementType());
}
- return llvm::make_unique<NoExpansion>();
+ return std::make_unique<NoExpansion>();
}
static int getExpansionSize(QualType Ty, const ASTContext &Context) {
@@ -1713,16 +1713,19 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone,
if (!CodeGenOpts.TrapFuncName.empty())
FuncAttrs.addAttribute("trap-func-name", CodeGenOpts.TrapFuncName);
} else {
- // Attributes that should go on the function, but not the call site.
- if (!CodeGenOpts.DisableFPElim) {
- FuncAttrs.addAttribute("no-frame-pointer-elim", "false");
- } else if (CodeGenOpts.OmitLeafFramePointer) {
- FuncAttrs.addAttribute("no-frame-pointer-elim", "false");
- FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf");
- } else {
- FuncAttrs.addAttribute("no-frame-pointer-elim", "true");
- FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf");
+ StringRef FpKind;
+ switch (CodeGenOpts.getFramePointer()) {
+ case CodeGenOptions::FramePointerKind::None:
+ FpKind = "none";
+ break;
+ case CodeGenOptions::FramePointerKind::NonLeaf:
+ FpKind = "non-leaf";
+ break;
+ case CodeGenOptions::FramePointerKind::All:
+ FpKind = "all";
+ break;
}
+ FuncAttrs.addAttribute("frame-pointer", FpKind);
FuncAttrs.addAttribute("less-precise-fpmad",
llvm::toStringRef(CodeGenOpts.LessPreciseFPMAD));
@@ -2123,8 +2126,8 @@ void CodeGenModule::ConstructAttributeList(
if (!PTy->isIncompleteType() && PTy->isConstantSizeType()) {
auto info = getContext().getTypeInfoInChars(PTy);
Attrs.addDereferenceableAttr(info.first.getQuantity());
- Attrs.addAttribute(llvm::Attribute::getWithAlignment(getLLVMContext(),
- info.second.getQuantity()));
+ Attrs.addAttribute(llvm::Attribute::getWithAlignment(
+ getLLVMContext(), info.second.getAsAlign()));
}
break;
}
@@ -3089,8 +3092,8 @@ void CodeGenFunction::EmitDelegateCallArg(CallArgList &args,
// Deactivate the cleanup for the callee-destructed param that was pushed.
if (hasAggregateEvaluationKind(type) && !CurFuncIsThunk &&
- type->getAs<RecordType>()->getDecl()->isParamDestroyedInCallee() &&
- type.isDestructedType()) {
+ type->castAs<RecordType>()->getDecl()->isParamDestroyedInCallee() &&
+ param->needsDestruction(getContext())) {
EHScopeStack::stable_iterator cleanup =
CalleeDestructedParamCleanups.lookup(cast<ParmVarDecl>(param));
assert(cleanup.isValid() &&
@@ -3574,7 +3577,7 @@ void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E,
// However, we still have to push an EH-only cleanup in case we unwind before
// we make it to the call.
if (HasAggregateEvalKind &&
- type->getAs<RecordType>()->getDecl()->isParamDestroyedInCallee()) {
+ type->castAs<RecordType>()->getDecl()->isParamDestroyedInCallee()) {
// If we're using inalloca, use the argument memory. Otherwise, use a
// temporary.
AggValueSlot Slot;
@@ -3838,7 +3841,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
AI = CreateTempAlloca(ArgStruct, "argmem");
}
auto Align = CallInfo.getArgStructAlignment();
- AI->setAlignment(Align.getQuantity());
+ AI->setAlignment(Align.getAsAlign());
AI->setUsedWithInAlloca(true);
assert(AI->isUsedWithInAlloca() && !AI->isStaticAlloca());
ArgMemory = Address(AI, Align);
@@ -3875,6 +3878,11 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
Address swiftErrorTemp = Address::invalid();
Address swiftErrorArg = Address::invalid();
+ // When passing arguments using temporary allocas, we need to add the
+ // appropriate lifetime markers. This vector keeps track of all the lifetime
+ // markers that need to be ended right after the call.
+ SmallVector<CallLifetimeEnd, 2> CallLifetimeEndAfterCall;
+
// Translate all of the arguments as necessary to match the IR lowering.
assert(CallInfo.arg_size() == CallArgs.size() &&
"Mismatch between function signature & arguments.");
@@ -3991,6 +3999,18 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
Address AI = CreateMemTempWithoutCast(
I->Ty, ArgInfo.getIndirectAlign(), "byval-temp");
IRCallArgs[FirstIRArg] = AI.getPointer();
+
+ // Emit lifetime markers for the temporary alloca.
+ uint64_t ByvalTempElementSize =
+ CGM.getDataLayout().getTypeAllocSize(AI.getElementType());
+ llvm::Value *LifetimeSize =
+ EmitLifetimeStart(ByvalTempElementSize, AI.getPointer());
+
+ // Add cleanup code to emit the end lifetime marker after the call.
+ if (LifetimeSize) // In case we disabled lifetime markers.
+ CallLifetimeEndAfterCall.emplace_back(AI, LifetimeSize);
+
+ // Generate the copy.
I->copyInto(*this, AI);
} else {
// Skip the extra memcpy call.
@@ -4129,11 +4149,12 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
auto scalarAlign = CGM.getDataLayout().getPrefTypeAlignment(scalarType);
// Materialize to a temporary.
- addr = CreateTempAlloca(RV.getScalarVal()->getType(),
- CharUnits::fromQuantity(std::max(
- layout->getAlignment(), scalarAlign)),
- "tmp",
- /*ArraySize=*/nullptr, &AllocaAddr);
+ addr = CreateTempAlloca(
+ RV.getScalarVal()->getType(),
+ CharUnits::fromQuantity(std::max(
+ (unsigned)layout->getAlignment().value(), scalarAlign)),
+ "tmp",
+ /*ArraySize=*/nullptr, &AllocaAddr);
tempSize = EmitLifetimeStart(scalarSize, AllocaAddr.getPointer());
Builder.CreateStore(RV.getScalarVal(), addr);
@@ -4273,8 +4294,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// Update the largest vector width if any arguments have vector types.
for (unsigned i = 0; i < IRCallArgs.size(); ++i) {
if (auto *VT = dyn_cast<llvm::VectorType>(IRCallArgs[i]->getType()))
- LargestVectorWidth = std::max(LargestVectorWidth,
- VT->getPrimitiveSizeInBits());
+ LargestVectorWidth = std::max((uint64_t)LargestVectorWidth,
+ VT->getPrimitiveSizeInBits().getFixedSize());
}
// Compute the calling convention and attributes.
@@ -4357,8 +4378,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// Update largest vector width from the return type.
if (auto *VT = dyn_cast<llvm::VectorType>(CI->getType()))
- LargestVectorWidth = std::max(LargestVectorWidth,
- VT->getPrimitiveSizeInBits());
+ LargestVectorWidth = std::max((uint64_t)LargestVectorWidth,
+ VT->getPrimitiveSizeInBits().getFixedSize());
// Insert instrumentation or attach profile metadata at indirect call sites.
// For more details, see the comment before the definition of
@@ -4548,7 +4569,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
llvm::Value *Alignment = EmitScalarExpr(AA->getAlignment());
llvm::ConstantInt *AlignmentCI = cast<llvm::ConstantInt>(Alignment);
EmitAlignmentAssumption(Ret.getScalarVal(), RetTy, Loc, AA->getLocation(),
- AlignmentCI->getZExtValue(), OffsetValue);
+ AlignmentCI, OffsetValue);
} else if (const auto *AA = TargetDecl->getAttr<AllocAlignAttr>()) {
llvm::Value *AlignmentVal = CallArgs[AA->getParamIndex().getLLVMIndex()]
.getRValue(*this)
@@ -4558,6 +4579,11 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
}
}
+ // Explicitly call CallLifetimeEnd::Emit just to re-use the code even though
+ // we can't use the full cleanup mechanism.
+ for (CallLifetimeEnd &LifetimeEnd : CallLifetimeEndAfterCall)
+ LifetimeEnd.Emit(*this, /*Flags=*/{});
+
return Ret;
}
diff --git a/lib/CodeGen/CGClass.cpp b/lib/CodeGen/CGClass.cpp
index c8bb63c5c4b1..04ef912b18bd 100644
--- a/lib/CodeGen/CGClass.cpp
+++ b/lib/CodeGen/CGClass.cpp
@@ -161,8 +161,8 @@ CharUnits CodeGenModule::computeNonVirtualBaseClassOffset(
// Get the layout.
const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD);
- const CXXRecordDecl *BaseDecl =
- cast<CXXRecordDecl>(Base->getType()->getAs<RecordType>()->getDecl());
+ const auto *BaseDecl =
+ cast<CXXRecordDecl>(Base->getType()->castAs<RecordType>()->getDecl());
// Add the offset.
Offset += Layout.getBaseClassOffset(BaseDecl);
@@ -246,7 +246,8 @@ ApplyNonVirtualAndVirtualOffset(CodeGenFunction &CGF, Address addr,
// Apply the base offset.
llvm::Value *ptr = addr.getPointer();
- ptr = CGF.Builder.CreateBitCast(ptr, CGF.Int8PtrTy);
+ unsigned AddrSpace = ptr->getType()->getPointerAddressSpace();
+ ptr = CGF.Builder.CreateBitCast(ptr, CGF.Int8Ty->getPointerTo(AddrSpace));
ptr = CGF.Builder.CreateInBoundsGEP(ptr, baseOffset, "add.ptr");
// If we have a virtual component, the alignment of the result will
@@ -279,8 +280,8 @@ Address CodeGenFunction::GetAddressOfBaseClass(
// *start* with a step down to the correct virtual base subobject,
// and hence will not require any further steps.
if ((*Start)->isVirtual()) {
- VBase =
- cast<CXXRecordDecl>((*Start)->getType()->getAs<RecordType>()->getDecl());
+ VBase = cast<CXXRecordDecl>(
+ (*Start)->getType()->castAs<RecordType>()->getDecl());
++Start;
}
@@ -381,7 +382,9 @@ CodeGenFunction::GetAddressOfDerivedClass(Address BaseAddr,
QualType DerivedTy =
getContext().getCanonicalType(getContext().getTagDeclType(Derived));
- llvm::Type *DerivedPtrTy = ConvertType(DerivedTy)->getPointerTo();
+ unsigned AddrSpace =
+ BaseAddr.getPointer()->getType()->getPointerAddressSpace();
+ llvm::Type *DerivedPtrTy = ConvertType(DerivedTy)->getPointerTo(AddrSpace);
llvm::Value *NonVirtualOffset =
CGM.GetNonVirtualBaseClassOffset(Derived, PathBegin, PathEnd);
@@ -536,8 +539,8 @@ static void EmitBaseInitializer(CodeGenFunction &CGF,
Address ThisPtr = CGF.LoadCXXThisAddress();
const Type *BaseType = BaseInit->getBaseClass();
- CXXRecordDecl *BaseClassDecl =
- cast<CXXRecordDecl>(BaseType->getAs<RecordType>()->getDecl());
+ const auto *BaseClassDecl =
+ cast<CXXRecordDecl>(BaseType->castAs<RecordType>()->getDecl());
bool isBaseVirtual = BaseInit->isBaseVirtual();
@@ -739,7 +742,7 @@ bool CodeGenFunction::IsConstructorDelegationValid(
// We also disable the optimization for variadic functions because
// it's impossible to "re-pass" varargs.
- if (Ctor->getType()->getAs<FunctionProtoType>()->isVariadic())
+ if (Ctor->getType()->castAs<FunctionProtoType>()->isVariadic())
return false;
// FIXME: Decide if we can do a delegation of a delegating constructor.
@@ -1245,7 +1248,7 @@ namespace {
static bool isInitializerOfDynamicClass(const CXXCtorInitializer *BaseInit) {
const Type *BaseType = BaseInit->getBaseClass();
const auto *BaseClassDecl =
- cast<CXXRecordDecl>(BaseType->getAs<RecordType>()->getDecl());
+ cast<CXXRecordDecl>(BaseType->castAs<RecordType>()->getDecl());
return BaseClassDecl->isDynamicClass();
}
@@ -1814,8 +1817,8 @@ void CodeGenFunction::EnterDtorCleanups(const CXXDestructorDecl *DD,
// We push them in the forward order so that they'll be popped in
// the reverse order.
for (const auto &Base : ClassDecl->vbases()) {
- CXXRecordDecl *BaseClassDecl
- = cast<CXXRecordDecl>(Base.getType()->getAs<RecordType>()->getDecl());
+ auto *BaseClassDecl =
+ cast<CXXRecordDecl>(Base.getType()->castAs<RecordType>()->getDecl());
// Ignore trivial destructors.
if (BaseClassDecl->hasTrivialDestructor())
@@ -2083,7 +2086,7 @@ static bool canEmitDelegateCallArgs(CodeGenFunction &CGF,
if (CGF.getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee()) {
// If the parameters are callee-cleanup, it's not safe to forward.
for (auto *P : Ctor->parameters())
- if (P->getType().isDestructedType())
+ if (P->needsDestruction(CGF.getContext()))
return false;
// Likewise if they're inalloca.
@@ -2530,8 +2533,8 @@ void CodeGenFunction::getVTablePointers(BaseSubobject Base,
// Traverse bases.
for (const auto &I : RD->bases()) {
- CXXRecordDecl *BaseDecl
- = cast<CXXRecordDecl>(I.getType()->getAs<RecordType>()->getDecl());
+ auto *BaseDecl =
+ cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl());
// Ignore classes without a vtable.
if (!BaseDecl->isDynamicClass())
@@ -2784,11 +2787,16 @@ void CodeGenFunction::EmitVTablePtrCheck(const CXXRecordDecl *RD,
bool CodeGenFunction::ShouldEmitVTableTypeCheckedLoad(const CXXRecordDecl *RD) {
if (!CGM.getCodeGenOpts().WholeProgramVTables ||
- !SanOpts.has(SanitizerKind::CFIVCall) ||
- !CGM.getCodeGenOpts().SanitizeTrap.has(SanitizerKind::CFIVCall) ||
!CGM.HasHiddenLTOVisibility(RD))
return false;
+ if (CGM.getCodeGenOpts().VirtualFunctionElimination)
+ return true;
+
+ if (!SanOpts.has(SanitizerKind::CFIVCall) ||
+ !CGM.getCodeGenOpts().SanitizeTrap.has(SanitizerKind::CFIVCall))
+ return false;
+
std::string TypeName = RD->getQualifiedNameAsString();
return !getContext().getSanitizerBlacklist().isBlacklistedType(
SanitizerKind::CFIVCall, TypeName);
@@ -2811,8 +2819,13 @@ llvm::Value *CodeGenFunction::EmitVTableTypeCheckedLoad(
TypeId});
llvm::Value *CheckResult = Builder.CreateExtractValue(CheckedLoad, 1);
- EmitCheck(std::make_pair(CheckResult, SanitizerKind::CFIVCall),
- SanitizerHandler::CFICheckFail, nullptr, nullptr);
+ std::string TypeName = RD->getQualifiedNameAsString();
+ if (SanOpts.has(SanitizerKind::CFIVCall) &&
+ !getContext().getSanitizerBlacklist().isBlacklistedType(
+ SanitizerKind::CFIVCall, TypeName)) {
+ EmitCheck(std::make_pair(CheckResult, SanitizerKind::CFIVCall),
+ SanitizerHandler::CFICheckFail, {}, {});
+ }
return Builder.CreateBitCast(
Builder.CreateExtractValue(CheckedLoad, 0),
diff --git a/lib/CodeGen/CGCleanup.cpp b/lib/CodeGen/CGCleanup.cpp
index 5594f3030229..c117dd5c25c1 100644
--- a/lib/CodeGen/CGCleanup.cpp
+++ b/lib/CodeGen/CGCleanup.cpp
@@ -304,13 +304,13 @@ void EHScopeStack::Cleanup::anchor() {}
static void createStoreInstBefore(llvm::Value *value, Address addr,
llvm::Instruction *beforeInst) {
auto store = new llvm::StoreInst(value, addr.getPointer(), beforeInst);
- store->setAlignment(addr.getAlignment().getQuantity());
+ store->setAlignment(addr.getAlignment().getAsAlign());
}
static llvm::LoadInst *createLoadInstBefore(Address addr, const Twine &name,
llvm::Instruction *beforeInst) {
auto load = new llvm::LoadInst(addr.getPointer(), name, beforeInst);
- load->setAlignment(addr.getAlignment().getQuantity());
+ load->setAlignment(addr.getAlignment().getAsAlign());
return load;
}
@@ -740,14 +740,15 @@ void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough) {
// here. Unfortunately, if you ask for a SmallVector<char>, the
// alignment isn't sufficient.
auto *CleanupSource = reinterpret_cast<char *>(Scope.getCleanupBuffer());
- llvm::AlignedCharArray<EHScopeStack::ScopeStackAlignment, 8 * sizeof(void *)> CleanupBufferStack;
+ alignas(EHScopeStack::ScopeStackAlignment) char
+ CleanupBufferStack[8 * sizeof(void *)];
std::unique_ptr<char[]> CleanupBufferHeap;
size_t CleanupSize = Scope.getCleanupSize();
EHScopeStack::Cleanup *Fn;
if (CleanupSize <= sizeof(CleanupBufferStack)) {
- memcpy(CleanupBufferStack.buffer, CleanupSource, CleanupSize);
- Fn = reinterpret_cast<EHScopeStack::Cleanup *>(CleanupBufferStack.buffer);
+ memcpy(CleanupBufferStack, CleanupSource, CleanupSize);
+ Fn = reinterpret_cast<EHScopeStack::Cleanup *>(CleanupBufferStack);
} else {
CleanupBufferHeap.reset(new char[CleanupSize]);
memcpy(CleanupBufferHeap.get(), CleanupSource, CleanupSize);
diff --git a/lib/CodeGen/CGDebugInfo.cpp b/lib/CodeGen/CGDebugInfo.cpp
index f6ee7ee26d4b..7c63743f3b43 100644
--- a/lib/CodeGen/CGDebugInfo.cpp
+++ b/lib/CodeGen/CGDebugInfo.cpp
@@ -314,7 +314,9 @@ StringRef CGDebugInfo::getClassName(const RecordDecl *RD) {
if (isa<ClassTemplateSpecializationDecl>(RD)) {
SmallString<128> Name;
llvm::raw_svector_ostream OS(Name);
- RD->getNameForDiagnostic(OS, getPrintingPolicy(),
+ PrintingPolicy PP = getPrintingPolicy();
+ PP.PrintCanonicalTypes = true;
+ RD->getNameForDiagnostic(OS, PP,
/*Qualified*/ false);
// Copy this name on the side and use its reference.
@@ -537,11 +539,11 @@ void CGDebugInfo::CreateCompileUnit() {
// file to determine the real absolute path for the file.
std::string MainFileDir;
if (const FileEntry *MainFile = SM.getFileEntryForID(SM.getMainFileID())) {
- MainFileDir = remapDIPath(MainFile->getDir()->getName());
- if (MainFileDir != ".") {
+ MainFileDir = MainFile->getDir()->getName();
+ if (!llvm::sys::path::is_absolute(MainFileName)) {
llvm::SmallString<1024> MainFileDirSS(MainFileDir);
llvm::sys::path::append(MainFileDirSS, MainFileName);
- MainFileName = MainFileDirSS.str();
+ MainFileName = llvm::sys::path::remove_leading_dotslash(MainFileDirSS);
}
// If the main file name provided is identical to the input file name, and
// if the input file is a preprocessed source, use the module name for
@@ -561,6 +563,10 @@ void CGDebugInfo::CreateCompileUnit() {
if (LO.CPlusPlus) {
if (LO.ObjC)
LangTag = llvm::dwarf::DW_LANG_ObjC_plus_plus;
+ else if (LO.CPlusPlus14)
+ LangTag = llvm::dwarf::DW_LANG_C_plus_plus_14;
+ else if (LO.CPlusPlus11)
+ LangTag = llvm::dwarf::DW_LANG_C_plus_plus_11;
else
LangTag = llvm::dwarf::DW_LANG_C_plus_plus;
} else if (LO.ObjC) {
@@ -697,6 +703,22 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
case BuiltinType::Id: \
return getOrCreateStructPtrType("opencl_" #ExtType, Id##Ty);
#include "clang/Basic/OpenCLExtensionTypes.def"
+ // TODO: real support for SVE types requires more infrastructure
+ // to be added first. The types have a variable length and are
+ // represented in debug info as types whose length depends on a
+ // target-specific pseudo register.
+#define SVE_TYPE(Name, Id, SingletonId) \
+ case BuiltinType::Id:
+#include "clang/Basic/AArch64SVEACLETypes.def"
+ {
+ unsigned DiagID = CGM.getDiags().getCustomDiagID(
+ DiagnosticsEngine::Error,
+ "cannot yet generate debug info for SVE type '%0'");
+ auto Name = BT->getName(CGM.getContext().getPrintingPolicy());
+ CGM.getDiags().Report(DiagID) << Name;
+ // Return something safe.
+ return CreateType(cast<const BuiltinType>(CGM.getContext().IntTy));
+ }
case BuiltinType::UChar:
case BuiltinType::Char_U:
@@ -862,6 +884,8 @@ llvm::DIType *CGDebugInfo::CreateType(const PointerType *Ty,
static bool hasCXXMangling(const TagDecl *TD, llvm::DICompileUnit *TheCU) {
switch (TheCU->getSourceLanguage()) {
case llvm::dwarf::DW_LANG_C_plus_plus:
+ case llvm::dwarf::DW_LANG_C_plus_plus_11:
+ case llvm::dwarf::DW_LANG_C_plus_plus_14:
return true;
case llvm::dwarf::DW_LANG_ObjC_plus_plus:
return isa<CXXRecordDecl>(TD) || isa<EnumDecl>(TD);
@@ -1583,6 +1607,8 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction(
ContainingType = RecordTy;
}
+ if (Method->isNoReturn())
+ Flags |= llvm::DINode::FlagNoReturn;
if (Method->isStatic())
Flags |= llvm::DINode::FlagStaticMember;
if (Method->isImplicit())
@@ -1637,7 +1663,7 @@ void CGDebugInfo::CollectCXXMemberFunctions(
if (!Method || Method->isImplicit() || Method->hasAttr<NoDebugAttr>())
continue;
- if (Method->getType()->getAs<FunctionProtoType>()->getContainedAutoType())
+ if (Method->getType()->castAs<FunctionProtoType>()->getContainedAutoType())
continue;
// Reuse the existing member function declaration if it exists.
@@ -1677,7 +1703,7 @@ void CGDebugInfo::CollectCXXBasesAux(
const ASTRecordLayout &RL = CGM.getContext().getASTRecordLayout(RD);
for (const auto &BI : Bases) {
const auto *Base =
- cast<CXXRecordDecl>(BI.getType()->getAs<RecordType>()->getDecl());
+ cast<CXXRecordDecl>(BI.getType()->castAs<RecordType>()->getDecl());
if (!SeenTypes.insert(Base).second)
continue;
auto *BaseTy = getOrCreateType(BI.getType(), Unit);
@@ -1769,6 +1795,7 @@ CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList,
CGM.getContext().toCharUnitsFromBits((int64_t)fieldOffset);
V = CGM.getCXXABI().EmitMemberDataPointer(MPT, chars);
}
+ assert(V && "Failed to find template parameter pointer");
V = V->stripPointerCasts();
}
TemplateParams.push_back(DBuilder.createTemplateValueParameter(
@@ -2695,6 +2722,8 @@ llvm::DIType *CGDebugInfo::CreateType(const MemberPointerType *Ty,
break;
case MSInheritanceAttr::Keyword_unspecified_inheritance:
break;
+ case MSInheritanceAttr::SpellingNotCalculated:
+ llvm_unreachable("Spelling not yet calculated");
}
}
}
@@ -2978,7 +3007,7 @@ llvm::DIType *CGDebugInfo::CreateTypeNode(QualType Ty, llvm::DIFile *Unit) {
#define ABSTRACT_TYPE(Class, Base)
#define NON_CANONICAL_TYPE(Class, Base)
#define DEPENDENT_TYPE(Class, Base) case Type::Class:
-#include "clang/AST/TypeNodes.def"
+#include "clang/AST/TypeNodes.inc"
llvm_unreachable("Dependent types cannot show up in debug information");
case Type::ExtVector:
@@ -3105,7 +3134,8 @@ llvm::DICompositeType *CGDebugInfo::CreateLimitedType(const RecordType *Ty) {
SmallString<256> Identifier = getTypeIdentifier(Ty, CGM, TheCU);
- // Explicitly record the calling convention for C++ records.
+ // Explicitly record the calling convention and export symbols for C++
+ // records.
auto Flags = llvm::DINode::FlagZero;
if (auto CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
if (CGM.getCXXABI().getRecordArgABI(CXXRD) == CGCXXABI::RAA_Indirect)
@@ -3116,6 +3146,10 @@ llvm::DICompositeType *CGDebugInfo::CreateLimitedType(const RecordType *Ty) {
// Record if a C++ record is non-trivial type.
if (!CXXRD->isTrivial())
Flags |= llvm::DINode::FlagNonTrivial;
+
+ // Record exports its symbols to the containing structure.
+ if (CXXRD->isAnonymousStructOrUnion())
+ Flags |= llvm::DINode::FlagExportSymbols;
}
llvm::DICompositeType *RealDecl = DBuilder.createReplaceableCompositeType(
@@ -3247,8 +3281,8 @@ void CGDebugInfo::collectVarDeclProps(const VarDecl *VD, llvm::DIFile *&Unit,
llvm::APInt ConstVal(32, 1);
QualType ET = CGM.getContext().getAsArrayType(T)->getElementType();
- T = CGM.getContext().getConstantArrayType(ET, ConstVal, ArrayType::Normal,
- 0);
+ T = CGM.getContext().getConstantArrayType(ET, ConstVal, nullptr,
+ ArrayType::Normal, 0);
}
Name = VD->getName();
@@ -3298,13 +3332,13 @@ llvm::DISubprogram *CGDebugInfo::getFunctionFwdDeclOrStub(GlobalDecl GD,
unsigned Line = getLineNumber(Loc);
collectFunctionDeclProps(GD, Unit, Name, LinkageName, DContext, TParamsArray,
Flags);
- auto *FD = dyn_cast<FunctionDecl>(GD.getDecl());
+ auto *FD = cast<FunctionDecl>(GD.getDecl());
// Build function type.
SmallVector<QualType, 16> ArgTypes;
- if (FD)
- for (const ParmVarDecl *Parm : FD->parameters())
- ArgTypes.push_back(Parm->getType());
+ for (const ParmVarDecl *Parm : FD->parameters())
+ ArgTypes.push_back(Parm->getType());
+
CallingConv CC = FD->getType()->castAs<FunctionType>()->getCallConv();
QualType FnType = CGM.getContext().getFunctionType(
FD->getReturnType(), ArgTypes, FunctionProtoType::ExtProtoInfo(CC));
@@ -3677,8 +3711,7 @@ void CGDebugInfo::EmitFuncDeclForCallSite(llvm::CallBase *CallOrInvoke,
const FunctionDecl *CalleeDecl) {
auto &CGOpts = CGM.getCodeGenOpts();
if (!CGOpts.EnableDebugEntryValues || !CGM.getLangOpts().Optimize ||
- !CallOrInvoke ||
- CGM.getCodeGenOpts().getDebugInfo() < codegenoptions::LimitedDebugInfo)
+ !CallOrInvoke)
return;
auto *Func = CallOrInvoke->getCalledFunction();
@@ -3844,8 +3877,8 @@ CGDebugInfo::EmitTypeForVarWithBlocksAttr(const VarDecl *VD,
if (NumPaddingBytes.isPositive()) {
llvm::APInt pad(32, NumPaddingBytes.getQuantity());
- FType = CGM.getContext().getConstantArrayType(CGM.getContext().CharTy,
- pad, ArrayType::Normal, 0);
+ FType = CGM.getContext().getConstantArrayType(
+ CGM.getContext().CharTy, pad, nullptr, ArrayType::Normal, 0);
EltTys.push_back(CreateMemberType(Unit, FType, "", &FieldOffset));
}
}
@@ -4417,19 +4450,27 @@ void CGDebugInfo::EmitGlobalVariable(const ValueDecl *VD, const APValue &Init) {
StringRef Name = VD->getName();
llvm::DIType *Ty = getOrCreateType(VD->getType(), Unit);
- // Do not use global variables for enums, unless in CodeView.
if (const auto *ECD = dyn_cast<EnumConstantDecl>(VD)) {
const auto *ED = cast<EnumDecl>(ECD->getDeclContext());
assert(isa<EnumType>(ED->getTypeForDecl()) && "Enum without EnumType?");
- (void)ED;
-
- // If CodeView, emit enums as global variables, unless they are defined
- // inside a class. We do this because MSVC doesn't emit S_CONSTANTs for
- // enums in classes, and because it is difficult to attach this scope
- // information to the global variable.
- if (!CGM.getCodeGenOpts().EmitCodeView ||
- isa<RecordDecl>(ED->getDeclContext()))
+
+ if (CGM.getCodeGenOpts().EmitCodeView) {
+ // If CodeView, emit enums as global variables, unless they are defined
+ // inside a class. We do this because MSVC doesn't emit S_CONSTANTs for
+ // enums in classes, and because it is difficult to attach this scope
+ // information to the global variable.
+ if (isa<RecordDecl>(ED->getDeclContext()))
+ return;
+ } else {
+ // If not CodeView, emit DW_TAG_enumeration_type if necessary. For
+ // example: for "enum { ZERO };", a DW_TAG_enumeration_type is created the
+ // first time `ZERO` is referenced in a function.
+ llvm::DIType *EDTy =
+ getOrCreateType(QualType(ED->getTypeForDecl(), 0), Unit);
+ assert (EDTy->getTag() == llvm::dwarf::DW_TAG_enumeration_type);
+ (void)EDTy;
return;
+ }
}
llvm::DIScope *DContext = nullptr;
@@ -4524,7 +4565,7 @@ void CGDebugInfo::EmitUsingDecl(const UsingDecl &UD) {
// return type in the definition)
if (const auto *FD = dyn_cast<FunctionDecl>(USD.getUnderlyingDecl()))
if (const auto *AT =
- FD->getType()->getAs<FunctionProtoType>()->getContainedAutoType())
+ FD->getType()->castAs<FunctionProtoType>()->getContainedAutoType())
if (AT->getDeducedType().isNull())
return;
if (llvm::DINode *Target =
diff --git a/lib/CodeGen/CGDecl.cpp b/lib/CodeGen/CGDecl.cpp
index 6ad43cefc4d2..563841c068f6 100644
--- a/lib/CodeGen/CGDecl.cpp
+++ b/lib/CodeGen/CGDecl.cpp
@@ -250,7 +250,7 @@ llvm::Constant *CodeGenModule::getOrCreateStaticVarDecl(
llvm::GlobalVariable *GV = new llvm::GlobalVariable(
getModule(), LTy, Ty.isConstant(getContext()), Linkage, Init, Name,
nullptr, llvm::GlobalVariable::NotThreadLocal, TargetAS);
- GV->setAlignment(getContext().getDeclAlign(&D).getQuantity());
+ GV->setAlignment(getContext().getDeclAlign(&D).getAsAlign());
if (supportsCOMDAT() && GV->isWeakForLinker())
GV->setComdat(TheModule.getOrInsertComdat(GV->getName()));
@@ -305,14 +305,6 @@ llvm::Constant *CodeGenModule::getOrCreateStaticVarDecl(
return Addr;
}
-/// hasNontrivialDestruction - Determine whether a type's destruction is
-/// non-trivial. If so, and the variable uses static initialization, we must
-/// register its destructor to run on exit.
-static bool hasNontrivialDestruction(QualType T) {
- CXXRecordDecl *RD = T->getBaseElementTypeUnsafe()->getAsCXXRecordDecl();
- return RD && !RD->hasTrivialDestructor();
-}
-
/// AddInitializerToStaticVarDecl - Add the initializer for 'D' to the
/// global variable that has already been created for it. If the initializer
/// has a different type than GV does, this may free GV and return a different
@@ -372,7 +364,7 @@ CodeGenFunction::AddInitializerToStaticVarDecl(const VarDecl &D,
emitter.finalize(GV);
- if (hasNontrivialDestruction(D.getType()) && HaveInsertPoint()) {
+ if (D.needsDestruction(getContext()) && HaveInsertPoint()) {
// We have a constant initializer, but a nontrivial destructor. We still
// need to perform a guarded "initialization" in order to register the
// destructor.
@@ -416,7 +408,7 @@ void CodeGenFunction::EmitStaticVarDecl(const VarDecl &D,
if (D.getInit() && !isCudaSharedVar)
var = AddInitializerToStaticVarDecl(D, var);
- var->setAlignment(alignment.getQuantity());
+ var->setAlignment(alignment.getAsAlign());
if (D.hasAttr<AnnotateAttr>())
CGM.AddGlobalAnnotations(&D, var);
@@ -427,6 +419,8 @@ void CodeGenFunction::EmitStaticVarDecl(const VarDecl &D,
var->addAttribute("data-section", SA->getName());
if (auto *SA = D.getAttr<PragmaClangRodataSectionAttr>())
var->addAttribute("rodata-section", SA->getName());
+ if (auto *SA = D.getAttr<PragmaClangRelroSectionAttr>())
+ var->addAttribute("relro-section", SA->getName());
if (const SectionAttr *SA = D.getAttr<SectionAttr>())
var->setSection(SA->getName());
@@ -1120,11 +1114,11 @@ Address CodeGenModule::createUnnamedGlobalFrom(const VarDecl &D,
llvm::GlobalVariable *GV = new llvm::GlobalVariable(
getModule(), Ty, isConstant, llvm::GlobalValue::PrivateLinkage,
Constant, Name, InsertBefore, llvm::GlobalValue::NotThreadLocal, AS);
- GV->setAlignment(Align.getQuantity());
+ GV->setAlignment(Align.getAsAlign());
GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
CacheEntry = GV;
} else if (CacheEntry->getAlignment() < Align.getQuantity()) {
- CacheEntry->setAlignment(Align.getQuantity());
+ CacheEntry->setAlignment(Align.getAsAlign());
}
return Address(CacheEntry, Align);
@@ -1994,7 +1988,7 @@ void CodeGenFunction::EmitAutoVarCleanups(const AutoVarEmission &emission) {
const VarDecl &D = *emission.Variable;
// Check the type for a cleanup.
- if (QualType::DestructionKind dtorKind = D.getType().isDestructedType())
+ if (QualType::DestructionKind dtorKind = D.needsDestruction(getContext()))
emitAutoVarTypeCleanup(emission, dtorKind);
// In GC mode, honor objc_precise_lifetime.
@@ -2403,8 +2397,9 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg,
// Don't push a cleanup in a thunk for a method that will also emit a
// cleanup.
if (hasAggregateEvaluationKind(Ty) && !CurFuncIsThunk &&
- Ty->getAs<RecordType>()->getDecl()->isParamDestroyedInCallee()) {
- if (QualType::DestructionKind DtorKind = Ty.isDestructedType()) {
+ Ty->castAs<RecordType>()->getDecl()->isParamDestroyedInCallee()) {
+ if (QualType::DestructionKind DtorKind =
+ D.needsDestruction(getContext())) {
assert((DtorKind == QualType::DK_cxx_destructor ||
DtorKind == QualType::DK_nontrivial_c_struct) &&
"unexpected destructor type");
@@ -2496,10 +2491,11 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg,
setAddrOfLocalVar(&D, DeclPtr);
- // Emit debug info for param declaration.
+ // Emit debug info for param declarations in non-thunk functions.
if (CGDebugInfo *DI = getDebugInfo()) {
if (CGM.getCodeGenOpts().getDebugInfo() >=
- codegenoptions::LimitedDebugInfo) {
+ codegenoptions::LimitedDebugInfo &&
+ !CurFuncIsThunk) {
DI->EmitDeclareOfArgVariable(&D, DeclPtr.getPointer(), ArgNo, Builder);
}
}
@@ -2529,10 +2525,11 @@ void CodeGenModule::EmitOMPDeclareReduction(const OMPDeclareReductionDecl *D,
}
void CodeGenModule::EmitOMPDeclareMapper(const OMPDeclareMapperDecl *D,
- CodeGenFunction *CGF) {
- if (!LangOpts.OpenMP || (!LangOpts.EmitAllDecls && !D->isUsed()))
+ CodeGenFunction *CGF) {
+ if (!LangOpts.OpenMP || LangOpts.OpenMPSimd ||
+ (!LangOpts.EmitAllDecls && !D->isUsed()))
return;
- // FIXME: need to implement mapper code generation
+ getOpenMPRuntime().emitUserDefinedMapper(D, CGF);
}
void CodeGenModule::EmitOMPRequiresDecl(const OMPRequiresDecl *D) {
diff --git a/lib/CodeGen/CGDeclCXX.cpp b/lib/CodeGen/CGDeclCXX.cpp
index 7a0605b8450a..bf16b7bec4b1 100644
--- a/lib/CodeGen/CGDeclCXX.cpp
+++ b/lib/CodeGen/CGDeclCXX.cpp
@@ -73,16 +73,10 @@ static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D,
// that isn't balanced out by a destructor call as intended by the
// attribute. This also checks for -fno-c++-static-destructors and
// bails even if the attribute is not present.
- if (D.isNoDestroy(CGF.getContext()))
- return;
-
- CodeGenModule &CGM = CGF.CGM;
+ QualType::DestructionKind DtorKind = D.needsDestruction(CGF.getContext());
// FIXME: __attribute__((cleanup)) ?
- QualType Type = D.getType();
- QualType::DestructionKind DtorKind = Type.isDestructedType();
-
switch (DtorKind) {
case QualType::DK_none:
return;
@@ -101,6 +95,9 @@ static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D,
llvm::FunctionCallee Func;
llvm::Constant *Argument;
+ CodeGenModule &CGM = CGF.CGM;
+ QualType Type = D.getType();
+
// Special-case non-array C++ destructors, if they have the right signature.
// Under some ABIs, destructors return this instead of void, and cannot be
// passed directly to __cxa_atexit if the target does not allow this
@@ -251,8 +248,8 @@ llvm::Function *CodeGenFunction::createAtExitStub(const VarDecl &VD,
llvm::CallInst *call = CGF.Builder.CreateCall(dtor, addr);
// Make sure the call and the callee agree on calling convention.
- if (llvm::Function *dtorFn =
- dyn_cast<llvm::Function>(dtor.getCallee()->stripPointerCasts()))
+ if (auto *dtorFn = dyn_cast<llvm::Function>(
+ dtor.getCallee()->stripPointerCastsAndAliases()))
call->setCallingConv(dtorFn->getCallingConv());
CGF.FinishFunction();
diff --git a/lib/CodeGen/CGException.cpp b/lib/CodeGen/CGException.cpp
index 3b7a88a0b769..645d7a878e3b 100644
--- a/lib/CodeGen/CGException.cpp
+++ b/lib/CodeGen/CGException.cpp
@@ -165,10 +165,7 @@ static const EHPersonality &getCXXPersonality(const TargetInfo &Target,
return EHPersonality::GNU_CPlusPlus;
if (L.SEHExceptions)
return EHPersonality::GNU_CPlusPlus_SEH;
- // Wasm EH is a non-MVP feature for now.
- if (Target.hasFeature("exception-handling") &&
- (T.getArch() == llvm::Triple::wasm32 ||
- T.getArch() == llvm::Triple::wasm64))
+ if (L.WasmExceptions)
return EHPersonality::GNU_Wasm_CPlusPlus;
return EHPersonality::GNU_CPlusPlus;
}
@@ -1774,7 +1771,8 @@ void CodeGenFunction::EmitCapturedLocals(CodeGenFunction &ParentCGF,
// EH registration is passed in as the EBP physical register. We can
// recover that with llvm.frameaddress(1).
EntryFP = Builder.CreateCall(
- CGM.getIntrinsic(llvm::Intrinsic::frameaddress), {Builder.getInt32(1)});
+ CGM.getIntrinsic(llvm::Intrinsic::frameaddress, AllocaInt8PtrTy),
+ {Builder.getInt32(1)});
} else {
// Otherwise, for x64 and 32-bit finally functions, the parent FP is the
// second parameter.
diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp
index 5a4b1188b711..dcd365c8eaf0 100644
--- a/lib/CodeGen/CGExpr.cpp
+++ b/lib/CodeGen/CGExpr.cpp
@@ -66,7 +66,7 @@ Address CodeGenFunction::CreateTempAllocaWithoutCast(llvm::Type *Ty,
const Twine &Name,
llvm::Value *ArraySize) {
auto Alloca = CreateTempAlloca(Ty, Name, ArraySize);
- Alloca->setAlignment(Align.getQuantity());
+ Alloca->setAlignment(Align.getAsAlign());
return Address(Alloca, Align);
}
@@ -126,7 +126,7 @@ Address CodeGenFunction::CreateDefaultAlignTempAlloca(llvm::Type *Ty,
void CodeGenFunction::InitTempAlloca(Address Var, llvm::Value *Init) {
assert(isa<llvm::AllocaInst>(Var.getPointer()));
auto *Store = new llvm::StoreInst(Init, Var.getPointer());
- Store->setAlignment(Var.getAlignment().getQuantity());
+ Store->setAlignment(Var.getAlignment().getAsAlign());
llvm::BasicBlock *Block = AllocaInsertPt->getParent();
Block->getInstList().insertAfter(AllocaInsertPt->getIterator(), Store);
}
@@ -392,7 +392,7 @@ static Address createReferenceTemporary(CodeGenFunction &CGF,
llvm::GlobalValue::NotThreadLocal,
CGF.getContext().getTargetAddressSpace(AS));
CharUnits alignment = CGF.getContext().getTypeAlignInChars(Ty);
- GV->setAlignment(alignment.getQuantity());
+ GV->setAlignment(alignment.getAsAlign());
llvm::Constant *C = GV;
if (AS != LangAS::Default)
C = TCG.performAddrSpaceCast(
@@ -516,13 +516,13 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) {
// Avoid creating a conditional cleanup just to hold an llvm.lifetime.end
// marker. Instead, start the lifetime of a conditional temporary earlier
- // so that it's unconditional. Don't do this in ASan's use-after-scope
- // mode so that it gets the more precise lifetime marks. If the type has
- // a non-trivial destructor, we'll have a cleanup block for it anyway,
- // so this typically doesn't help; skip it in that case.
+ // so that it's unconditional. Don't do this with sanitizers which need
+ // more precise lifetime marks.
ConditionalEvaluation *OldConditional = nullptr;
CGBuilderTy::InsertPoint OldIP;
if (isInConditionalBranch() && !E->getType().isDestructedType() &&
+ !SanOpts.has(SanitizerKind::HWAddress) &&
+ !SanOpts.has(SanitizerKind::Memory) &&
!CGM.getCodeGenOpts().SanitizeAddressUseAfterScope) {
OldConditional = OutermostConditional;
OutermostConditional = nullptr;
@@ -677,8 +677,7 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc,
// Quickly determine whether we have a pointer to an alloca. It's possible
// to skip null checks, and some alignment checks, for these pointers. This
// can reduce compile-time significantly.
- auto PtrToAlloca =
- dyn_cast<llvm::AllocaInst>(Ptr->stripPointerCastsNoFollowAliases());
+ auto PtrToAlloca = dyn_cast<llvm::AllocaInst>(Ptr->stripPointerCasts());
llvm::Value *True = llvm::ConstantInt::getTrue(getLLVMContext());
llvm::Value *IsNonNull = nullptr;
@@ -998,7 +997,7 @@ EmitComplexPrePostIncDec(const UnaryOperator *E, LValue LV,
// Add the inc/dec to the real part.
NextVal = Builder.CreateAdd(InVal.first, NextVal, isInc ? "inc" : "dec");
} else {
- QualType ElemTy = E->getType()->getAs<ComplexType>()->getElementType();
+ QualType ElemTy = E->getType()->castAs<ComplexType>()->getElementType();
llvm::APFloat FVal(getContext().getFloatTypeSemantics(ElemTy), 1);
if (!isInc)
FVal.changeSign();
@@ -1268,6 +1267,8 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) {
case Expr::CXXOperatorCallExprClass:
case Expr::UserDefinedLiteralClass:
return EmitCallExprLValue(cast<CallExpr>(E));
+ case Expr::CXXRewrittenBinaryOperatorClass:
+ return EmitLValue(cast<CXXRewrittenBinaryOperator>(E)->getSemanticForm());
case Expr::VAArgExprClass:
return EmitVAArgExprLValue(cast<VAArgExpr>(E));
case Expr::DeclRefExprClass:
@@ -2195,7 +2196,7 @@ static void setObjCGCLValueClass(const ASTContext &Ctx, const Expr *E,
// If ivar is a structure pointer, assigning to field of
// this struct follows gcc's behavior and makes it a non-ivar
// writer-barrier conservatively.
- ExpTy = ExpTy->getAs<PointerType>()->getPointeeType();
+ ExpTy = ExpTy->castAs<PointerType>()->getPointeeType();
if (ExpTy->isRecordType()) {
LV.setObjCIvar(false);
return;
@@ -2231,7 +2232,7 @@ static void setObjCGCLValueClass(const ASTContext &Ctx, const Expr *E,
// a non-ivar write-barrier.
QualType ExpTy = E->getType();
if (ExpTy->isPointerType())
- ExpTy = ExpTy->getAs<PointerType>()->getPointeeType();
+ ExpTy = ExpTy->castAs<PointerType>()->getPointeeType();
if (ExpTy->isRecordType())
LV.setObjCIvar(false);
}
@@ -2362,7 +2363,7 @@ static LValue EmitGlobalVarDeclLValue(CodeGenFunction &CGF,
// If it's thread_local, emit a call to its wrapper function instead.
if (VD->getTLSKind() == VarDecl::TLS_Dynamic &&
- CGF.CGM.getCXXABI().usesThreadWrapperFunction())
+ CGF.CGM.getCXXABI().usesThreadWrapperFunction(VD))
return CGF.CGM.getCXXABI().EmitThreadLocalVarDeclLValue(CGF, VD, T);
// Check if the variable is marked as declare target with link clause in
// device codegen.
@@ -2540,6 +2541,11 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
// Spill the constant value to a global.
Addr = CGM.createUnnamedGlobalFrom(*VD, Val,
getContext().getDeclAlign(VD));
+ llvm::Type *VarTy = getTypes().ConvertTypeForMem(VD->getType());
+ auto *PTy = llvm::PointerType::get(
+ VarTy, getContext().getTargetAddressSpace(VD->getType()));
+ if (PTy != Addr.getType())
+ Addr = Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, PTy);
} else {
// Should we be using the alignment of the constant pointer we emitted?
CharUnits Alignment =
@@ -3400,6 +3406,7 @@ static Address emitArraySubscriptGEP(CodeGenFunction &CGF, Address addr,
ArrayRef<llvm::Value *> indices,
QualType eltType, bool inbounds,
bool signedIndices, SourceLocation loc,
+ QualType *arrayType = nullptr,
const llvm::Twine &name = "arrayidx") {
// All the indices except that last must be zero.
#ifndef NDEBUG
@@ -3428,9 +3435,12 @@ static Address emitArraySubscriptGEP(CodeGenFunction &CGF, Address addr,
} else {
// Remember the original array subscript for bpf target
unsigned idx = LastIndex->getZExtValue();
+ llvm::DIType *DbgInfo = nullptr;
+ if (arrayType)
+ DbgInfo = CGF.getDebugInfo()->getOrCreateStandaloneType(*arrayType, loc);
eltPtr = CGF.Builder.CreatePreserveArrayAccessIndex(addr.getPointer(),
indices.size() - 1,
- idx);
+ idx, DbgInfo);
}
return Address(eltPtr, eltAlign);
@@ -3567,19 +3577,21 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
auto *Idx = EmitIdxAfterBase(/*Promote*/true);
// Propagate the alignment from the array itself to the result.
+ QualType arrayType = Array->getType();
Addr = emitArraySubscriptGEP(
*this, ArrayLV.getAddress(), {CGM.getSize(CharUnits::Zero()), Idx},
E->getType(), !getLangOpts().isSignedOverflowDefined(), SignedIndices,
- E->getExprLoc());
+ E->getExprLoc(), &arrayType);
EltBaseInfo = ArrayLV.getBaseInfo();
EltTBAAInfo = CGM.getTBAAInfoForSubobject(ArrayLV, E->getType());
} else {
// The base must be a pointer; emit it with an estimate of its alignment.
Addr = EmitPointerWithAlignment(E->getBase(), &EltBaseInfo, &EltTBAAInfo);
auto *Idx = EmitIdxAfterBase(/*Promote*/true);
+ QualType ptrType = E->getBase()->getType();
Addr = emitArraySubscriptGEP(*this, Addr, Idx, E->getType(),
!getLangOpts().isSignedOverflowDefined(),
- SignedIndices, E->getExprLoc());
+ SignedIndices, E->getExprLoc(), &ptrType);
}
LValue LV = MakeAddrLValue(Addr, E->getType(), EltBaseInfo, EltTBAAInfo);
@@ -3980,9 +3992,19 @@ LValue CodeGenFunction::EmitLValueForField(LValue base,
const CGBitFieldInfo &Info = RL.getBitFieldInfo(field);
Address Addr = base.getAddress();
unsigned Idx = RL.getLLVMFieldNo(field);
- if (Idx != 0)
- // For structs, we GEP to the field that the record layout suggests.
- Addr = Builder.CreateStructGEP(Addr, Idx, field->getName());
+ if (!IsInPreservedAIRegion) {
+ if (Idx != 0)
+ // For structs, we GEP to the field that the record layout suggests.
+ Addr = Builder.CreateStructGEP(Addr, Idx, field->getName());
+ } else {
+ const RecordDecl *rec = field->getParent();
+ llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateRecordType(
+ getContext().getRecordType(rec), rec->getLocation());
+ Addr = Builder.CreatePreserveStructAccessIndex(Addr, Idx,
+ getDebugInfoFIndex(rec, field->getFieldIndex()),
+ DbgInfo);
+ }
+
// Get the access type.
llvm::Type *FieldIntTy =
llvm::Type::getIntNTy(getLLVMContext(), Info.StorageSize);
@@ -4051,7 +4073,6 @@ LValue CodeGenFunction::EmitLValueForField(LValue base,
unsigned RecordCVR = base.getVRQualifiers();
if (rec->isUnion()) {
// For unions, there is no pointer adjustment.
- assert(!FieldType->isReferenceType() && "union has reference member");
if (CGM.getCodeGenOpts().StrictVTablePointers &&
hasAnyVptr(FieldType, getContext()))
// Because unions can easily skip invariant.barriers, we need to add
@@ -4068,27 +4089,30 @@ LValue CodeGenFunction::EmitLValueForField(LValue base,
addr.getPointer(), getDebugInfoFIndex(rec, field->getFieldIndex()), DbgInfo),
addr.getAlignment());
}
- } else {
+ if (FieldType->isReferenceType())
+ addr = Builder.CreateElementBitCast(
+ addr, CGM.getTypes().ConvertTypeForMem(FieldType), field->getName());
+ } else {
if (!IsInPreservedAIRegion)
// For structs, we GEP to the field that the record layout suggests.
addr = emitAddrOfFieldStorage(*this, addr, field);
else
// Remember the original struct field index
addr = emitPreserveStructAccess(*this, addr, field);
+ }
- // If this is a reference field, load the reference right now.
- if (FieldType->isReferenceType()) {
- LValue RefLVal = MakeAddrLValue(addr, FieldType, FieldBaseInfo,
- FieldTBAAInfo);
- if (RecordCVR & Qualifiers::Volatile)
- RefLVal.getQuals().addVolatile();
- addr = EmitLoadOfReference(RefLVal, &FieldBaseInfo, &FieldTBAAInfo);
-
- // Qualifiers on the struct don't apply to the referencee.
- RecordCVR = 0;
- FieldType = FieldType->getPointeeType();
- }
+ // If this is a reference field, load the reference right now.
+ if (FieldType->isReferenceType()) {
+ LValue RefLVal =
+ MakeAddrLValue(addr, FieldType, FieldBaseInfo, FieldTBAAInfo);
+ if (RecordCVR & Qualifiers::Volatile)
+ RefLVal.getQuals().addVolatile();
+ addr = EmitLoadOfReference(RefLVal, &FieldBaseInfo, &FieldTBAAInfo);
+
+ // Qualifiers on the struct don't apply to the referencee.
+ RecordCVR = 0;
+ FieldType = FieldType->getPointeeType();
}
// Make sure that the address is pointing to the right type. This is critical
diff --git a/lib/CodeGen/CGExprAgg.cpp b/lib/CodeGen/CGExprAgg.cpp
index 695facd50b67..2f0e4937613f 100644
--- a/lib/CodeGen/CGExprAgg.cpp
+++ b/lib/CodeGen/CGExprAgg.cpp
@@ -150,6 +150,9 @@ public:
void VisitBinAssign(const BinaryOperator *E);
void VisitBinComma(const BinaryOperator *E);
void VisitBinCmp(const BinaryOperator *E);
+ void VisitCXXRewrittenBinaryOperator(CXXRewrittenBinaryOperator *E) {
+ Visit(E->getSemanticForm());
+ }
void VisitObjCMessageExpr(ObjCMessageExpr *E);
void VisitObjCIvarRefExpr(ObjCIvarRefExpr *E) {
@@ -501,7 +504,7 @@ void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType,
CGM.getContext().getTargetAddressSpace(AS));
Emitter.finalize(GV);
CharUnits Align = CGM.getContext().getTypeAlignInChars(ArrayQTy);
- GV->setAlignment(Align.getQuantity());
+ GV->setAlignment(Align.getAsAlign());
EmitFinalDestCopy(ArrayQTy, CGF.MakeAddrLValue(GV, ArrayQTy, Align));
return;
}
@@ -1495,6 +1498,13 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) {
// initializers throws an exception.
SmallVector<EHScopeStack::stable_iterator, 16> cleanups;
llvm::Instruction *cleanupDominator = nullptr;
+ auto addCleanup = [&](const EHScopeStack::stable_iterator &cleanup) {
+ cleanups.push_back(cleanup);
+ if (!cleanupDominator) // create placeholder once needed
+ cleanupDominator = CGF.Builder.CreateAlignedLoad(
+ CGF.Int8Ty, llvm::Constant::getNullValue(CGF.Int8PtrTy),
+ CharUnits::One());
+ };
unsigned curInitIndex = 0;
@@ -1519,7 +1529,7 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) {
if (QualType::DestructionKind dtorKind =
Base.getType().isDestructedType()) {
CGF.pushDestroy(dtorKind, V, Base.getType());
- cleanups.push_back(CGF.EHStack.stable_begin());
+ addCleanup(CGF.EHStack.stable_begin());
}
}
}
@@ -1596,15 +1606,9 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) {
= field->getType().isDestructedType()) {
assert(LV.isSimple());
if (CGF.needsEHCleanup(dtorKind)) {
- if (!cleanupDominator)
- cleanupDominator = CGF.Builder.CreateAlignedLoad(
- CGF.Int8Ty,
- llvm::Constant::getNullValue(CGF.Int8PtrTy),
- CharUnits::One()); // placeholder
-
CGF.pushDestroy(EHCleanup, LV.getAddress(), field->getType(),
CGF.getDestroyer(dtorKind), false);
- cleanups.push_back(CGF.EHStack.stable_begin());
+ addCleanup(CGF.EHStack.stable_begin());
pushedCleanup = true;
}
}
@@ -1620,6 +1624,8 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) {
// Deactivate all the partial cleanups in reverse order, which
// generally means popping them.
+ assert((cleanupDominator || cleanups.empty()) &&
+ "Missing cleanupDominator before deactivating cleanup blocks");
for (unsigned i = cleanups.size(); i != 0; --i)
CGF.DeactivateCleanupBlock(cleanups[i-1], cleanupDominator);
@@ -1756,7 +1762,7 @@ static CharUnits GetNumNonZeroBytesInInit(const Expr *E, CodeGenFunction &CGF) {
// referencee. InitListExprs for unions and arrays can't have references.
if (const RecordType *RT = E->getType()->getAs<RecordType>()) {
if (!RT->isUnionType()) {
- RecordDecl *SD = E->getType()->getAs<RecordType>()->getDecl();
+ RecordDecl *SD = RT->getDecl();
CharUnits NumNonZeroBytes = CharUnits::Zero();
unsigned ILEElement = 0;
diff --git a/lib/CodeGen/CGExprCXX.cpp b/lib/CodeGen/CGExprCXX.cpp
index 5476d13b7c46..114d806d454b 100644
--- a/lib/CodeGen/CGExprCXX.cpp
+++ b/lib/CodeGen/CGExprCXX.cpp
@@ -382,7 +382,7 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr(
const CXXRecordDecl *RD;
std::tie(VTable, RD) =
CGM.getCXXABI().LoadVTablePtr(*this, This.getAddress(),
- MD->getParent());
+ CalleeDecl->getParent());
EmitVTablePtrCheckForCall(RD, VTable, CFITCK_NVCall, CE->getBeginLoc());
}
@@ -418,13 +418,10 @@ CodeGenFunction::EmitCXXMemberPointerCallExpr(const CXXMemberCallExpr *E,
const Expr *BaseExpr = BO->getLHS();
const Expr *MemFnExpr = BO->getRHS();
- const MemberPointerType *MPT =
- MemFnExpr->getType()->castAs<MemberPointerType>();
-
- const FunctionProtoType *FPT =
- MPT->getPointeeType()->castAs<FunctionProtoType>();
- const CXXRecordDecl *RD =
- cast<CXXRecordDecl>(MPT->getClass()->getAs<RecordType>()->getDecl());
+ const auto *MPT = MemFnExpr->getType()->castAs<MemberPointerType>();
+ const auto *FPT = MPT->getPointeeType()->castAs<FunctionProtoType>();
+ const auto *RD =
+ cast<CXXRecordDecl>(MPT->getClass()->castAs<RecordType>()->getDecl());
// Emit the 'this' pointer.
Address This = Address::invalid();
@@ -535,7 +532,7 @@ static void EmitNullBaseClassInitialization(CodeGenFunction &CGF,
CharUnits Align = std::max(Layout.getNonVirtualAlignment(),
DestPtr.getAlignment());
- NullVariable->setAlignment(Align.getQuantity());
+ NullVariable->setAlignment(Align.getAsAlign());
Address SrcPtr = Address(CGF.EmitCastToVoidPtr(NullVariable), Align);
@@ -1882,9 +1879,33 @@ static void EmitObjectDelete(CodeGenFunction &CGF,
Dtor = RD->getDestructor();
if (Dtor->isVirtual()) {
- CGF.CGM.getCXXABI().emitVirtualObjectDelete(CGF, DE, Ptr, ElementType,
- Dtor);
- return;
+ bool UseVirtualCall = true;
+ const Expr *Base = DE->getArgument();
+ if (auto *DevirtualizedDtor =
+ dyn_cast_or_null<const CXXDestructorDecl>(
+ Dtor->getDevirtualizedMethod(
+ Base, CGF.CGM.getLangOpts().AppleKext))) {
+ UseVirtualCall = false;
+ const CXXRecordDecl *DevirtualizedClass =
+ DevirtualizedDtor->getParent();
+ if (declaresSameEntity(getCXXRecord(Base), DevirtualizedClass)) {
+ // Devirtualized to the class of the base type (the type of the
+ // whole expression).
+ Dtor = DevirtualizedDtor;
+ } else {
+ // Devirtualized to some other type. Would need to cast the this
+ // pointer to that type but we don't have support for that yet, so
+ // do a virtual call. FIXME: handle the case where it is
+ // devirtualized to the derived type (the type of the inner
+ // expression) as in EmitCXXMemberOrOperatorMemberCallExpr.
+ UseVirtualCall = true;
+ }
+ }
+ if (UseVirtualCall) {
+ CGF.CGM.getCXXABI().emitVirtualObjectDelete(CGF, DE, Ptr, ElementType,
+ Dtor);
+ return;
+ }
}
}
}
diff --git a/lib/CodeGen/CGExprComplex.cpp b/lib/CodeGen/CGExprComplex.cpp
index 6a5fb45ba259..385f87f12a9b 100644
--- a/lib/CodeGen/CGExprComplex.cpp
+++ b/lib/CodeGen/CGExprComplex.cpp
@@ -279,6 +279,10 @@ public:
return EmitBinDiv(EmitBinOps(E));
}
+ ComplexPairTy VisitCXXRewrittenBinaryOperator(CXXRewrittenBinaryOperator *E) {
+ return Visit(E->getSemanticForm());
+ }
+
// Compound assignments.
ComplexPairTy VisitBinAddAssign(const CompoundAssignOperator *E) {
return EmitCompoundAssign(E, &ComplexExprEmitter::EmitBinAdd);
diff --git a/lib/CodeGen/CGExprConstant.cpp b/lib/CodeGen/CGExprConstant.cpp
index 31cf2aef1ba0..96e8c9c0d0e6 100644
--- a/lib/CodeGen/CGExprConstant.cpp
+++ b/lib/CodeGen/CGExprConstant.cpp
@@ -659,7 +659,7 @@ static bool EmitDesignatedInitUpdater(ConstantEmitter &Emitter,
}
bool ConstStructBuilder::Build(InitListExpr *ILE, bool AllowOverwrite) {
- RecordDecl *RD = ILE->getType()->getAs<RecordType>()->getDecl();
+ RecordDecl *RD = ILE->getType()->castAs<RecordType>()->getDecl();
const ASTRecordLayout &Layout = CGM.getContext().getASTRecordLayout(RD);
unsigned FieldNo = -1;
@@ -839,7 +839,7 @@ bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD,
}
llvm::Constant *ConstStructBuilder::Finalize(QualType Type) {
- RecordDecl *RD = Type->getAs<RecordType>()->getDecl();
+ RecordDecl *RD = Type->castAs<RecordType>()->getDecl();
llvm::Type *ValTy = CGM.getTypes().ConvertType(Type);
return Builder.build(ValTy, RD->hasFlexibleArrayMember());
}
@@ -907,7 +907,7 @@ static ConstantAddress tryEmitGlobalCompoundLiteral(CodeGenModule &CGM,
llvm::GlobalVariable::NotThreadLocal,
CGM.getContext().getTargetAddressSpace(addressSpace));
emitter.finalize(GV);
- GV->setAlignment(Align.getQuantity());
+ GV->setAlignment(Align.getAsAlign());
CGM.setAddrOfConstantCompoundLiteral(E, GV);
return ConstantAddress(GV, Align);
}
@@ -1269,8 +1269,8 @@ public:
return nullptr;
// FIXME: We should not have to call getBaseElementType here.
- const RecordType *RT =
- CGM.getContext().getBaseElementType(Ty)->getAs<RecordType>();
+ const auto *RT =
+ CGM.getContext().getBaseElementType(Ty)->castAs<RecordType>();
const CXXRecordDecl *RD = cast<CXXRecordDecl>(RT->getDecl());
// If the class doesn't have a trivial destructor, we can't emit it as a
diff --git a/lib/CodeGen/CGExprScalar.cpp b/lib/CodeGen/CGExprScalar.cpp
index 3d082de2a14f..55a413a2a717 100644
--- a/lib/CodeGen/CGExprScalar.cpp
+++ b/lib/CodeGen/CGExprScalar.cpp
@@ -294,8 +294,7 @@ public:
Value *AlignmentValue = CGF.EmitScalarExpr(AVAttr->getAlignment());
llvm::ConstantInt *AlignmentCI = cast<llvm::ConstantInt>(AlignmentValue);
- CGF.EmitAlignmentAssumption(V, E, AVAttr->getLocation(),
- AlignmentCI->getZExtValue());
+ CGF.EmitAlignmentAssumption(V, E, AVAttr->getLocation(), AlignmentCI);
}
/// EmitLoadOfLValue - Given an expression with complex type that represents a
@@ -674,6 +673,10 @@ public:
return llvm::ConstantInt::get(ConvertType(E->getType()), E->getValue());
}
+ Value *VisitConceptSpecializationExpr(const ConceptSpecializationExpr *E) {
+ return Builder.getInt1(E->isSatisfied());
+ }
+
Value *VisitArrayTypeTraitExpr(const ArrayTypeTraitExpr *E) {
return llvm::ConstantInt::get(Builder.getInt32Ty(), E->getValue());
}
@@ -814,6 +817,10 @@ public:
Value *VisitBinPtrMemD(const Expr *E) { return EmitLoadOfLValue(E); }
Value *VisitBinPtrMemI(const Expr *E) { return EmitLoadOfLValue(E); }
+ Value *VisitCXXRewrittenBinaryOperator(CXXRewrittenBinaryOperator *E) {
+ return Visit(E->getSemanticForm());
+ }
+
// Other Operators.
Value *VisitBlockExpr(const BlockExpr *BE);
Value *VisitAbstractConditionalOperator(const AbstractConditionalOperator *);
@@ -1657,8 +1664,8 @@ Value *ScalarExprEmitter::VisitConvertVectorExpr(ConvertVectorExpr *E) {
if (SrcTy == DstTy)
return Src;
- QualType SrcEltType = SrcType->getAs<VectorType>()->getElementType(),
- DstEltType = DstType->getAs<VectorType>()->getElementType();
+ QualType SrcEltType = SrcType->castAs<VectorType>()->getElementType(),
+ DstEltType = DstType->castAs<VectorType>()->getElementType();
assert(SrcTy->isVectorTy() &&
"ConvertVector source IR type must be a vector");
@@ -2577,14 +2584,16 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
Value *ScalarExprEmitter::VisitUnaryMinus(const UnaryOperator *E) {
TestAndClearIgnoreResultAssign();
+ Value *Op = Visit(E->getSubExpr());
+
+ // Generate a unary FNeg for FP ops.
+ if (Op->getType()->isFPOrFPVectorTy())
+ return Builder.CreateFNeg(Op, "fneg");
+
// Emit unary minus with EmitSub so we handle overflow cases etc.
BinOpInfo BinOp;
- BinOp.RHS = Visit(E->getSubExpr());
-
- if (BinOp.RHS->getType()->isFPOrFPVectorTy())
- BinOp.LHS = llvm::ConstantFP::getZeroValueForNegation(BinOp.RHS->getType());
- else
- BinOp.LHS = llvm::Constant::getNullValue(BinOp.RHS->getType());
+ BinOp.RHS = Op;
+ BinOp.LHS = llvm::Constant::getNullValue(BinOp.RHS->getType());
BinOp.Ty = E->getType();
BinOp.Opcode = BO_Sub;
// FIXME: once UnaryOperator carries FPFeatures, copy it here.
@@ -2662,7 +2671,7 @@ Value *ScalarExprEmitter::VisitOffsetOfExpr(OffsetOfExpr *E) {
case OffsetOfNode::Field: {
FieldDecl *MemberDecl = ON.getField();
- RecordDecl *RD = CurrentType->getAs<RecordType>()->getDecl();
+ RecordDecl *RD = CurrentType->castAs<RecordType>()->getDecl();
const ASTRecordLayout &RL = CGF.getContext().getASTRecordLayout(RD);
// Compute the index of the field in its parent.
@@ -2695,7 +2704,7 @@ Value *ScalarExprEmitter::VisitOffsetOfExpr(OffsetOfExpr *E) {
continue;
}
- RecordDecl *RD = CurrentType->getAs<RecordType>()->getDecl();
+ RecordDecl *RD = CurrentType->castAs<RecordType>()->getDecl();
const ASTRecordLayout &RL = CGF.getContext().getASTRecordLayout(RD);
// Save the element type.
@@ -3745,7 +3754,7 @@ Value *ScalarExprEmitter::EmitCompare(const BinaryOperator *E,
Value *FirstVecArg = LHS,
*SecondVecArg = RHS;
- QualType ElTy = LHSTy->getAs<VectorType>()->getElementType();
+ QualType ElTy = LHSTy->castAs<VectorType>()->getElementType();
const BuiltinType *BTy = ElTy->getAs<BuiltinType>();
BuiltinType::Kind ElementKind = BTy->getKind();
@@ -4414,8 +4423,8 @@ Value *ScalarExprEmitter::VisitAsTypeExpr(AsTypeExpr *E) {
return Src;
}
- return Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(),
- Src, DstTy, "astype");
+ return createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(),
+ Src, DstTy, "astype");
}
Value *ScalarExprEmitter::VisitAtomicExpr(AtomicExpr *E) {
@@ -4533,32 +4542,43 @@ LValue CodeGenFunction::EmitCompoundAssignmentLValue(
llvm_unreachable("Unhandled compound assignment operator");
}
-Value *CodeGenFunction::EmitCheckedInBoundsGEP(Value *Ptr,
- ArrayRef<Value *> IdxList,
- bool SignedIndices,
- bool IsSubtraction,
- SourceLocation Loc,
- const Twine &Name) {
- Value *GEPVal = Builder.CreateInBoundsGEP(Ptr, IdxList, Name);
+struct GEPOffsetAndOverflow {
+ // The total (signed) byte offset for the GEP.
+ llvm::Value *TotalOffset;
+ // The offset overflow flag - true if the total offset overflows.
+ llvm::Value *OffsetOverflows;
+};
- // If the pointer overflow sanitizer isn't enabled, do nothing.
- if (!SanOpts.has(SanitizerKind::PointerOverflow))
- return GEPVal;
+/// Evaluate given GEPVal, which is either an inbounds GEP, or a constant,
+/// and compute the total offset it applies from its base pointer BasePtr.
+/// Returns offset in bytes and a boolean flag whether an overflow happened
+/// during evaluation.
+static GEPOffsetAndOverflow EmitGEPOffsetInBytes(Value *BasePtr, Value *GEPVal,
+ llvm::LLVMContext &VMContext,
+ CodeGenModule &CGM,
+ CGBuilderTy Builder) {
+ const auto &DL = CGM.getDataLayout();
- // If the GEP has already been reduced to a constant, leave it be.
- if (isa<llvm::Constant>(GEPVal))
- return GEPVal;
+ // The total (signed) byte offset for the GEP.
+ llvm::Value *TotalOffset = nullptr;
- // Only check for overflows in the default address space.
- if (GEPVal->getType()->getPointerAddressSpace())
- return GEPVal;
+ // Was the GEP already reduced to a constant?
+ if (isa<llvm::Constant>(GEPVal)) {
+ // Compute the offset by casting both pointers to integers and subtracting:
+ // GEPVal = BasePtr + ptr(Offset) <--> Offset = int(GEPVal) - int(BasePtr)
+ Value *BasePtr_int =
+ Builder.CreatePtrToInt(BasePtr, DL.getIntPtrType(BasePtr->getType()));
+ Value *GEPVal_int =
+ Builder.CreatePtrToInt(GEPVal, DL.getIntPtrType(GEPVal->getType()));
+ TotalOffset = Builder.CreateSub(GEPVal_int, BasePtr_int);
+ return {TotalOffset, /*OffsetOverflows=*/Builder.getFalse()};
+ }
auto *GEP = cast<llvm::GEPOperator>(GEPVal);
+ assert(GEP->getPointerOperand() == BasePtr &&
+ "BasePtr must be the the base of the GEP.");
assert(GEP->isInBounds() && "Expected inbounds GEP");
- SanitizerScope SanScope(this);
- auto &VMContext = getLLVMContext();
- const auto &DL = CGM.getDataLayout();
auto *IntPtrTy = DL.getIntPtrType(GEP->getPointerOperandType());
// Grab references to the signed add/mul overflow intrinsics for intptr_t.
@@ -4568,8 +4588,6 @@ Value *CodeGenFunction::EmitCheckedInBoundsGEP(Value *Ptr,
auto *SMulIntrinsic =
CGM.getIntrinsic(llvm::Intrinsic::smul_with_overflow, IntPtrTy);
- // The total (signed) byte offset for the GEP.
- llvm::Value *TotalOffset = nullptr;
// The offset overflow flag - true if the total offset overflows.
llvm::Value *OffsetOverflows = Builder.getFalse();
@@ -4627,41 +4645,122 @@ Value *CodeGenFunction::EmitCheckedInBoundsGEP(Value *Ptr,
TotalOffset = eval(BO_Add, TotalOffset, LocalOffset);
}
- // Common case: if the total offset is zero, don't emit a check.
- if (TotalOffset == Zero)
+ return {TotalOffset, OffsetOverflows};
+}
+
+Value *
+CodeGenFunction::EmitCheckedInBoundsGEP(Value *Ptr, ArrayRef<Value *> IdxList,
+ bool SignedIndices, bool IsSubtraction,
+ SourceLocation Loc, const Twine &Name) {
+ Value *GEPVal = Builder.CreateInBoundsGEP(Ptr, IdxList, Name);
+
+ // If the pointer overflow sanitizer isn't enabled, do nothing.
+ if (!SanOpts.has(SanitizerKind::PointerOverflow))
+ return GEPVal;
+
+ llvm::Type *PtrTy = Ptr->getType();
+
+ // Perform nullptr-and-offset check unless the nullptr is defined.
+ bool PerformNullCheck = !NullPointerIsDefined(
+ Builder.GetInsertBlock()->getParent(), PtrTy->getPointerAddressSpace());
+ // Check for overflows unless the GEP got constant-folded,
+ // and only in the default address space
+ bool PerformOverflowCheck =
+ !isa<llvm::Constant>(GEPVal) && PtrTy->getPointerAddressSpace() == 0;
+
+ if (!(PerformNullCheck || PerformOverflowCheck))
+ return GEPVal;
+
+ const auto &DL = CGM.getDataLayout();
+
+ SanitizerScope SanScope(this);
+ llvm::Type *IntPtrTy = DL.getIntPtrType(PtrTy);
+
+ GEPOffsetAndOverflow EvaluatedGEP =
+ EmitGEPOffsetInBytes(Ptr, GEPVal, getLLVMContext(), CGM, Builder);
+
+ assert((!isa<llvm::Constant>(EvaluatedGEP.TotalOffset) ||
+ EvaluatedGEP.OffsetOverflows == Builder.getFalse()) &&
+ "If the offset got constant-folded, we don't expect that there was an "
+ "overflow.");
+
+ auto *Zero = llvm::ConstantInt::getNullValue(IntPtrTy);
+
+ // Common case: if the total offset is zero, and we are using C++ semantics,
+ // where nullptr+0 is defined, don't emit a check.
+ if (EvaluatedGEP.TotalOffset == Zero && CGM.getLangOpts().CPlusPlus)
return GEPVal;
// Now that we've computed the total offset, add it to the base pointer (with
// wrapping semantics).
- auto *IntPtr = Builder.CreatePtrToInt(GEP->getPointerOperand(), IntPtrTy);
- auto *ComputedGEP = Builder.CreateAdd(IntPtr, TotalOffset);
-
- // The GEP is valid if:
- // 1) The total offset doesn't overflow, and
- // 2) The sign of the difference between the computed address and the base
- // pointer matches the sign of the total offset.
- llvm::Value *ValidGEP;
- auto *NoOffsetOverflow = Builder.CreateNot(OffsetOverflows);
- if (SignedIndices) {
- auto *PosOrZeroValid = Builder.CreateICmpUGE(ComputedGEP, IntPtr);
- auto *PosOrZeroOffset = Builder.CreateICmpSGE(TotalOffset, Zero);
- llvm::Value *NegValid = Builder.CreateICmpULT(ComputedGEP, IntPtr);
- ValidGEP = Builder.CreateAnd(
- Builder.CreateSelect(PosOrZeroOffset, PosOrZeroValid, NegValid),
- NoOffsetOverflow);
- } else if (!SignedIndices && !IsSubtraction) {
- auto *PosOrZeroValid = Builder.CreateICmpUGE(ComputedGEP, IntPtr);
- ValidGEP = Builder.CreateAnd(PosOrZeroValid, NoOffsetOverflow);
- } else {
- auto *NegOrZeroValid = Builder.CreateICmpULE(ComputedGEP, IntPtr);
- ValidGEP = Builder.CreateAnd(NegOrZeroValid, NoOffsetOverflow);
+ auto *IntPtr = Builder.CreatePtrToInt(Ptr, IntPtrTy);
+ auto *ComputedGEP = Builder.CreateAdd(IntPtr, EvaluatedGEP.TotalOffset);
+
+ llvm::SmallVector<std::pair<llvm::Value *, SanitizerMask>, 2> Checks;
+
+ if (PerformNullCheck) {
+ // In C++, if the base pointer evaluates to a null pointer value,
+ // the only valid pointer this inbounds GEP can produce is also
+ // a null pointer, so the offset must also evaluate to zero.
+ // Likewise, if we have non-zero base pointer, we can not get null pointer
+ // as a result, so the offset can not be -intptr_t(BasePtr).
+ // In other words, both pointers are either null, or both are non-null,
+ // or the behaviour is undefined.
+ //
+ // C, however, is more strict in this regard, and gives more
+ // optimization opportunities: in C, additionally, nullptr+0 is undefined.
+ // So both the input to the 'gep inbounds' AND the output must not be null.
+ auto *BaseIsNotNullptr = Builder.CreateIsNotNull(Ptr);
+ auto *ResultIsNotNullptr = Builder.CreateIsNotNull(ComputedGEP);
+ auto *Valid =
+ CGM.getLangOpts().CPlusPlus
+ ? Builder.CreateICmpEQ(BaseIsNotNullptr, ResultIsNotNullptr)
+ : Builder.CreateAnd(BaseIsNotNullptr, ResultIsNotNullptr);
+ Checks.emplace_back(Valid, SanitizerKind::PointerOverflow);
+ }
+
+ if (PerformOverflowCheck) {
+ // The GEP is valid if:
+ // 1) The total offset doesn't overflow, and
+ // 2) The sign of the difference between the computed address and the base
+ // pointer matches the sign of the total offset.
+ llvm::Value *ValidGEP;
+ auto *NoOffsetOverflow = Builder.CreateNot(EvaluatedGEP.OffsetOverflows);
+ if (SignedIndices) {
+ // GEP is computed as `unsigned base + signed offset`, therefore:
+ // * If offset was positive, then the computed pointer can not be
+ // [unsigned] less than the base pointer, unless it overflowed.
+ // * If offset was negative, then the computed pointer can not be
+ // [unsigned] greater than the base pointer, unless it overflowed.
+ auto *PosOrZeroValid = Builder.CreateICmpUGE(ComputedGEP, IntPtr);
+ auto *PosOrZeroOffset =
+ Builder.CreateICmpSGE(EvaluatedGEP.TotalOffset, Zero);
+ llvm::Value *NegValid = Builder.CreateICmpULT(ComputedGEP, IntPtr);
+ ValidGEP =
+ Builder.CreateSelect(PosOrZeroOffset, PosOrZeroValid, NegValid);
+ } else if (!IsSubtraction) {
+ // GEP is computed as `unsigned base + unsigned offset`, therefore the
+ // computed pointer can not be [unsigned] less than base pointer,
+ // unless there was an overflow.
+ // Equivalent to `@llvm.uadd.with.overflow(%base, %offset)`.
+ ValidGEP = Builder.CreateICmpUGE(ComputedGEP, IntPtr);
+ } else {
+ // GEP is computed as `unsigned base - unsigned offset`, therefore the
+ // computed pointer can not be [unsigned] greater than base pointer,
+ // unless there was an overflow.
+ // Equivalent to `@llvm.usub.with.overflow(%base, sub(0, %offset))`.
+ ValidGEP = Builder.CreateICmpULE(ComputedGEP, IntPtr);
+ }
+ ValidGEP = Builder.CreateAnd(ValidGEP, NoOffsetOverflow);
+ Checks.emplace_back(ValidGEP, SanitizerKind::PointerOverflow);
}
+ assert(!Checks.empty() && "Should have produced some checks.");
+
llvm::Constant *StaticArgs[] = {EmitCheckSourceLocation(Loc)};
// Pass the computed GEP to the runtime to avoid emitting poisoned arguments.
llvm::Value *DynamicArgs[] = {IntPtr, ComputedGEP};
- EmitCheck(std::make_pair(ValidGEP, SanitizerKind::PointerOverflow),
- SanitizerHandler::PointerOverflow, StaticArgs, DynamicArgs);
+ EmitCheck(Checks, SanitizerHandler::PointerOverflow, StaticArgs, DynamicArgs);
return GEPVal;
}
diff --git a/lib/CodeGen/CGLoopInfo.cpp b/lib/CodeGen/CGLoopInfo.cpp
index b2bc42bfa013..c21d4feee7a8 100644
--- a/lib/CodeGen/CGLoopInfo.cpp
+++ b/lib/CodeGen/CGLoopInfo.cpp
@@ -218,6 +218,7 @@ LoopInfo::createLoopVectorizeMetadata(const LoopAttributes &Attrs,
if (Attrs.VectorizeEnable == LoopAttributes::Disable)
Enabled = false;
else if (Attrs.VectorizeEnable != LoopAttributes::Unspecified ||
+ Attrs.VectorizePredicateEnable != LoopAttributes::Unspecified ||
Attrs.InterleaveCount != 0 || Attrs.VectorizeWidth != 0)
Enabled = true;
@@ -251,8 +252,32 @@ LoopInfo::createLoopVectorizeMetadata(const LoopAttributes &Attrs,
Args.push_back(TempNode.get());
Args.append(LoopProperties.begin(), LoopProperties.end());
+ // Setting vectorize.predicate
+ bool IsVectorPredicateEnabled = false;
+ if (Attrs.VectorizePredicateEnable != LoopAttributes::Unspecified &&
+ Attrs.VectorizeEnable != LoopAttributes::Disable &&
+ Attrs.VectorizeWidth < 1) {
+
+ IsVectorPredicateEnabled =
+ (Attrs.VectorizePredicateEnable == LoopAttributes::Enable);
+
+ Metadata *Vals[] = {
+ MDString::get(Ctx, "llvm.loop.vectorize.predicate.enable"),
+ ConstantAsMetadata::get(ConstantInt::get(llvm::Type::getInt1Ty(Ctx),
+ IsVectorPredicateEnabled))};
+ Args.push_back(MDNode::get(Ctx, Vals));
+ }
+
// Setting vectorize.width
if (Attrs.VectorizeWidth > 0) {
+ // This implies vectorize.enable = true, but only add it when it is not
+ // already enabled.
+ if (Attrs.VectorizeEnable != LoopAttributes::Enable)
+ Args.push_back(
+ MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.vectorize.enable"),
+ ConstantAsMetadata::get(ConstantInt::get(
+ llvm::Type::getInt1Ty(Ctx), 1))}));
+
Metadata *Vals[] = {
MDString::get(Ctx, "llvm.loop.vectorize.width"),
ConstantAsMetadata::get(ConstantInt::get(llvm::Type::getInt32Ty(Ctx),
@@ -270,12 +295,15 @@ LoopInfo::createLoopVectorizeMetadata(const LoopAttributes &Attrs,
}
// Setting vectorize.enable
- if (Attrs.VectorizeEnable != LoopAttributes::Unspecified) {
+ if (Attrs.VectorizeEnable != LoopAttributes::Unspecified ||
+ IsVectorPredicateEnabled) {
Metadata *Vals[] = {
MDString::get(Ctx, "llvm.loop.vectorize.enable"),
ConstantAsMetadata::get(ConstantInt::get(
llvm::Type::getInt1Ty(Ctx),
- (Attrs.VectorizeEnable == LoopAttributes::Enable)))};
+ IsVectorPredicateEnabled
+ ? true
+ : (Attrs.VectorizeEnable == LoopAttributes::Enable)))};
Args.push_back(MDNode::get(Ctx, Vals));
}
@@ -411,7 +439,8 @@ MDNode *LoopInfo::createMetadata(
LoopAttributes::LoopAttributes(bool IsParallel)
: IsParallel(IsParallel), VectorizeEnable(LoopAttributes::Unspecified),
UnrollEnable(LoopAttributes::Unspecified),
- UnrollAndJamEnable(LoopAttributes::Unspecified), VectorizeWidth(0),
+ UnrollAndJamEnable(LoopAttributes::Unspecified),
+ VectorizePredicateEnable(LoopAttributes::Unspecified), VectorizeWidth(0),
InterleaveCount(0), UnrollCount(0), UnrollAndJamCount(0),
DistributeEnable(LoopAttributes::Unspecified), PipelineDisabled(false),
PipelineInitiationInterval(0) {}
@@ -425,6 +454,7 @@ void LoopAttributes::clear() {
VectorizeEnable = LoopAttributes::Unspecified;
UnrollEnable = LoopAttributes::Unspecified;
UnrollAndJamEnable = LoopAttributes::Unspecified;
+ VectorizePredicateEnable = LoopAttributes::Unspecified;
DistributeEnable = LoopAttributes::Unspecified;
PipelineDisabled = false;
PipelineInitiationInterval = 0;
@@ -446,6 +476,7 @@ LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs,
Attrs.InterleaveCount == 0 && Attrs.UnrollCount == 0 &&
Attrs.UnrollAndJamCount == 0 && !Attrs.PipelineDisabled &&
Attrs.PipelineInitiationInterval == 0 &&
+ Attrs.VectorizePredicateEnable == LoopAttributes::Unspecified &&
Attrs.VectorizeEnable == LoopAttributes::Unspecified &&
Attrs.UnrollEnable == LoopAttributes::Unspecified &&
Attrs.UnrollAndJamEnable == LoopAttributes::Unspecified &&
@@ -480,6 +511,7 @@ void LoopInfo::finish() {
BeforeJam.InterleaveCount = Attrs.InterleaveCount;
BeforeJam.VectorizeEnable = Attrs.VectorizeEnable;
BeforeJam.DistributeEnable = Attrs.DistributeEnable;
+ BeforeJam.VectorizePredicateEnable = Attrs.VectorizePredicateEnable;
switch (Attrs.UnrollEnable) {
case LoopAttributes::Unspecified:
@@ -495,6 +527,7 @@ void LoopInfo::finish() {
break;
}
+ AfterJam.VectorizePredicateEnable = Attrs.VectorizePredicateEnable;
AfterJam.UnrollCount = Attrs.UnrollCount;
AfterJam.PipelineDisabled = Attrs.PipelineDisabled;
AfterJam.PipelineInitiationInterval = Attrs.PipelineInitiationInterval;
@@ -516,6 +549,7 @@ void LoopInfo::finish() {
// add it manually.
SmallVector<Metadata *, 1> BeforeLoopProperties;
if (BeforeJam.VectorizeEnable != LoopAttributes::Unspecified ||
+ BeforeJam.VectorizePredicateEnable != LoopAttributes::Unspecified ||
BeforeJam.InterleaveCount != 0 || BeforeJam.VectorizeWidth != 0)
BeforeLoopProperties.push_back(
MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.isvectorized")));
@@ -537,8 +571,9 @@ void LoopInfo::finish() {
void LoopInfoStack::push(BasicBlock *Header, const llvm::DebugLoc &StartLoc,
const llvm::DebugLoc &EndLoc) {
- Active.push_back(LoopInfo(Header, StagedAttrs, StartLoc, EndLoc,
- Active.empty() ? nullptr : &Active.back()));
+ Active.emplace_back(
+ new LoopInfo(Header, StagedAttrs, StartLoc, EndLoc,
+ Active.empty() ? nullptr : Active.back().get()));
// Clear the attributes so nested loops do not inherit them.
StagedAttrs.clear();
}
@@ -603,6 +638,9 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
case LoopHintAttr::UnrollAndJam:
setUnrollAndJamState(LoopAttributes::Disable);
break;
+ case LoopHintAttr::VectorizePredicate:
+ setVectorizePredicateState(LoopAttributes::Disable);
+ break;
case LoopHintAttr::Distribute:
setDistributeState(false);
break;
@@ -630,6 +668,9 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
case LoopHintAttr::UnrollAndJam:
setUnrollAndJamState(LoopAttributes::Enable);
break;
+ case LoopHintAttr::VectorizePredicate:
+ setVectorizePredicateState(LoopAttributes::Enable);
+ break;
case LoopHintAttr::Distribute:
setDistributeState(true);
break;
@@ -653,6 +694,7 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
break;
case LoopHintAttr::Unroll:
case LoopHintAttr::UnrollAndJam:
+ case LoopHintAttr::VectorizePredicate:
case LoopHintAttr::UnrollCount:
case LoopHintAttr::UnrollAndJamCount:
case LoopHintAttr::VectorizeWidth:
@@ -681,6 +723,7 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
case LoopHintAttr::Distribute:
case LoopHintAttr::PipelineDisabled:
case LoopHintAttr::PipelineInitiationInterval:
+ case LoopHintAttr::VectorizePredicate:
llvm_unreachable("Options cannot be used with 'full' hint.");
break;
}
@@ -704,6 +747,7 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
break;
case LoopHintAttr::Unroll:
case LoopHintAttr::UnrollAndJam:
+ case LoopHintAttr::VectorizePredicate:
case LoopHintAttr::Vectorize:
case LoopHintAttr::Interleave:
case LoopHintAttr::Distribute:
@@ -721,16 +765,16 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
void LoopInfoStack::pop() {
assert(!Active.empty() && "No active loops to pop");
- Active.back().finish();
+ Active.back()->finish();
Active.pop_back();
}
void LoopInfoStack::InsertHelper(Instruction *I) const {
if (I->mayReadOrWriteMemory()) {
SmallVector<Metadata *, 4> AccessGroups;
- for (const LoopInfo &AL : Active) {
+ for (const auto &AL : Active) {
// Here we assume that every loop that has an access group is parallel.
- if (MDNode *Group = AL.getAccessGroup())
+ if (MDNode *Group = AL->getAccessGroup())
AccessGroups.push_back(Group);
}
MDNode *UnionMD = nullptr;
diff --git a/lib/CodeGen/CGLoopInfo.h b/lib/CodeGen/CGLoopInfo.h
index 35d0e00527b9..5abcf37c5433 100644
--- a/lib/CodeGen/CGLoopInfo.h
+++ b/lib/CodeGen/CGLoopInfo.h
@@ -51,6 +51,9 @@ struct LoopAttributes {
/// Value for llvm.loop.unroll_and_jam.* metadata (enable, disable, or full).
LVEnableState UnrollAndJamEnable;
+ /// Value for llvm.loop.vectorize.predicate.enable metadata.
+ LVEnableState VectorizePredicateEnable;
+
/// Value for llvm.loop.vectorize.width metadata.
unsigned VectorizeWidth;
@@ -237,6 +240,11 @@ public:
StagedAttrs.UnrollEnable = State;
}
+ /// Set the next pushed vectorize predicate state.
+ void setVectorizePredicateState(const LoopAttributes::LVEnableState &State) {
+ StagedAttrs.VectorizePredicateEnable = State;
+ }
+
/// Set the next pushed loop unroll_and_jam state.
void setUnrollAndJamState(const LoopAttributes::LVEnableState &State) {
StagedAttrs.UnrollAndJamEnable = State;
@@ -267,11 +275,11 @@ private:
bool hasInfo() const { return !Active.empty(); }
/// Return the LoopInfo for the current loop. HasInfo should be called
/// first to ensure LoopInfo is present.
- const LoopInfo &getInfo() const { return Active.back(); }
+ const LoopInfo &getInfo() const { return *Active.back(); }
/// The set of attributes that will be applied to the next pushed loop.
LoopAttributes StagedAttrs;
/// Stack of active loops.
- llvm::SmallVector<LoopInfo, 4> Active;
+ llvm::SmallVector<std::unique_ptr<LoopInfo>, 4> Active;
};
} // end namespace CodeGen
diff --git a/lib/CodeGen/CGNonTrivialStruct.cpp b/lib/CodeGen/CGNonTrivialStruct.cpp
index caf62d2ac93a..05615aa12881 100644
--- a/lib/CodeGen/CGNonTrivialStruct.cpp
+++ b/lib/CodeGen/CGNonTrivialStruct.cpp
@@ -823,7 +823,7 @@ static void callSpecialFunction(G &&Gen, StringRef FuncName, QualType QT,
Gen.callFunc(FuncName, QT, Addrs, CGF);
}
-template <size_t N> std::array<Address, N> createNullAddressArray();
+template <size_t N> static std::array<Address, N> createNullAddressArray();
template <> std::array<Address, 1> createNullAddressArray() {
return std::array<Address, 1>({{Address(nullptr, CharUnits::Zero())}});
diff --git a/lib/CodeGen/CGObjC.cpp b/lib/CodeGen/CGObjC.cpp
index 1dd7ec52230e..1fa72678081a 100644
--- a/lib/CodeGen/CGObjC.cpp
+++ b/lib/CodeGen/CGObjC.cpp
@@ -143,7 +143,7 @@ llvm::Value *CodeGenFunction::EmitObjCCollectionLiteral(const Expr *E,
NumElements);
QualType ElementType = Context.getObjCIdType().withConst();
QualType ElementArrayType
- = Context.getConstantArrayType(ElementType, APNumElements,
+ = Context.getConstantArrayType(ElementType, APNumElements, nullptr,
ArrayType::Normal, /*IndexTypeQuals=*/0);
// Allocate the temporary array(s).
@@ -1661,7 +1661,7 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){
QualType ItemsTy =
getContext().getConstantArrayType(getContext().getObjCIdType(),
- llvm::APInt(32, NumItems),
+ llvm::APInt(32, NumItems), nullptr,
ArrayType::Normal, 0);
Address ItemsPtr = CreateMemTemp(ItemsTy, "items.ptr");
diff --git a/lib/CodeGen/CGObjCGNU.cpp b/lib/CodeGen/CGObjCGNU.cpp
index ee5c12aa35bd..d2c089d0360e 100644
--- a/lib/CodeGen/CGObjCGNU.cpp
+++ b/lib/CodeGen/CGObjCGNU.cpp
@@ -1294,7 +1294,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
// Emit a placeholder symbol.
GV = new llvm::GlobalVariable(TheModule, ProtocolTy, false,
llvm::GlobalValue::ExternalLinkage, nullptr, Name);
- GV->setAlignment(CGM.getPointerAlign().getQuantity());
+ GV->setAlignment(CGM.getPointerAlign().getAsAlign());
}
return llvm::ConstantExpr::getBitCast(GV, ProtocolPtrTy);
}
@@ -1318,7 +1318,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
llvm::ConstantExpr::getBitCast(Protocol, ProtocolPtrTy), RefName);
GV->setComdat(TheModule.getOrInsertComdat(RefName));
GV->setSection(sectionName<ProtocolReferenceSection>());
- GV->setAlignment(CGM.getPointerAlign().getQuantity());
+ GV->setAlignment(CGM.getPointerAlign().getAsAlign());
Ref = GV;
}
EmittedProtocolRef = true;
@@ -1497,7 +1497,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
Sym->setSection((Section + SecSuffix).str());
Sym->setComdat(TheModule.getOrInsertComdat((Prefix +
Section).str()));
- Sym->setAlignment(CGM.getPointerAlign().getQuantity());
+ Sym->setAlignment(CGM.getPointerAlign().getAsAlign());
return Sym;
};
return { Sym("__start_", "$a"), Sym("__stop", "$z") };
@@ -1854,7 +1854,8 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
ivarBuilder.addInt(Int32Ty,
CGM.getContext().getTypeSizeInChars(ivarTy).getQuantity());
// Alignment will be stored as a base-2 log of the alignment.
- int align = llvm::Log2_32(Context.getTypeAlignInChars(ivarTy).getQuantity());
+ unsigned align =
+ llvm::Log2_32(Context.getTypeAlignInChars(ivarTy).getQuantity());
// Objects that require more than 2^64-byte alignment should be impossible!
assert(align < 64);
// uint32_t flags;
@@ -4039,7 +4040,7 @@ LValue CGObjCGNU::EmitObjCValueForIvar(CodeGenFunction &CGF,
const ObjCIvarDecl *Ivar,
unsigned CVRQualifiers) {
const ObjCInterfaceDecl *ID =
- ObjectTy->getAs<ObjCObjectType>()->getInterface();
+ ObjectTy->castAs<ObjCObjectType>()->getInterface();
return EmitValueForIvarAtOffset(CGF, ID, BaseValue, Ivar, CVRQualifiers,
EmitIvarOffset(CGF, ID, Ivar));
}
@@ -4086,7 +4087,7 @@ llvm::Value *CGObjCGNU::EmitIvarOffset(CodeGenFunction &CGF,
auto GV = new llvm::GlobalVariable(TheModule, IntTy,
false, llvm::GlobalValue::LinkOnceAnyLinkage,
llvm::Constant::getNullValue(IntTy), name);
- GV->setAlignment(Align.getQuantity());
+ GV->setAlignment(Align.getAsAlign());
Offset = GV;
}
Offset = CGF.Builder.CreateAlignedLoad(Offset, Align);
diff --git a/lib/CodeGen/CGObjCMac.cpp b/lib/CodeGen/CGObjCMac.cpp
index 12880fecbadf..8e28b2f05c16 100644
--- a/lib/CodeGen/CGObjCMac.cpp
+++ b/lib/CodeGen/CGObjCMac.cpp
@@ -2018,7 +2018,7 @@ CGObjCCommonMac::GenerateConstantNSString(const StringLiteral *Literal) {
GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
// Don't enforce the target's minimum global alignment, since the only use
// of the string is via this class initializer.
- GV->setAlignment(1);
+ GV->setAlignment(llvm::Align::None());
Fields.addBitCast(GV, CGM.Int8PtrTy);
// String length.
@@ -2517,14 +2517,12 @@ void CGObjCCommonMac::BuildRCRecordLayout(const llvm::StructLayout *RecLayout,
}
if (const ArrayType *Array = CGM.getContext().getAsArrayType(FQT)) {
- const ConstantArrayType *CArray =
- dyn_cast_or_null<ConstantArrayType>(Array);
+ auto *CArray = cast<ConstantArrayType>(Array);
uint64_t ElCount = CArray->getSize().getZExtValue();
assert(CArray && "only array with known element size is supported");
FQT = CArray->getElementType();
while (const ArrayType *Array = CGM.getContext().getAsArrayType(FQT)) {
- const ConstantArrayType *CArray =
- dyn_cast_or_null<ConstantArrayType>(Array);
+ auto *CArray = cast<ConstantArrayType>(Array);
ElCount *= CArray->getSize().getZExtValue();
FQT = CArray->getElementType();
}
@@ -3103,7 +3101,7 @@ llvm::Constant *CGObjCMac::GetOrEmitProtocolRef(const ObjCProtocolDecl *PD) {
nullptr, "OBJC_PROTOCOL_" + PD->getName());
Entry->setSection("__OBJC,__protocol,regular,no_dead_strip");
// FIXME: Is this necessary? Why only for protocol?
- Entry->setAlignment(4);
+ Entry->setAlignment(llvm::Align(4));
}
return Entry;
@@ -3609,7 +3607,7 @@ void CGObjCMac::GenerateClass(const ObjCImplementationDecl *ID) {
"Forward metaclass reference has incorrect type.");
values.finishAndSetAsInitializer(GV);
GV->setSection(Section);
- GV->setAlignment(CGM.getPointerAlign().getQuantity());
+ GV->setAlignment(CGM.getPointerAlign().getAsAlign());
CGM.addCompilerUsedGlobal(GV);
} else
GV = CreateMetadataVar(Name, values, Section, CGM.getPointerAlign(), true);
@@ -4016,7 +4014,7 @@ llvm::GlobalVariable *CGObjCCommonMac::CreateMetadataVar(Twine Name,
new llvm::GlobalVariable(CGM.getModule(), Ty, false, LT, Init, Name);
if (!Section.empty())
GV->setSection(Section);
- GV->setAlignment(Align.getQuantity());
+ GV->setAlignment(Align.getAsAlign());
if (AddToUsed)
CGM.addCompilerUsedGlobal(GV);
return GV;
@@ -4064,7 +4062,7 @@ CGObjCCommonMac::CreateCStringLiteral(StringRef Name, ObjCLabelType Type,
if (CGM.getTriple().isOSBinFormatMachO())
GV->setSection(Section);
GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
- GV->setAlignment(CharUnits::One().getQuantity());
+ GV->setAlignment(CharUnits::One().getAsAlign());
CGM.addCompilerUsedGlobal(GV);
return GV;
@@ -4902,7 +4900,7 @@ LValue CGObjCMac::EmitObjCValueForIvar(CodeGen::CodeGenFunction &CGF,
const ObjCIvarDecl *Ivar,
unsigned CVRQualifiers) {
const ObjCInterfaceDecl *ID =
- ObjectTy->getAs<ObjCObjectType>()->getInterface();
+ ObjectTy->castAs<ObjCObjectType>()->getInterface();
return EmitValueForIvarAtOffset(CGF, ID, BaseValue, Ivar, CVRQualifiers,
EmitIvarOffset(CGF, ID, Ivar));
}
@@ -6076,7 +6074,8 @@ void CGObjCNonFragileABIMac::AddModuleClassList(
llvm::GlobalVariable *GV =
new llvm::GlobalVariable(CGM.getModule(), Init->getType(), false, LT, Init,
SymbolName);
- GV->setAlignment(CGM.getDataLayout().getABITypeAlignment(Init->getType()));
+ GV->setAlignment(
+ llvm::Align(CGM.getDataLayout().getABITypeAlignment(Init->getType())));
GV->setSection(SectionName);
CGM.addCompilerUsedGlobal(GV);
}
@@ -6319,8 +6318,8 @@ CGObjCNonFragileABIMac::BuildClassObject(const ObjCInterfaceDecl *CI,
if (CGM.getTriple().isOSBinFormatMachO())
GV->setSection("__DATA, __objc_data");
- GV->setAlignment(
- CGM.getDataLayout().getABITypeAlignment(ObjCTypes.ClassnfABITy));
+ GV->setAlignment(llvm::Align(
+ CGM.getDataLayout().getABITypeAlignment(ObjCTypes.ClassnfABITy)));
if (!CGM.getTriple().isOSBinFormatCOFF())
if (HiddenVisibility)
GV->setVisibility(llvm::GlobalValue::HiddenVisibility);
@@ -6527,7 +6526,7 @@ llvm::Value *CGObjCNonFragileABIMac::GenerateProtocolRef(CodeGenFunction &CGF,
PTGV->setSection(GetSectionName("__objc_protorefs",
"coalesced,no_dead_strip"));
PTGV->setVisibility(llvm::GlobalValue::HiddenVisibility);
- PTGV->setAlignment(Align.getQuantity());
+ PTGV->setAlignment(Align.getAsAlign());
if (!CGM.getTriple().isOSBinFormatMachO())
PTGV->setComdat(CGM.getModule().getOrInsertComdat(ProtocolName));
CGM.addUsedGlobal(PTGV);
@@ -6759,8 +6758,8 @@ CGObjCNonFragileABIMac::EmitIvarOffsetVar(const ObjCInterfaceDecl *ID,
llvm::GlobalVariable *IvarOffsetGV = ObjCIvarOffsetVariable(ID, Ivar);
IvarOffsetGV->setInitializer(
llvm::ConstantInt::get(ObjCTypes.IvarOffsetVarTy, Offset));
- IvarOffsetGV->setAlignment(
- CGM.getDataLayout().getABITypeAlignment(ObjCTypes.IvarOffsetVarTy));
+ IvarOffsetGV->setAlignment(llvm::Align(
+ CGM.getDataLayout().getABITypeAlignment(ObjCTypes.IvarOffsetVarTy)));
if (!CGM.getTriple().isOSBinFormatCOFF()) {
// FIXME: This matches gcc, but shouldn't the visibility be set on the use
@@ -6986,8 +6985,8 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocol(
ProtocolRef);
if (!CGM.getTriple().isOSBinFormatMachO())
PTGV->setComdat(CGM.getModule().getOrInsertComdat(ProtocolRef));
- PTGV->setAlignment(
- CGM.getDataLayout().getABITypeAlignment(ObjCTypes.ProtocolnfABIPtrTy));
+ PTGV->setAlignment(llvm::Align(
+ CGM.getDataLayout().getABITypeAlignment(ObjCTypes.ProtocolnfABIPtrTy)));
PTGV->setSection(GetSectionName("__objc_protolist",
"coalesced,no_dead_strip"));
PTGV->setVisibility(llvm::GlobalValue::HiddenVisibility);
@@ -7053,7 +7052,7 @@ LValue CGObjCNonFragileABIMac::EmitObjCValueForIvar(
llvm::Value *BaseValue,
const ObjCIvarDecl *Ivar,
unsigned CVRQualifiers) {
- ObjCInterfaceDecl *ID = ObjectTy->getAs<ObjCObjectType>()->getInterface();
+ ObjCInterfaceDecl *ID = ObjectTy->castAs<ObjCObjectType>()->getInterface();
llvm::Value *Offset = EmitIvarOffset(CGF, ID, Ivar);
return EmitValueForIvarAtOffset(CGF, ID, BaseValue, Ivar, CVRQualifiers,
Offset);
@@ -7338,7 +7337,7 @@ CGObjCNonFragileABIMac::EmitClassRefFromId(CodeGenFunction &CGF,
CGM.getModule(), ClassGV->getType(), false,
getLinkageTypeForObjCMetadata(CGM, SectionName), ClassGV,
"OBJC_CLASSLIST_REFERENCES_$_");
- Entry->setAlignment(CGF.getPointerAlign().getQuantity());
+ Entry->setAlignment(CGF.getPointerAlign().getAsAlign());
if (!ID || !ID->hasAttr<ObjCClassStubAttr>())
Entry->setSection(SectionName);
@@ -7377,7 +7376,7 @@ CGObjCNonFragileABIMac::EmitSuperClassRef(CodeGenFunction &CGF,
CGM.getModule(), ClassGV->getType(), false,
getLinkageTypeForObjCMetadata(CGM, SectionName), ClassGV,
"OBJC_CLASSLIST_SUP_REFS_$_");
- Entry->setAlignment(CGF.getPointerAlign().getQuantity());
+ Entry->setAlignment(CGF.getPointerAlign().getAsAlign());
Entry->setSection(SectionName);
CGM.addCompilerUsedGlobal(Entry);
}
@@ -7401,7 +7400,7 @@ llvm::Value *CGObjCNonFragileABIMac::EmitMetaClassRef(CodeGenFunction &CGF,
CGM.getModule(), ObjCTypes.ClassnfABIPtrTy, false,
getLinkageTypeForObjCMetadata(CGM, SectionName), MetaClassGV,
"OBJC_CLASSLIST_SUP_REFS_$_");
- Entry->setAlignment(Align.getQuantity());
+ Entry->setAlignment(Align.getAsAlign());
Entry->setSection(SectionName);
CGM.addCompilerUsedGlobal(Entry);
}
@@ -7500,7 +7499,7 @@ Address CGObjCNonFragileABIMac::EmitSelectorAddr(CodeGenFunction &CGF,
"OBJC_SELECTOR_REFERENCES_");
Entry->setExternallyInitialized(true);
Entry->setSection(SectionName);
- Entry->setAlignment(Align.getQuantity());
+ Entry->setAlignment(Align.getAsAlign());
CGM.addCompilerUsedGlobal(Entry);
}
@@ -7733,7 +7732,7 @@ CGObjCNonFragileABIMac::GetInterfaceEHType(const ObjCInterfaceDecl *ID,
: llvm::GlobalValue::WeakAnyLinkage;
if (Entry) {
values.finishAndSetAsInitializer(Entry);
- Entry->setAlignment(CGM.getPointerAlign().getQuantity());
+ Entry->setAlignment(CGM.getPointerAlign().getAsAlign());
} else {
Entry = values.finishAndCreateGlobal("OBJC_EHTYPE_$_" + ClassName,
CGM.getPointerAlign(),
diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp
index 27e7175da841..2a13a2a58156 100644
--- a/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -752,6 +752,11 @@ enum OpenMPRTLFunction {
// arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
// *arg_types);
OMPRTL__tgt_target_data_update_nowait,
+ // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
+ OMPRTL__tgt_mapper_num_components,
+ // Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void
+ // *base, void *begin, int64_t size, int64_t type);
+ OMPRTL__tgt_push_mapper_component,
};
/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
@@ -1259,6 +1264,52 @@ CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
loadOffloadInfoMetadata();
}
+bool CGOpenMPRuntime::tryEmitDeclareVariant(const GlobalDecl &NewGD,
+ const GlobalDecl &OldGD,
+ llvm::GlobalValue *OrigAddr,
+ bool IsForDefinition) {
+ // Emit at least a definition for the aliasee if the address of the
+ // original function is requested.
+ if (IsForDefinition || OrigAddr)
+ (void)CGM.GetAddrOfGlobal(NewGD);
+ StringRef NewMangledName = CGM.getMangledName(NewGD);
+ llvm::GlobalValue *Addr = CGM.GetGlobalValue(NewMangledName);
+ if (Addr && !Addr->isDeclaration()) {
+ const auto *D = cast<FunctionDecl>(OldGD.getDecl());
+ const CGFunctionInfo &FI = CGM.getTypes().arrangeGlobalDeclaration(OldGD);
+ llvm::Type *DeclTy = CGM.getTypes().GetFunctionType(FI);
+
+ // Create a reference to the named value. This ensures that it is emitted
+ // if it is a deferred decl.
+ llvm::GlobalValue::LinkageTypes LT = CGM.getFunctionLinkage(OldGD);
+
+ // Create the new alias itself, but don't set a name yet.
+ auto *GA =
+ llvm::GlobalAlias::create(DeclTy, 0, LT, "", Addr, &CGM.getModule());
+
+ if (OrigAddr) {
+ assert(OrigAddr->isDeclaration() && "Expected declaration");
+
+ GA->takeName(OrigAddr);
+ OrigAddr->replaceAllUsesWith(
+ llvm::ConstantExpr::getBitCast(GA, OrigAddr->getType()));
+ OrigAddr->eraseFromParent();
+ } else {
+ GA->setName(CGM.getMangledName(OldGD));
+ }
+
+ // Set attributes which are particular to an alias; this is a
+ // specialization of the attributes which may be set on a global function.
+ if (D->hasAttr<WeakAttr>() || D->hasAttr<WeakRefAttr>() ||
+ D->isWeakImported())
+ GA->setLinkage(llvm::Function::WeakAnyLinkage);
+
+ CGM.SetCommonAttributes(OldGD, GA);
+ return true;
+ }
+ return false;
+}
+
void CGOpenMPRuntime::clear() {
InternalVars.clear();
// Clean non-target variable declarations possibly used only in debug info.
@@ -1272,6 +1323,14 @@ void CGOpenMPRuntime::clear() {
continue;
GV->eraseFromParent();
}
+ // Emit aliases for the deferred aliasees.
+ for (const auto &Pair : DeferredVariantFunction) {
+ StringRef MangledName = CGM.getMangledName(Pair.second.second);
+ llvm::GlobalValue *Addr = CGM.GetGlobalValue(MangledName);
+ // If not able to emit alias, just emit original declaration.
+ (void)tryEmitDeclareVariant(Pair.second.first, Pair.second.second, Addr,
+ /*IsForDefinition=*/false);
+ }
}
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
@@ -1638,18 +1697,23 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
return ThreadID;
}
// If exceptions are enabled, do not use parameter to avoid possible crash.
- if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
- !CGF.getLangOpts().CXXExceptions ||
- CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
- if (auto *OMPRegionInfo =
- dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
- if (OMPRegionInfo->getThreadIDVariable()) {
- // Check if this an outlined function with thread id passed as argument.
- LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
+ if (auto *OMPRegionInfo =
+ dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
+ if (OMPRegionInfo->getThreadIDVariable()) {
+ // Check if this is an outlined function with thread id passed as argument.
+ LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
+ llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
+ if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
+ !CGF.getLangOpts().CXXExceptions ||
+ CGF.Builder.GetInsertBlock() == TopBlock ||
+ !isa<llvm::Instruction>(LVal.getPointer()) ||
+ cast<llvm::Instruction>(LVal.getPointer())->getParent() == TopBlock ||
+ cast<llvm::Instruction>(LVal.getPointer())->getParent() ==
+ CGF.Builder.GetInsertBlock()) {
ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
// If value loaded in entry block, cache it and use it everywhere in
// function.
- if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
+ if (CGF.Builder.GetInsertBlock() == TopBlock) {
auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
Elem.second.ThreadID = ThreadID;
}
@@ -1686,6 +1750,12 @@ void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
UDRMap.erase(D);
FunctionUDRMap.erase(CGF.CurFn);
}
+ auto I = FunctionUDMMap.find(CGF.CurFn);
+ if (I != FunctionUDMMap.end()) {
+ for(auto *D : I->second)
+ UDMMap.erase(D);
+ FunctionUDMMap.erase(I);
+ }
}
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
@@ -2459,6 +2529,24 @@ llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
break;
}
+ case OMPRTL__tgt_mapper_num_components: {
+ // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
+ llvm::Type *TypeParams[] = {CGM.VoidPtrTy};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components");
+ break;
+ }
+ case OMPRTL__tgt_push_mapper_component: {
+ // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void
+ // *base, void *begin, int64_t size, int64_t type);
+ llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy,
+ CGM.Int64Ty, CGM.Int64Ty};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component");
+ break;
+ }
}
assert(RTLFn && "Unable to find OpenMP runtime function");
return RTLFn;
@@ -2552,6 +2640,32 @@ CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
return CGM.CreateRuntimeFunction(FnTy, Name);
}
+/// Obtain information that uniquely identifies a target entry. This
+/// consists of the file and device IDs as well as line number associated with
+/// the relevant entry source location.
+static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
+ unsigned &DeviceID, unsigned &FileID,
+ unsigned &LineNum) {
+ SourceManager &SM = C.getSourceManager();
+
+ // The loc should be always valid and have a file ID (the user cannot use
+ // #pragma directives in macros)
+
+ assert(Loc.isValid() && "Source location is expected to be always valid.");
+
+ PresumedLoc PLoc = SM.getPresumedLoc(Loc);
+ assert(PLoc.isValid() && "Source location is expected to be always valid.");
+
+ llvm::sys::fs::UniqueID ID;
+ if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
+ SM.getDiagnostics().Report(diag::err_cannot_open_file)
+ << PLoc.getFilename() << EC.message();
+
+ DeviceID = ID.getDevice();
+ FileID = ID.getFile();
+ LineNum = PLoc.getLine();
+}
+
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
if (CGM.getLangOpts().OpenMPSimd)
return Address::invalid();
@@ -2563,19 +2677,27 @@ Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
SmallString<64> PtrName;
{
llvm::raw_svector_ostream OS(PtrName);
- OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_ref_ptr";
+ OS << CGM.getMangledName(GlobalDecl(VD));
+ if (!VD->isExternallyVisible()) {
+ unsigned DeviceID, FileID, Line;
+ getTargetEntryUniqueInfo(CGM.getContext(),
+ VD->getCanonicalDecl()->getBeginLoc(),
+ DeviceID, FileID, Line);
+ OS << llvm::format("_%x", FileID);
+ }
+ OS << "_decl_tgt_ref_ptr";
}
llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
if (!Ptr) {
QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
PtrName);
- if (!CGM.getLangOpts().OpenMPIsDevice) {
- auto *GV = cast<llvm::GlobalVariable>(Ptr);
- GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
+
+ auto *GV = cast<llvm::GlobalVariable>(Ptr);
+ GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
+
+ if (!CGM.getLangOpts().OpenMPIsDevice)
GV->setInitializer(CGM.GetAddrOfGlobal(VD));
- }
- CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ptr));
registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
}
return Address(Ptr, CGM.getContext().getDeclAlign(VD));
@@ -2749,35 +2871,12 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
return nullptr;
}
-/// Obtain information that uniquely identifies a target entry. This
-/// consists of the file and device IDs as well as line number associated with
-/// the relevant entry source location.
-static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
- unsigned &DeviceID, unsigned &FileID,
- unsigned &LineNum) {
- SourceManager &SM = C.getSourceManager();
-
- // The loc should be always valid and have a file ID (the user cannot use
- // #pragma directives in macros)
-
- assert(Loc.isValid() && "Source location is expected to be always valid.");
-
- PresumedLoc PLoc = SM.getPresumedLoc(Loc);
- assert(PLoc.isValid() && "Source location is expected to be always valid.");
-
- llvm::sys::fs::UniqueID ID;
- if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
- SM.getDiagnostics().Report(diag::err_cannot_open_file)
- << PLoc.getFilename() << EC.message();
-
- DeviceID = ID.getDevice();
- FileID = ID.getFile();
- LineNum = PLoc.getLine();
-}
-
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
llvm::GlobalVariable *Addr,
bool PerformInit) {
+ if (CGM.getLangOpts().OMPTargetTriples.empty() &&
+ !CGM.getLangOpts().OpenMPIsDevice)
+ return false;
Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
@@ -2981,14 +3080,16 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
CGF.EmitRuntimeCall(
RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
- // OutlinedFn(&GTid, &zero, CapturedStruct);
- Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
- /*Name*/ ".zero.addr");
- CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
+ // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
+ Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
+ Address ZeroAddrBound =
+ CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
+ /*Name=*/".bound.zero.addr");
+ CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
// ThreadId for serialized parallels is 0.
- OutlinedFnArgs.push_back(ZeroAddr.getPointer());
- OutlinedFnArgs.push_back(ZeroAddr.getPointer());
+ OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
+ OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
@@ -3283,9 +3384,9 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
// <copy_func>, did_it);
if (DidIt.isValid()) {
llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
- QualType CopyprivateArrayTy =
- C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
- /*IndexTypeQuals=*/0);
+ QualType CopyprivateArrayTy = C.getConstantArrayType(
+ C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
+ /*IndexTypeQuals=*/0);
// Create a list of all private variables for copyprivate.
Address CopyprivateList =
CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
@@ -3472,7 +3573,7 @@ bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
return Schedule != OMP_sch_static;
}
-static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
+static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
OpenMPScheduleClauseModifier M1,
OpenMPScheduleClauseModifier M2) {
int Modifier = 0;
@@ -3506,6 +3607,18 @@ static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
case OMPC_SCHEDULE_MODIFIER_unknown:
break;
}
+ // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
+ // If the static schedule kind is specified or if the ordered clause is
+ // specified, and if the nonmonotonic modifier is not specified, the effect is
+ // as if the monotonic modifier is specified. Otherwise, unless the monotonic
+ // modifier is specified, the effect is as if the nonmonotonic modifier is
+ // specified.
+ if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
+ if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
+ Schedule == OMP_sch_static_balanced_chunked ||
+ Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static))
+ Modifier = OMP_sch_modifier_nonmonotonic;
+ }
return Schedule | Modifier;
}
@@ -3530,13 +3643,14 @@ void CGOpenMPRuntime::emitForDispatchInit(
llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
: CGF.Builder.getIntN(IVSize, 1);
llvm::Value *Args[] = {
- emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
+ emitUpdateLocation(CGF, Loc),
+ getThreadID(CGF, Loc),
CGF.Builder.getInt32(addMonoNonMonoModifier(
- Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
- DispatchValues.LB, // Lower
- DispatchValues.UB, // Upper
- CGF.Builder.getIntN(IVSize, 1), // Stride
- Chunk // Chunk
+ CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
+ DispatchValues.LB, // Lower
+ DispatchValues.UB, // Upper
+ CGF.Builder.getIntN(IVSize, 1), // Stride
+ Chunk // Chunk
};
CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}
@@ -3578,7 +3692,7 @@ static void emitForStaticInitCall(
llvm::Value *Args[] = {
UpdateLocation,
ThreadId,
- CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1,
+ CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
M2)), // Schedule type
Values.IL.getPointer(), // &isLastIter
Values.LB.getPointer(), // &LB
@@ -3899,157 +4013,6 @@ void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
Action(E.getKey(), E.getValue());
}
-llvm::Function *
-CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
- // If we don't have entries or if we are emitting code for the device, we
- // don't need to do anything.
- if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
- return nullptr;
-
- llvm::Module &M = CGM.getModule();
- ASTContext &C = CGM.getContext();
-
- // Get list of devices we care about
- const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples;
-
- // We should be creating an offloading descriptor only if there are devices
- // specified.
- assert(!Devices.empty() && "No OpenMP offloading devices??");
-
- // Create the external variables that will point to the begin and end of the
- // host entries section. These will be defined by the linker.
- llvm::Type *OffloadEntryTy =
- CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
- std::string EntriesBeginName = getName({"omp_offloading", "entries_begin"});
- auto *HostEntriesBegin = new llvm::GlobalVariable(
- M, OffloadEntryTy, /*isConstant=*/true,
- llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
- EntriesBeginName);
- std::string EntriesEndName = getName({"omp_offloading", "entries_end"});
- auto *HostEntriesEnd =
- new llvm::GlobalVariable(M, OffloadEntryTy, /*isConstant=*/true,
- llvm::GlobalValue::ExternalLinkage,
- /*Initializer=*/nullptr, EntriesEndName);
-
- // Create all device images
- auto *DeviceImageTy = cast<llvm::StructType>(
- CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
- ConstantInitBuilder DeviceImagesBuilder(CGM);
- ConstantArrayBuilder DeviceImagesEntries =
- DeviceImagesBuilder.beginArray(DeviceImageTy);
-
- for (const llvm::Triple &Device : Devices) {
- StringRef T = Device.getTriple();
- std::string BeginName = getName({"omp_offloading", "img_start", ""});
- auto *ImgBegin = new llvm::GlobalVariable(
- M, CGM.Int8Ty, /*isConstant=*/true,
- llvm::GlobalValue::ExternalWeakLinkage,
- /*Initializer=*/nullptr, Twine(BeginName).concat(T));
- std::string EndName = getName({"omp_offloading", "img_end", ""});
- auto *ImgEnd = new llvm::GlobalVariable(
- M, CGM.Int8Ty, /*isConstant=*/true,
- llvm::GlobalValue::ExternalWeakLinkage,
- /*Initializer=*/nullptr, Twine(EndName).concat(T));
-
- llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin,
- HostEntriesEnd};
- createConstantGlobalStructAndAddToParent(CGM, getTgtDeviceImageQTy(), Data,
- DeviceImagesEntries);
- }
-
- // Create device images global array.
- std::string ImagesName = getName({"omp_offloading", "device_images"});
- llvm::GlobalVariable *DeviceImages =
- DeviceImagesEntries.finishAndCreateGlobal(ImagesName,
- CGM.getPointerAlign(),
- /*isConstant=*/true);
- DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
-
- // This is a Zero array to be used in the creation of the constant expressions
- llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
- llvm::Constant::getNullValue(CGM.Int32Ty)};
-
- // Create the target region descriptor.
- llvm::Constant *Data[] = {
- llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()),
- llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
- DeviceImages, Index),
- HostEntriesBegin, HostEntriesEnd};
- std::string Descriptor = getName({"omp_offloading", "descriptor"});
- llvm::GlobalVariable *Desc = createGlobalStruct(
- CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true, Data, Descriptor);
-
- // Emit code to register or unregister the descriptor at execution
- // startup or closing, respectively.
-
- llvm::Function *UnRegFn;
- {
- FunctionArgList Args;
- ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other);
- Args.push_back(&DummyPtr);
-
- CodeGenFunction CGF(CGM);
- // Disable debug info for global (de-)initializer because they are not part
- // of some particular construct.
- CGF.disableDebugInfo();
- const auto &FI =
- CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
- llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
- std::string UnregName = getName({"omp_offloading", "descriptor_unreg"});
- UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI);
- CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args);
- CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
- Desc);
- CGF.FinishFunction();
- }
- llvm::Function *RegFn;
- {
- CodeGenFunction CGF(CGM);
- // Disable debug info for global (de-)initializer because they are not part
- // of some particular construct.
- CGF.disableDebugInfo();
- const auto &FI = CGM.getTypes().arrangeNullaryFunction();
- llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
-
- // Encode offload target triples into the registration function name. It
- // will serve as a comdat key for the registration/unregistration code for
- // this particular combination of offloading targets.
- SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U);
- RegFnNameParts[0] = "omp_offloading";
- RegFnNameParts[1] = "descriptor_reg";
- llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2),
- [](const llvm::Triple &T) -> const std::string& {
- return T.getTriple();
- });
- llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end());
- std::string Descriptor = getName(RegFnNameParts);
- RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI);
- CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList());
- CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc);
- // Create a variable to drive the registration and unregistration of the
- // descriptor, so we can reuse the logic that emits Ctors and Dtors.
- ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(),
- SourceLocation(), nullptr, C.CharTy,
- ImplicitParamDecl::Other);
- CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
- CGF.FinishFunction();
- }
- if (CGM.supportsCOMDAT()) {
- // It is sufficient to call registration function only once, so create a
- // COMDAT group for registration/unregistration functions and associated
- // data. That would reduce startup time and code size. Registration
- // function serves as a COMDAT group key.
- llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName());
- RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
- RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility);
- RegFn->setComdat(ComdatKey);
- UnRegFn->setComdat(ComdatKey);
- DeviceImages->setComdat(ComdatKey);
- Desc->setComdat(ComdatKey);
- }
- return RegFn;
-}
-
void CGOpenMPRuntime::createOffloadEntry(
llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
llvm::GlobalValue::LinkageTypes Linkage) {
@@ -4077,8 +4040,7 @@ void CGOpenMPRuntime::createOffloadEntry(
Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
// The entry has to be created in the section the linker expects it to be.
- std::string Section = getName({"omp_offloading", "entries"});
- Entry->setSection(Section);
+ Entry->setSection("omp_offloading_entries");
}
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
@@ -4091,13 +4053,16 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
// Right now we only generate metadata for function that contain target
// regions.
- // If we do not have entries, we don't need to do anything.
- if (OffloadEntriesInfoManager.empty())
+ // If we are in simd mode or there are no entries, we don't need to do
+ // anything.
+ if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
return;
llvm::Module &M = CGM.getModule();
llvm::LLVMContext &C = M.getContext();
- SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
+ SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
+ SourceLocation, StringRef>,
+ 16>
OrderedEntries(OffloadEntriesInfoManager.size());
llvm::SmallVector<StringRef, 16> ParentFunctions(
OffloadEntriesInfoManager.size());
@@ -4115,7 +4080,8 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
// Create function that emits metadata for each target region entry;
auto &&TargetRegionMetadataEmitter =
- [&C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, &GetMDString](
+ [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
+ &GetMDString](
unsigned DeviceID, unsigned FileID, StringRef ParentName,
unsigned Line,
const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
@@ -4133,8 +4099,19 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
GetMDInt(FileID), GetMDString(ParentName),
GetMDInt(Line), GetMDInt(E.getOrder())};
+ SourceLocation Loc;
+ for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
+ E = CGM.getContext().getSourceManager().fileinfo_end();
+ I != E; ++I) {
+ if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
+ I->getFirst()->getUniqueID().getFile() == FileID) {
+ Loc = CGM.getContext().getSourceManager().translateFileLineCol(
+ I->getFirst(), Line, 1);
+ break;
+ }
+ }
// Save this entry in the right position of the ordered entries array.
- OrderedEntries[E.getOrder()] = &E;
+ OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
ParentFunctions[E.getOrder()] = ParentName;
// Add metadata to the named metadata node.
@@ -4162,7 +4139,8 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
// Save this entry in the right position of the ordered entries array.
- OrderedEntries[E.getOrder()] = &E;
+ OrderedEntries[E.getOrder()] =
+ std::make_tuple(&E, SourceLocation(), MangledName);
// Add metadata to the named metadata node.
MD->addOperand(llvm::MDNode::get(C, Ops));
@@ -4171,11 +4149,11 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
DeviceGlobalVarMetadataEmitter);
- for (const auto *E : OrderedEntries) {
- assert(E && "All ordered entries must exist!");
+ for (const auto &E : OrderedEntries) {
+ assert(std::get<0>(E) && "All ordered entries must exist!");
if (const auto *CE =
dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
- E)) {
+ std::get<0>(E))) {
if (!CE->getID() || !CE->getAddress()) {
// Do not blame the entry if the parent function is not emitted.
StringRef FnName = ParentFunctions[CE->getOrder()];
@@ -4183,16 +4161,16 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
continue;
unsigned DiagID = CGM.getDiags().getCustomDiagID(
DiagnosticsEngine::Error,
- "Offloading entry for target region is incorrect: either the "
+ "Offloading entry for target region in %0 is incorrect: either the "
"address or the ID is invalid.");
- CGM.getDiags().Report(DiagID);
+ CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
continue;
}
createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
- } else if (const auto *CE =
- dyn_cast<OffloadEntriesInfoManagerTy::
- OffloadEntryInfoDeviceGlobalVar>(E)) {
+ } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
+ OffloadEntryInfoDeviceGlobalVar>(
+ std::get<0>(E))) {
OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
CE->getFlags());
@@ -4203,10 +4181,10 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
continue;
if (!CE->getAddress()) {
unsigned DiagID = CGM.getDiags().getCustomDiagID(
- DiagnosticsEngine::Error,
- "Offloading entry for declare target variable is incorrect: the "
- "address is invalid.");
- CGM.getDiags().Report(DiagID);
+ DiagnosticsEngine::Error, "Offloading entry for declare target "
+ "variable %0 is incorrect: the "
+ "address is invalid.");
+ CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
continue;
}
// The variable has no definition - no need to add the entry.
@@ -5242,7 +5220,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
// Define type kmp_depend_info[<Dependences.size()>];
QualType KmpDependInfoArrayTy = C.getConstantArrayType(
KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
- ArrayType::Normal, /*IndexTypeQuals=*/0);
+ nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
// kmp_depend_info[<Dependences.size()>] deps;
DependenciesArray =
CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
@@ -5763,7 +5741,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
}
llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
QualType ReductionArrayTy =
- C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
+ C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
/*IndexTypeQuals=*/0);
Address ReductionList =
CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
@@ -6235,7 +6213,7 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
unsigned Size = Data.ReductionVars.size();
llvm::APInt ArraySize(/*numBits=*/64, Size);
QualType ArrayRDType = C.getConstantArrayType(
- RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0);
+ RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
// kmp_task_red_input_t .rd_input.[Size];
Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
@@ -6720,12 +6698,16 @@ emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
case OMPD_teams_distribute_parallel_for_simd:
case OMPD_target_update:
case OMPD_declare_simd:
+ case OMPD_declare_variant:
case OMPD_declare_target:
case OMPD_end_declare_target:
case OMPD_declare_reduction:
case OMPD_declare_mapper:
case OMPD_taskloop:
case OMPD_taskloop_simd:
+ case OMPD_master_taskloop:
+ case OMPD_master_taskloop_simd:
+ case OMPD_parallel_master_taskloop:
case OMPD_requires:
case OMPD_unknown:
break;
@@ -7025,12 +7007,16 @@ emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
case OMPD_teams_distribute_parallel_for_simd:
case OMPD_target_update:
case OMPD_declare_simd:
+ case OMPD_declare_variant:
case OMPD_declare_target:
case OMPD_end_declare_target:
case OMPD_declare_reduction:
case OMPD_declare_mapper:
case OMPD_taskloop:
case OMPD_taskloop_simd:
+ case OMPD_master_taskloop:
+ case OMPD_master_taskloop_simd:
+ case OMPD_parallel_master_taskloop:
case OMPD_requires:
case OMPD_unknown:
break;
@@ -7079,12 +7065,24 @@ public:
OMP_MAP_LITERAL = 0x100,
/// Implicit map
OMP_MAP_IMPLICIT = 0x200,
+ /// Close is a hint to the runtime to allocate memory close to
+ /// the target device.
+ OMP_MAP_CLOSE = 0x400,
/// The 16 MSBs of the flags indicate whether the entry is member of some
/// struct/class.
OMP_MAP_MEMBER_OF = 0xffff000000000000,
LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
};
+ /// Get the offset of the OMP_MAP_MEMBER_OF field, i.e. the number of zero bits below the lowest set bit of that mask.
+ static unsigned getFlagMemberOffset() {
+ unsigned Offset = 0;
+ for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
+ Remain = Remain >> 1)
+ Offset++; // one more zero bit shifted out below the mask
+ return Offset;
+ }
+
/// Class that associates information with a base pointer to be passed to the
/// runtime library.
class BasePointerInfo {
@@ -7148,8 +7146,11 @@ private:
: IE(IE), VD(VD) {}
};
- /// Directive from where the map clauses were extracted.
- const OMPExecutableDirective &CurDir;
+ /// The target directive from where the mappable clauses were extracted. It
+ /// is either an executable directive or a user-defined mapper directive.
+ llvm::PointerUnion<const OMPExecutableDirective *,
+ const OMPDeclareMapperDecl *>
+ CurDir;
/// Function the directive is being generated for.
CodeGenFunction &CGF;
@@ -7181,9 +7182,11 @@ private:
OAE->getBase()->IgnoreParenImpCasts())
.getCanonicalType();
- // If there is no length associated with the expression, that means we
- // are using the whole length of the base.
- if (!OAE->getLength() && OAE->getColonLoc().isValid())
+ // If there is no length associated with the expression and lower bound is
+ // not specified too, that means we are using the whole length of the
+ // base.
+ if (!OAE->getLength() && OAE->getColonLoc().isValid() &&
+ !OAE->getLowerBound())
return CGF.getTypeSize(BaseTy);
llvm::Value *ElemSize;
@@ -7197,13 +7200,30 @@ private:
// If we don't have a length at this point, that is because we have an
// array section with a single element.
- if (!OAE->getLength())
+ if (!OAE->getLength() && OAE->getColonLoc().isInvalid())
return ElemSize;
- llvm::Value *LengthVal = CGF.EmitScalarExpr(OAE->getLength());
- LengthVal =
- CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false);
- return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
+ if (const Expr *LenExpr = OAE->getLength()) {
+ llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
+ LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
+ CGF.getContext().getSizeType(),
+ LenExpr->getExprLoc());
+ return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
+ }
+ assert(!OAE->getLength() && OAE->getColonLoc().isValid() &&
+ OAE->getLowerBound() && "expected array_section[lb:].");
+ // Size = sizeof(base type) - lb * sizeof(element type), clamped to 0;
+ llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
+ llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
+ LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
+ CGF.getContext().getSizeType(),
+ OAE->getLowerBound()->getExprLoc());
+ LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
+ llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
+ llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
+ LengthVal = CGF.Builder.CreateSelect(
+ Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
+ return LengthVal;
}
return CGF.getTypeSize(ExprTy);
}
@@ -7247,6 +7267,9 @@ private:
if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
!= MapModifiers.end())
Bits |= OMP_MAP_ALWAYS;
+ if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
+ != MapModifiers.end())
+ Bits |= OMP_MAP_CLOSE;
return Bits;
}
@@ -7675,10 +7698,10 @@ private:
if (!IsExpressionFirstInfo) {
// If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
- // then we reset the TO/FROM/ALWAYS/DELETE flags.
+ // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
if (IsPointer)
Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
- OMP_MAP_DELETE);
+ OMP_MAP_DELETE | OMP_MAP_CLOSE);
if (ShouldBeMemberOf) {
// Set placeholder value MEMBER_OF=FFFF to indicate that the flag
@@ -7752,9 +7775,9 @@ private:
}
static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
- // Member of is given by the 16 MSB of the flag, so rotate by 48 bits.
+ // Shift left by getFlagMemberOffset() bits.
return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
- << 48);
+ << getFlagMemberOffset());
}
static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
@@ -7834,7 +7857,7 @@ private:
public:
MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
- : CurDir(Dir), CGF(CGF) {
+ : CurDir(&Dir), CGF(CGF) {
// Extract firstprivate clause information.
for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
for (const auto *D : C->varlists())
@@ -7846,6 +7869,10 @@ public:
DevPointersMap[L.first].push_back(L.second);
}
+ /// Constructor for the declare mapper directive.
+ MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
+ : CurDir(&Dir), CGF(CGF) {}
+
/// Generate code for the combined entry if we have a partially mapped struct
/// and take care of the mapping flags of the arguments corresponding to
/// individual struct members.
@@ -7907,18 +7934,20 @@ public:
IsImplicit);
};
- // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
- for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
+ assert(CurDir.is<const OMPExecutableDirective *>() &&
+ "Expect a executable directive");
+ const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
+ for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
for (const auto &L : C->component_lists()) {
InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
/*ReturnDevicePointer=*/false, C->isImplicit());
}
- for (const auto *C : this->CurDir.getClausesOfKind<OMPToClause>())
+ for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
for (const auto &L : C->component_lists()) {
InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
/*ReturnDevicePointer=*/false, C->isImplicit());
}
- for (const auto *C : this->CurDir.getClausesOfKind<OMPFromClause>())
+ for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
for (const auto &L : C->component_lists()) {
InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
/*ReturnDevicePointer=*/false, C->isImplicit());
@@ -7933,9 +7962,8 @@ public:
llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
DeferredInfo;
- // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
for (const auto *C :
- this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>()) {
+ CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
for (const auto &L : C->component_lists()) {
assert(!L.second.empty() && "Not expecting empty list of components!");
const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
@@ -7964,7 +7992,6 @@ public:
// We didn't find any match in our map information - generate a zero
// size array section - if the pointer is a struct member we defer this
// action until the whole struct has been processed.
- // FIXME: MSVC 2013 seems to require this-> to find member CGF.
if (isa<MemberExpr>(IE)) {
// Insert the pointer into Info to be processed by
// generateInfoForComponentList. Because it is a member pointer
@@ -7977,11 +8004,11 @@ public:
/*ReturnDevicePointer=*/false, C->isImplicit());
DeferredInfo[nullptr].emplace_back(IE, VD);
} else {
- llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
- this->CGF.EmitLValue(IE), IE->getExprLoc());
+ llvm::Value *Ptr =
+ CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
BasePointers.emplace_back(Ptr, VD);
Pointers.push_back(Ptr);
- Sizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
+ Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
}
}
@@ -8005,11 +8032,10 @@ public:
// Remember the current base pointer index.
unsigned CurrentBasePointersIdx = CurBasePointers.size();
- // FIXME: MSVC 2013 seems to require this-> to find the member method.
- this->generateInfoForComponentList(
- L.MapType, L.MapModifiers, L.Components, CurBasePointers,
- CurPointers, CurSizes, CurTypes, PartialStruct,
- IsFirstComponentList, L.IsImplicit);
+ generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
+ CurBasePointers, CurPointers, CurSizes,
+ CurTypes, PartialStruct,
+ IsFirstComponentList, L.IsImplicit);
// If this entry relates with a device pointer, set the relevant
// declaration and add the 'return pointer' flag.
@@ -8061,6 +8087,78 @@ public:
}
}
+ /// Generate all the base pointers, section pointers, sizes and map types for
+ /// the extracted map clauses of user-defined mapper; results are appended to the four output arrays.
+ void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers,
+ MapValuesArrayTy &Pointers,
+ MapValuesArrayTy &Sizes,
+ MapFlagsArrayTy &Types) const {
+ assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
+ "Expect a declare mapper directive");
+ const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
+ // We have to process the component lists that relate with the same
+ // declaration in a single chunk so that we can generate the map flags
+ // correctly. Therefore, we organize all lists in a map.
+ llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
+
+ // Helper function to fill the information map for the different supported
+ // clauses.
+ auto &&InfoGen = [&Info](
+ const ValueDecl *D,
+ OMPClauseMappableExprCommon::MappableExprComponentListRef L,
+ OpenMPMapClauseKind MapType,
+ ArrayRef<OpenMPMapModifierKind> MapModifiers,
+ bool ReturnDevicePointer, bool IsImplicit) {
+ const ValueDecl *VD =
+ D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
+ Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
+ IsImplicit);
+ };
+
+ for (const auto *C : CurMapperDir->clauselists()) {
+ const auto *MC = cast<OMPMapClause>(C); // NOTE(review): assumes a mapper holds only map clauses - confirm
+ for (const auto &L : MC->component_lists()) {
+ InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
+ /*ReturnDevicePointer=*/false, MC->isImplicit());
+ }
+ }
+
+ for (const auto &M : Info) {
+ // We need to know when we generate information for the first component
+ // associated with a capture, because the mapping flags depend on it.
+ bool IsFirstComponentList = true;
+
+ // Temporary versions of arrays
+ MapBaseValuesArrayTy CurBasePointers;
+ MapValuesArrayTy CurPointers;
+ MapValuesArrayTy CurSizes;
+ MapFlagsArrayTy CurTypes;
+ StructRangeInfoTy PartialStruct;
+
+ for (const MapInfo &L : M.second) {
+ assert(!L.Components.empty() &&
+ "Not expecting declaration with no component lists.");
+ generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
+ CurBasePointers, CurPointers, CurSizes,
+ CurTypes, PartialStruct,
+ IsFirstComponentList, L.IsImplicit);
+ IsFirstComponentList = false; // later lists of the same declaration are not "first"
+ }
+
+ // If there is an entry in PartialStruct it means we have a struct with
+ // individual members mapped. Emit an extra combined entry.
+ if (PartialStruct.Base.isValid())
+ emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
+ PartialStruct);
+
+ // We need to append the results of this capture to what we already have.
+ BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
+ Pointers.append(CurPointers.begin(), CurPointers.end());
+ Sizes.append(CurSizes.begin(), CurSizes.end());
+ Types.append(CurTypes.begin(), CurTypes.end());
+ }
+ }
+
/// Emit capture info for lambdas for variables captured by reference.
void generateInfoForLambdaCaptures(
const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
@@ -8184,8 +8282,10 @@ public:
std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
SmallVector<MapData, 4> DeclComponentLists;
- // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
- for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
+ assert(CurDir.is<const OMPExecutableDirective *>() &&
+ "Expect a executable directive");
+ const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
+ for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
for (const auto &L : C->decl_component_lists(VD)) {
assert(L.first == VD &&
"We got information for the wrong declaration??");
@@ -8333,9 +8433,12 @@ public:
MapValuesArrayTy &Pointers,
MapValuesArrayTy &Sizes,
MapFlagsArrayTy &Types) const {
+ assert(CurDir.is<const OMPExecutableDirective *>() &&
+ "Expect a executable directive");
+ const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
// Map other list items in the map clause which are not captured variables
// but "declare target link" global variables.
- for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
+ for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
for (const auto &L : C->component_lists()) {
if (!L.first)
continue;
@@ -8472,9 +8575,9 @@ emitOffloadingArrays(CodeGenFunction &CGF,
}
llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
- QualType PointerArrayType =
- Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
- /*IndexTypeQuals=*/0);
+ QualType PointerArrayType = Ctx.getConstantArrayType(
+ Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
+ /*IndexTypeQuals=*/0);
Info.BasePointersArray =
CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
@@ -8487,9 +8590,9 @@ emitOffloadingArrays(CodeGenFunction &CGF,
QualType Int64Ty =
Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
if (hasRuntimeEvaluationCaptureSize) {
- QualType SizeArrayType =
- Ctx.getConstantArrayType(Int64Ty, PointerNumAP, ArrayType::Normal,
- /*IndexTypeQuals=*/0);
+ QualType SizeArrayType = Ctx.getConstantArrayType(
+ Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
+ /*IndexTypeQuals=*/0);
Info.SizesArray =
CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
} else {
@@ -8562,6 +8665,7 @@ emitOffloadingArrays(CodeGenFunction &CGF,
}
}
}
+
/// Emit the arguments to be passed to the runtime library based on the
/// arrays of pointers, sizes and map types.
static void emitOffloadingArraysArgument(
@@ -8677,12 +8781,16 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
case OMPD_teams_distribute_parallel_for_simd:
case OMPD_target_update:
case OMPD_declare_simd:
+ case OMPD_declare_variant:
case OMPD_declare_target:
case OMPD_end_declare_target:
case OMPD_declare_reduction:
case OMPD_declare_mapper:
case OMPD_taskloop:
case OMPD_taskloop_simd:
+ case OMPD_master_taskloop:
+ case OMPD_master_taskloop_simd:
+ case OMPD_parallel_master_taskloop:
case OMPD_requires:
case OMPD_unknown:
llvm_unreachable("Unexpected directive.");
@@ -8692,10 +8800,343 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
return nullptr;
}
+/// Emit the user-defined mapper function. The code generation follows the
+/// pattern in the example below.
+/// \code
+/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
+/// void *base, void *begin,
+/// int64_t size, int64_t type) {
+/// // Allocate space for an array section first.
+/// if (size > 1 && !maptype.IsDelete)
+/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
+/// size*sizeof(Ty), clearToFrom(type));
+/// // Map members.
+/// for (unsigned i = 0; i < size; i++) {
+/// // For each component specified by this mapper:
+/// for (auto c : all_components) {
+/// if (c.hasMapper())
+/// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
+/// c.arg_type);
+/// else
+/// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
+/// c.arg_begin, c.arg_size, c.arg_type);
+/// }
+/// }
+/// // Delete the array section.
+/// if (size > 1 && maptype.IsDelete)
+/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
+/// size*sizeof(Ty), clearToFrom(type));
+/// }
+/// \endcode
+void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
+ CodeGenFunction *CGF) {
+ if (UDMMap.count(D) > 0)
+ return;
+ ASTContext &C = CGM.getContext();
+ QualType Ty = D->getType();
+ QualType PtrTy = C.getPointerType(Ty).withRestrict();
+ QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
+ auto *MapperVarDecl =
+ cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
+ SourceLocation Loc = D->getLocation();
+ CharUnits ElementSize = C.getTypeSizeInChars(Ty);
+
+ // Prepare mapper function arguments and attributes.
+ ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+ C.VoidPtrTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
+ ImplicitParamDecl::Other);
+ ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+ C.VoidPtrTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
+ ImplicitParamDecl::Other);
+ ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
+ ImplicitParamDecl::Other);
+ FunctionArgList Args;
+ Args.push_back(&HandleArg);
+ Args.push_back(&BaseArg);
+ Args.push_back(&BeginArg);
+ Args.push_back(&SizeArg);
+ Args.push_back(&TypeArg);
+ const CGFunctionInfo &FnInfo =
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+ llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
+ SmallString<64> TyStr;
+ llvm::raw_svector_ostream Out(TyStr);
+ CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
+ std::string Name = getName({"omp_mapper", TyStr, D->getName()});
+ auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
+ Name, &CGM.getModule());
+ CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
+ Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
+ // Start the mapper function code generation.
+ CodeGenFunction MapperCGF(CGM);
+ MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
+ // Compute the starting and end addresses of array elements.
+ llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
+ MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
+ C.getPointerType(Int64Ty), Loc);
+ llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
+ MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
+ CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
+ llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
+ llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
+ MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
+ C.getPointerType(Int64Ty), Loc);
+ // Prepare common arguments for array initiation and deletion.
+ llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
+ MapperCGF.GetAddrOfLocalVar(&HandleArg),
+ /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
+ llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
+ MapperCGF.GetAddrOfLocalVar(&BaseArg),
+ /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
+ llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
+ MapperCGF.GetAddrOfLocalVar(&BeginArg),
+ /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
+
+ // Emit array initiation if this is an array section and \p MapType indicates
+ // that memory allocation is required.
+ llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
+ emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
+ ElementSize, HeadBB, /*IsInit=*/true);
+
+ // Emit a for loop to iterate through SizeArg of elements and map all of them.
+
+ // Emit the loop header block.
+ MapperCGF.EmitBlock(HeadBB);
+ llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
+ llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
+ // Evaluate whether the initial condition is satisfied.
+ llvm::Value *IsEmpty =
+ MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
+ MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
+ llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
+
+ // Emit the loop body block.
+ MapperCGF.EmitBlock(BodyBB);
+ llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
+ PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
+ PtrPHI->addIncoming(PtrBegin, EntryBB);
+ Address PtrCurrent =
+ Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
+ .getAlignment()
+ .alignmentOfArrayElement(ElementSize));
+ // Privatize the declared variable of mapper to be the current array element.
+ CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
+ Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
+ return MapperCGF
+ .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
+ .getAddress();
+ });
+ (void)Scope.Privatize();
+
+ // Get map clause information. Fill up the arrays with all mapped variables.
+ MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
+ MappableExprsHandler::MapValuesArrayTy Pointers;
+ MappableExprsHandler::MapValuesArrayTy Sizes;
+ MappableExprsHandler::MapFlagsArrayTy MapTypes;
+ MappableExprsHandler MEHandler(*D, MapperCGF);
+ MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes);
+
+ // Call the runtime API __tgt_mapper_num_components to get the number of
+ // pre-existing components.
+ llvm::Value *OffloadingArgs[] = {Handle};
+ llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
+ createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs);
+ llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
+ PreviousSize,
+ MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
+
+ // Fill up the runtime mapper handle for all components.
+ for (unsigned I = 0; I < BasePointers.size(); ++I) {
+ llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
+ *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
+ llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
+ Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
+ llvm::Value *CurSizeArg = Sizes[I];
+
+ // Extract the MEMBER_OF field from the map type.
+ llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
+ MapperCGF.EmitBlock(MemberBB);
+ llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]);
+ llvm::Value *Member = MapperCGF.Builder.CreateAnd(
+ OriMapType,
+ MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
+ llvm::BasicBlock *MemberCombineBB =
+ MapperCGF.createBasicBlock("omp.member.combine");
+ llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
+ llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
+ MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
+ // Add the number of pre-existing components to the MEMBER_OF field if it
+ // is valid.
+ MapperCGF.EmitBlock(MemberCombineBB);
+ llvm::Value *CombinedMember =
+ MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
+ // Do nothing if it is not a member of previous components.
+ MapperCGF.EmitBlock(TypeBB);
+ llvm::PHINode *MemberMapType =
+ MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
+ MemberMapType->addIncoming(OriMapType, MemberBB);
+ MemberMapType->addIncoming(CombinedMember, MemberCombineBB);
+
+ // Combine the map type inherited from user-defined mapper with that
+ // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
+ // bits of the \a MapType, which is the input argument of the mapper
+ // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
+ // bits of MemberMapType.
+ // [OpenMP 5.0], 1.2.6. map-type decay.
+ // | alloc | to | from | tofrom | release | delete
+ // ----------------------------------------------------------
+ // alloc | alloc | alloc | alloc | alloc | release | delete
+ // to | alloc | to | alloc | to | release | delete
+ // from | alloc | alloc | from | from | release | delete
+ // tofrom | alloc | to | from | tofrom | release | delete
+ llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
+ MapType,
+ MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
+ MappableExprsHandler::OMP_MAP_FROM));
+ llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
+ llvm::BasicBlock *AllocElseBB =
+ MapperCGF.createBasicBlock("omp.type.alloc.else");
+ llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
+ llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
+ llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
+ llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
+ llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
+ MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
+ // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
+ MapperCGF.EmitBlock(AllocBB);
+ llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
+ MemberMapType,
+ MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
+ MappableExprsHandler::OMP_MAP_FROM)));
+ MapperCGF.Builder.CreateBr(EndBB);
+ MapperCGF.EmitBlock(AllocElseBB);
+ llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
+ LeftToFrom,
+ MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
+ MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
+ // In case of to, clear OMP_MAP_FROM.
+ MapperCGF.EmitBlock(ToBB);
+ llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
+ MemberMapType,
+ MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
+ MapperCGF.Builder.CreateBr(EndBB);
+ MapperCGF.EmitBlock(ToElseBB);
+ llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
+ LeftToFrom,
+ MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
+ MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
+ // In case of from, clear OMP_MAP_TO.
+ MapperCGF.EmitBlock(FromBB);
+ llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
+ MemberMapType,
+ MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
+ // In case of tofrom, do nothing.
+ MapperCGF.EmitBlock(EndBB);
+ llvm::PHINode *CurMapType =
+ MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
+ CurMapType->addIncoming(AllocMapType, AllocBB);
+ CurMapType->addIncoming(ToMapType, ToBB);
+ CurMapType->addIncoming(FromMapType, FromBB);
+ CurMapType->addIncoming(MemberMapType, ToElseBB);
+
+ // TODO: call the corresponding mapper function if a user-defined mapper is
+ // associated with this map clause.
+ // Call the runtime API __tgt_push_mapper_component to fill up the runtime
+ // data structure.
+ llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
+ CurSizeArg, CurMapType};
+ MapperCGF.EmitRuntimeCall(
+ createRuntimeFunction(OMPRTL__tgt_push_mapper_component),
+ OffloadingArgs);
+ }
+
+ // Update the pointer to point to the next element that needs to be mapped,
+ // and check whether we have mapped all elements.
+ llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
+ PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
+ PtrPHI->addIncoming(PtrNext, BodyBB);
+ llvm::Value *IsDone =
+ MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
+ llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
+ MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
+
+ MapperCGF.EmitBlock(ExitBB);
+ // Emit array deletion if this is an array section and \p MapType indicates
+ // that deletion is required.
+ emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
+ ElementSize, DoneBB, /*IsInit=*/false);
+
+ // Emit the function exit block.
+ MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
+ MapperCGF.FinishFunction();
+ UDMMap.try_emplace(D, Fn);
+ if (CGF) {
+ auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
+ Decls.second.push_back(D);
+ }
+}
+
+/// Emit the array initialization or deletion portion for user-defined mapper
+/// code generation. First, it evaluates whether an array section is mapped and
+/// whether the \a MapType instructs to delete this section. If \a IsInit is
+/// true, and \a MapType indicates to not delete this array, array
+/// initialization code is generated. If \a IsInit is false, and \a MapType
+/// indicates to delete this array, array deletion code is generated.
+void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
+ CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
+ llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
+ CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
+ StringRef Prefix = IsInit ? ".init" : ".del";
+
+ // Evaluate if this is an array section.
+ llvm::BasicBlock *IsDeleteBB =
+ MapperCGF.createBasicBlock("omp.array" + Prefix + ".evaldelete");
+ llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.array" + Prefix);
+ llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
+ Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
+ MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);
+
+ // Evaluate if we are going to delete this section.
+ MapperCGF.EmitBlock(IsDeleteBB);
+ llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
+ MapType,
+ MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
+ llvm::Value *DeleteCond;
+ if (IsInit) {
+ DeleteCond = MapperCGF.Builder.CreateIsNull(
+ DeleteBit, "omp.array" + Prefix + ".delete");
+ } else {
+ DeleteCond = MapperCGF.Builder.CreateIsNotNull(
+ DeleteBit, "omp.array" + Prefix + ".delete");
+ }
+ MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);
+
+ MapperCGF.EmitBlock(BodyBB);
+ // Get the array size by multiplying element size and element number (i.e., \p
+ // Size).
+ llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
+ Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
+ // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
+ // memory allocation/deletion purpose only.
+ llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
+ MapType,
+ MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
+ MappableExprsHandler::OMP_MAP_FROM)));
+ // Call the runtime API __tgt_push_mapper_component to fill up the runtime
+ // data structure.
+ llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
+ MapperCGF.EmitRuntimeCall(
+ createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs);
+}
+
void CGOpenMPRuntime::emitTargetNumIterationsCall(
- CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device,
- const llvm::function_ref<llvm::Value *(
- CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter) {
+ CodeGenFunction &CGF, const OMPExecutableDirective &D,
+ llvm::Value *DeviceID,
+ llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
+ const OMPLoopDirective &D)>
+ SizeEmitter) {
OpenMPDirectiveKind Kind = D.getDirectiveKind();
const OMPExecutableDirective *TD = &D;
// Get nested teams distribute kind directive, if any.
@@ -8704,30 +9145,24 @@ void CGOpenMPRuntime::emitTargetNumIterationsCall(
if (!TD)
return;
const auto *LD = cast<OMPLoopDirective>(TD);
- auto &&CodeGen = [LD, &Device, &SizeEmitter, this](CodeGenFunction &CGF,
+ auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
PrePostActionTy &) {
- llvm::Value *NumIterations = SizeEmitter(CGF, *LD);
-
- // Emit device ID if any.
- llvm::Value *DeviceID;
- if (Device)
- DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
- CGF.Int64Ty, /*isSigned=*/true);
- else
- DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
-
- llvm::Value *Args[] = {DeviceID, NumIterations};
- CGF.EmitRuntimeCall(
- createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
+ if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
+ llvm::Value *Args[] = {DeviceID, NumIterations};
+ CGF.EmitRuntimeCall(
+ createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
+ }
};
emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
}
-void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
- const OMPExecutableDirective &D,
- llvm::Function *OutlinedFn,
- llvm::Value *OutlinedFnID,
- const Expr *IfCond, const Expr *Device) {
+void CGOpenMPRuntime::emitTargetCall(
+ CodeGenFunction &CGF, const OMPExecutableDirective &D,
+ llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
+ const Expr *Device,
+ llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
+ const OMPLoopDirective &D)>
+ SizeEmitter) {
if (!CGF.HaveInsertPoint())
return;
@@ -8746,8 +9181,8 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
llvm::Value *MapTypesArray = nullptr;
// Fill up the pointer arrays and transfer execution to the device.
auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
- &MapTypesArray, &CS, RequiresOuterTask,
- &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) {
+ &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
+ SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
// On top of the arrays that were filled up, the target offloading call
// takes as arguments the device id as well as the host pointer. The host
// pointer is used by the runtime library to identify the current target
@@ -8779,6 +9214,9 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
+ // Emit tripcount for the target loop-based directive.
+ emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
+
bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
// The target region is an outlined function launched by the runtime
// via calls __tgt_target() or __tgt_target_teams().
@@ -9103,12 +9541,16 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
case OMPD_teams_distribute_parallel_for_simd:
case OMPD_target_update:
case OMPD_declare_simd:
+ case OMPD_declare_variant:
case OMPD_declare_target:
case OMPD_end_declare_target:
case OMPD_declare_reduction:
case OMPD_declare_mapper:
case OMPD_taskloop:
case OMPD_taskloop_simd:
+ case OMPD_master_taskloop:
+ case OMPD_master_taskloop_simd:
+ case OMPD_parallel_master_taskloop:
case OMPD_requires:
case OMPD_unknown:
llvm_unreachable("Unknown target directive for OpenMP device codegen.");
@@ -9137,14 +9579,28 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
// If emitting code for the host, we do not process FD here. Instead we do
// the normal code generation.
- if (!CGM.getLangOpts().OpenMPIsDevice)
+ if (!CGM.getLangOpts().OpenMPIsDevice) {
+ if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
+ Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
+ OMPDeclareTargetDeclAttr::getDeviceType(FD);
+ // Do not emit device_type(nohost) functions for the host.
+ if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
+ return true;
+ }
return false;
+ }
const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
StringRef Name = CGM.getMangledName(GD);
// Try to detect target regions in the function.
- if (const auto *FD = dyn_cast<FunctionDecl>(VD))
+ if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
scanForTargetRegionsFunctions(FD->getBody(), Name);
+ Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
+ OMPDeclareTargetDeclAttr::getDeviceType(FD);
+ // Do not emit device_type(host) functions for the device.
+ if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
+ return true;
+ }
// Do not emit the function if it is not marked as declare target.
return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
@@ -9221,6 +9677,9 @@ CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
llvm::Constant *Addr) {
+ if (CGM.getLangOpts().OMPTargetTriples.empty() &&
+ !CGM.getLangOpts().OpenMPIsDevice)
+ return;
llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
if (!Res) {
@@ -9433,17 +9892,6 @@ llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
return RequiresRegFn;
}
-llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
- // If we have offloading in the current module, we need to emit the entries
- // now and register the offloading descriptor.
- createOffloadEntriesAndInfoMetadata();
-
- // Create and register the offloading binary descriptors. This is the main
- // entity that captures all the information about offloading in the current
- // compilation unit.
- return createOffloadingBinaryDescriptorRegistration();
-}
-
void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
const OMPExecutableDirective &D,
SourceLocation Loc,
@@ -9711,12 +10159,16 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
case OMPD_teams_distribute_parallel_for:
case OMPD_teams_distribute_parallel_for_simd:
case OMPD_declare_simd:
+ case OMPD_declare_variant:
case OMPD_declare_target:
case OMPD_end_declare_target:
case OMPD_declare_reduction:
case OMPD_declare_mapper:
case OMPD_taskloop:
case OMPD_taskloop_simd:
+ case OMPD_master_taskloop:
+ case OMPD_master_taskloop_simd:
+ case OMPD_parallel_master_taskloop:
case OMPD_target:
case OMPD_target_simd:
case OMPD_target_teams_distribute:
@@ -10377,7 +10829,7 @@ void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
}
llvm::APInt Size(/*numBits=*/32, NumIterations.size());
QualType ArrayTy =
- C.getConstantArrayType(KmpDimTy, Size, ArrayType::Normal, 0);
+ C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
CGF.EmitNullInitialization(DimsAddr, ArrayTy);
@@ -10428,7 +10880,7 @@ void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
QualType ArrayTy = CGM.getContext().getConstantArrayType(
- Int64Ty, Size, ArrayType::Normal, 0);
+ Int64Ty, Size, nullptr, ArrayType::Normal, 0);
Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
const Expr *CounterVal = C->getLoopData(I);
@@ -10566,6 +11018,131 @@ Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
return Address(Addr, Align);
}
+/// Checks current context and returns true if it matches the context selector.
+template <OMPDeclareVariantAttr::CtxSelectorSetType CtxSet,
+ OMPDeclareVariantAttr::CtxSelectorType Ctx>
+static bool checkContext(const OMPDeclareVariantAttr *A) {
+ assert(CtxSet != OMPDeclareVariantAttr::CtxSetUnknown &&
+ Ctx != OMPDeclareVariantAttr::CtxUnknown &&
+ "Unknown context selector or context selector set.");
+ return false;
+}
+
+/// Checks for implementation={vendor(<vendor>)} context selector.
+/// \returns true iff <vendor>="llvm", false otherwise.
+template <>
+bool checkContext<OMPDeclareVariantAttr::CtxSetImplementation,
+ OMPDeclareVariantAttr::CtxVendor>(
+ const OMPDeclareVariantAttr *A) {
+ return llvm::all_of(A->implVendors(),
+ [](StringRef S) { return !S.compare_lower("llvm"); });
+}
+
+static bool greaterCtxScore(ASTContext &Ctx, const Expr *LHS, const Expr *RHS) {
+ // If both scores are unknown, choose the very first one.
+ if (!LHS && !RHS)
+ return true;
+ // If only one is known, return this one.
+ if (LHS && !RHS)
+ return true;
+ if (!LHS && RHS)
+ return false;
+ llvm::APSInt LHSVal = LHS->EvaluateKnownConstInt(Ctx);
+ llvm::APSInt RHSVal = RHS->EvaluateKnownConstInt(Ctx);
+ return llvm::APSInt::compareValues(LHSVal, RHSVal) >= 0;
+}
+
+namespace {
+/// Comparator for the priority queue for context selector.
+class OMPDeclareVariantAttrComparer
+ : public std::greater<const OMPDeclareVariantAttr *> {
+private:
+ ASTContext &Ctx;
+
+public:
+ OMPDeclareVariantAttrComparer(ASTContext &Ctx) : Ctx(Ctx) {}
+ bool operator()(const OMPDeclareVariantAttr *LHS,
+ const OMPDeclareVariantAttr *RHS) const {
+ const Expr *LHSExpr = nullptr;
+ const Expr *RHSExpr = nullptr;
+ if (LHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified)
+ LHSExpr = LHS->getScore();
+ if (RHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified)
+ RHSExpr = RHS->getScore();
+ return greaterCtxScore(Ctx, LHSExpr, RHSExpr);
+ }
+};
+} // anonymous namespace
+
+/// Finds the variant function that matches current context with its context
+/// selector.
+static const FunctionDecl *getDeclareVariantFunction(ASTContext &Ctx,
+ const FunctionDecl *FD) {
+ if (!FD->hasAttrs() || !FD->hasAttr<OMPDeclareVariantAttr>())
+ return FD;
+ // Iterate through all DeclareVariant attributes and check context selectors.
+ auto &&Comparer = [&Ctx](const OMPDeclareVariantAttr *LHS,
+ const OMPDeclareVariantAttr *RHS) {
+ const Expr *LHSExpr = nullptr;
+ const Expr *RHSExpr = nullptr;
+ if (LHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified)
+ LHSExpr = LHS->getScore();
+ if (RHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified)
+ RHSExpr = RHS->getScore();
+ return greaterCtxScore(Ctx, LHSExpr, RHSExpr);
+ };
+ const OMPDeclareVariantAttr *TopMostAttr = nullptr;
+ for (const auto *A : FD->specific_attrs<OMPDeclareVariantAttr>()) {
+ const OMPDeclareVariantAttr *SelectedAttr = nullptr;
+ switch (A->getCtxSelectorSet()) {
+ case OMPDeclareVariantAttr::CtxSetImplementation:
+ switch (A->getCtxSelector()) {
+ case OMPDeclareVariantAttr::CtxVendor:
+ if (checkContext<OMPDeclareVariantAttr::CtxSetImplementation,
+ OMPDeclareVariantAttr::CtxVendor>(A))
+ SelectedAttr = A;
+ break;
+ case OMPDeclareVariantAttr::CtxUnknown:
+ llvm_unreachable(
+ "Unknown context selector in implementation selector set.");
+ }
+ break;
+ case OMPDeclareVariantAttr::CtxSetUnknown:
+ llvm_unreachable("Unknown context selector set.");
+ }
+ // If the attribute matches the context, find the attribute with the highest
+ // score.
+ if (SelectedAttr && (!TopMostAttr || !Comparer(TopMostAttr, SelectedAttr)))
+ TopMostAttr = SelectedAttr;
+ }
+ if (!TopMostAttr)
+ return FD;
+ return cast<FunctionDecl>(
+ cast<DeclRefExpr>(TopMostAttr->getVariantFuncRef()->IgnoreParenImpCasts())
+ ->getDecl());
+}
+
+bool CGOpenMPRuntime::emitDeclareVariant(GlobalDecl GD, bool IsForDefinition) {
+ const auto *D = cast<FunctionDecl>(GD.getDecl());
+ // If the original function is defined already, use its definition.
+ StringRef MangledName = CGM.getMangledName(GD);
+ llvm::GlobalValue *Orig = CGM.GetGlobalValue(MangledName);
+ if (Orig && !Orig->isDeclaration())
+ return false;
+ const FunctionDecl *NewFD = getDeclareVariantFunction(CGM.getContext(), D);
+ // Emit original function if it does not have declare variant attribute or the
+ // context does not match.
+ if (NewFD == D)
+ return false;
+ GlobalDecl NewGD = GD.getWithDecl(NewFD);
+ if (tryEmitDeclareVariant(NewGD, GD, Orig, IsForDefinition)) {
+ DeferredVariantFunction.erase(D);
+ return true;
+ }
+ DeferredVariantFunction.insert(std::make_pair(D, std::make_pair(NewGD, GD)));
+ return true;
+}
+
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
@@ -10786,12 +11363,13 @@ void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
llvm_unreachable("Not supported in SIMD-only mode");
}
-void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF,
- const OMPExecutableDirective &D,
- llvm::Function *OutlinedFn,
- llvm::Value *OutlinedFnID,
- const Expr *IfCond,
- const Expr *Device) {
+void CGOpenMPSIMDRuntime::emitTargetCall(
+ CodeGenFunction &CGF, const OMPExecutableDirective &D,
+ llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
+ const Expr *Device,
+ llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
+ const OMPLoopDirective &D)>
+ SizeEmitter) {
llvm_unreachable("Not supported in SIMD-only mode");
}
@@ -10807,10 +11385,6 @@ bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
return false;
}
-llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() {
- return nullptr;
-}
-
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
const OMPExecutableDirective &D,
SourceLocation Loc,
diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h
index 3f842ce96407..bf8e0ac80909 100644
--- a/lib/CodeGen/CGOpenMPRuntime.h
+++ b/lib/CodeGen/CGOpenMPRuntime.h
@@ -15,6 +15,7 @@
#include "CGValue.h"
#include "clang/AST/DeclOpenMP.h"
+#include "clang/AST/GlobalDecl.h"
#include "clang/AST/Type.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceLocation.h"
@@ -36,7 +37,6 @@ class Value;
namespace clang {
class Expr;
-class GlobalDecl;
class OMPDependClause;
class OMPExecutableDirective;
class OMPLoopDirective;
@@ -291,6 +291,17 @@ protected:
/// default location.
virtual unsigned getDefaultLocationReserved2Flags() const { return 0; }
+ /// Tries to emit declare variant function for \p OldGD from \p NewGD.
+ /// \param OrigAddr LLVM IR value for \p OldGD.
+ /// \param IsForDefinition true, if requested emission for the definition of
+ /// \p OldGD.
+ /// \returns true if it was able to emit a definition function for \p OldGD,
+ /// which points to \p NewGD.
+ virtual bool tryEmitDeclareVariant(const GlobalDecl &NewGD,
+ const GlobalDecl &OldGD,
+ llvm::GlobalValue *OrigAddr,
+ bool IsForDefinition);
+
/// Returns default flags for the barriers depending on the directive, for
/// which this barrier is going to be emitted.
static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind);
@@ -345,6 +356,14 @@ private:
SmallVector<const OMPDeclareReductionDecl *, 4>>
FunctionUDRMapTy;
FunctionUDRMapTy FunctionUDRMap;
+ /// Map from the user-defined mapper declaration to its corresponding
+ /// functions.
+ llvm::DenseMap<const OMPDeclareMapperDecl *, llvm::Function *> UDMMap;
+ /// Map of functions and their local user-defined mappers.
+ using FunctionUDMMapTy =
+ llvm::DenseMap<llvm::Function *,
+ SmallVector<const OMPDeclareMapperDecl *, 4>>;
+ FunctionUDMMapTy FunctionUDMMap;
/// Type kmp_critical_name, originally defined as typedef kmp_int32
/// kmp_critical_name[8];
llvm::ArrayType *KmpCriticalNameTy;
@@ -636,6 +655,12 @@ private:
/// must be emitted.
llvm::SmallDenseSet<const VarDecl *> DeferredGlobalVariables;
+ /// Mapping of the original functions to their variants and original global
+ /// decl.
+ llvm::MapVector<CanonicalDeclPtr<const FunctionDecl>,
+ std::pair<GlobalDecl, GlobalDecl>>
+ DeferredVariantFunction;
+
/// Flag for keeping track of whether a requires unified_shared_memory
/// directive is present.
bool HasRequiresUnifiedSharedMemory = false;
@@ -647,14 +672,6 @@ private:
/// Device routines are specific to the
bool HasEmittedDeclareTargetRegion = false;
- /// Creates and registers offloading binary descriptor for the current
- /// compilation unit. The function that does the registration is returned.
- llvm::Function *createOffloadingBinaryDescriptorRegistration();
-
- /// Creates all the offload entries in the current compilation unit
- /// along with the associated metadata.
- void createOffloadEntriesAndInfoMetadata();
-
/// Loads all the offload entries information from the host IR
/// metadata.
void loadOffloadInfoMetadata();
@@ -738,6 +755,14 @@ private:
llvm::Value *Ctor, llvm::Value *CopyCtor,
llvm::Value *Dtor, SourceLocation Loc);
+ /// Emit the array initialization or deletion portion for user-defined mapper
+ /// code generation.
+ void emitUDMapperArrayInitOrDel(CodeGenFunction &MapperCGF,
+ llvm::Value *Handle, llvm::Value *BasePtr,
+ llvm::Value *Ptr, llvm::Value *Size,
+ llvm::Value *MapType, CharUnits ElementSize,
+ llvm::BasicBlock *ExitBB, bool IsInit);
+
struct TaskResultTy {
llvm::Value *NewTask = nullptr;
llvm::Function *TaskEntry = nullptr;
@@ -777,6 +802,17 @@ private:
/// default.
virtual unsigned getDefaultFirstprivateAddressSpace() const { return 0; }
+ /// Emit code that pushes the trip count of loops associated with constructs
+ /// 'target teams distribute' and 'teams distribute parallel for'.
+ /// \param SizeEmitter Emits the int64 value for the number of iterations of
+ /// the associated loop.
+ void emitTargetNumIterationsCall(
+ CodeGenFunction &CGF, const OMPExecutableDirective &D,
+ llvm::Value *DeviceID,
+ llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
+ const OMPLoopDirective &D)>
+ SizeEmitter);
+
public:
explicit CGOpenMPRuntime(CodeGenModule &CGM)
: CGOpenMPRuntime(CGM, ".", ".") {}
@@ -798,6 +834,10 @@ public:
virtual std::pair<llvm::Function *, llvm::Function *>
getUserDefinedReduction(const OMPDeclareReductionDecl *D);
+ /// Emit the function for the user defined mapper construct.
+ void emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
+ CodeGenFunction *CGF = nullptr);
+
/// Emits outlined function for the specified OpenMP parallel directive
/// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
/// kmp_int32 BoundID, struct context_vars*).
@@ -1394,15 +1434,6 @@ public:
bool IsOffloadEntry,
const RegionCodeGenTy &CodeGen);
- /// Emit code that pushes the trip count of loops associated with constructs
- /// 'target teams distribute' and 'teams distribute parallel for'.
- /// \param SizeEmitter Emits the int64 value for the number of iterations of
- /// the associated loop.
- virtual void emitTargetNumIterationsCall(
- CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device,
- const llvm::function_ref<llvm::Value *(
- CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter);
-
/// Emit the target offloading code associated with \a D. The emitted
/// code attempts offloading the execution to the device, in the event of
/// a failure it executes the host version outlined in \a OutlinedFn.
@@ -1413,11 +1444,15 @@ public:
/// directive, or null if no if clause is used.
/// \param Device Expression evaluated in device clause associated with the
/// target directive, or null if no device clause is used.
- virtual void emitTargetCall(CodeGenFunction &CGF,
- const OMPExecutableDirective &D,
- llvm::Function *OutlinedFn,
- llvm::Value *OutlinedFnID, const Expr *IfCond,
- const Expr *Device);
+ /// \param SizeEmitter Callback to emit number of iterations for loop-based
+ /// directives.
+ virtual void
+ emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D,
+ llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID,
+ const Expr *IfCond, const Expr *Device,
+ llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
+ const OMPLoopDirective &D)>
+ SizeEmitter);
/// Emit the target regions enclosed in \a GD function definition or
/// the function itself in case it is a valid device function. Returns true if
@@ -1449,10 +1484,9 @@ public:
/// requires directives was used in the current module.
llvm::Function *emitRequiresDirectiveRegFun();
- /// Creates the offloading descriptor in the event any target region
- /// was emitted in the current module and return the function that registers
- /// it.
- virtual llvm::Function *emitRegistrationFunction();
+ /// Creates all the offload entries in the current compilation unit
+ /// along with the associated metadata.
+ void createOffloadEntriesAndInfoMetadata();
/// Emits code for teams call of the \a OutlinedFn with
/// variables captured in a record which address is stored in \a
@@ -1626,6 +1660,9 @@ public:
/// Return whether the unified_shared_memory has been specified.
bool hasRequiresUnifiedSharedMemory() const;
+
+ /// Emits the definition of the declare variant function.
+ virtual bool emitDeclareVariant(GlobalDecl GD, bool IsForDefinition);
};
/// Class supports emission of SIMD-only code.
@@ -2097,9 +2134,13 @@ public:
/// directive, or null if no if clause is used.
/// \param Device Expression evaluated in device clause associated with the
/// target directive, or null if no device clause is used.
- void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D,
- llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID,
- const Expr *IfCond, const Expr *Device) override;
+ void
+ emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D,
+ llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID,
+ const Expr *IfCond, const Expr *Device,
+ llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
+ const OMPLoopDirective &D)>
+ SizeEmitter) override;
/// Emit the target regions enclosed in \a GD function definition or
/// the function itself in case it is a valid device function. Returns true if
@@ -2117,11 +2158,6 @@ public:
/// \param GD Global to scan.
bool emitTargetGlobal(GlobalDecl GD) override;
- /// Creates the offloading descriptor in the event any target region
- /// was emitted in the current module and return the function that registers
- /// it.
- llvm::Function *emitRegistrationFunction() override;
-
/// Emits code for teams call of the \a OutlinedFn with
/// variables captured in a record which address is stored in \a
/// CapturedStruct.
diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index 48dcbbf3cabd..708260429f68 100644
--- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -107,6 +107,10 @@ enum OpenMPRTLFunctionNVPTX {
/// Call to void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32
/// global_tid);
OMPRTL__kmpc_barrier_simple_spmd,
+ /// Call to int32_t __kmpc_warp_active_thread_mask(void);
+ OMPRTL_NVPTX__kmpc_warp_active_thread_mask,
+ /// Call to void __kmpc_syncwarp(int32_t Mask);
+ OMPRTL_NVPTX__kmpc_syncwarp,
};
/// Pre(post)-action for different OpenMP constructs specialized for NVPTX.
@@ -276,7 +280,8 @@ static RecordDecl *buildRecordForGlobalizedVars(
}
} else {
llvm::APInt ArraySize(32, BufSize);
- Type = C.getConstantArrayType(Type, ArraySize, ArrayType::Normal, 0);
+ Type = C.getConstantArrayType(Type, ArraySize, nullptr, ArrayType::Normal,
+ 0);
Field = FieldDecl::Create(
C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type,
C.getTrivialTypeSourceInfo(Type, SourceLocation()),
@@ -287,10 +292,11 @@ static RecordDecl *buildRecordForGlobalizedVars(
static_cast<CharUnits::QuantityType>(
GlobalMemoryAlignment)));
Field->addAttr(AlignedAttr::CreateImplicit(
- C, AlignedAttr::GNU_aligned, /*IsAlignmentExpr=*/true,
+ C, /*IsAlignmentExpr=*/true,
IntegerLiteral::Create(C, Align,
C.getIntTypeForBitwidth(32, /*Signed=*/0),
- SourceLocation())));
+ SourceLocation()),
+ {}, AttributeCommonInfo::AS_GNU, AlignedAttr::GNU_aligned));
}
GlobalizedRD->addDecl(Field);
MappedDeclsFields.try_emplace(VD, Field);
@@ -790,12 +796,16 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
case OMPD_teams_distribute_parallel_for_simd:
case OMPD_target_update:
case OMPD_declare_simd:
+ case OMPD_declare_variant:
case OMPD_declare_target:
case OMPD_end_declare_target:
case OMPD_declare_reduction:
case OMPD_declare_mapper:
case OMPD_taskloop:
case OMPD_taskloop_simd:
+ case OMPD_master_taskloop:
+ case OMPD_master_taskloop_simd:
+ case OMPD_parallel_master_taskloop:
case OMPD_requires:
case OMPD_unknown:
llvm_unreachable("Unexpected directive.");
@@ -860,12 +870,16 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx,
case OMPD_teams_distribute_parallel_for_simd:
case OMPD_target_update:
case OMPD_declare_simd:
+ case OMPD_declare_variant:
case OMPD_declare_target:
case OMPD_end_declare_target:
case OMPD_declare_reduction:
case OMPD_declare_mapper:
case OMPD_taskloop:
case OMPD_taskloop_simd:
+ case OMPD_master_taskloop:
+ case OMPD_master_taskloop_simd:
+ case OMPD_parallel_master_taskloop:
case OMPD_requires:
case OMPD_unknown:
break;
@@ -1023,12 +1037,16 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx,
case OMPD_teams_distribute_parallel_for_simd:
case OMPD_target_update:
case OMPD_declare_simd:
+ case OMPD_declare_variant:
case OMPD_declare_target:
case OMPD_end_declare_target:
case OMPD_declare_reduction:
case OMPD_declare_mapper:
case OMPD_taskloop:
case OMPD_taskloop_simd:
+ case OMPD_master_taskloop:
+ case OMPD_master_taskloop_simd:
+ case OMPD_parallel_master_taskloop:
case OMPD_requires:
case OMPD_unknown:
llvm_unreachable("Unexpected directive.");
@@ -1099,12 +1117,16 @@ static bool supportsLightweightRuntime(ASTContext &Ctx,
case OMPD_teams_distribute_parallel_for_simd:
case OMPD_target_update:
case OMPD_declare_simd:
+ case OMPD_declare_variant:
case OMPD_declare_target:
case OMPD_end_declare_target:
case OMPD_declare_reduction:
case OMPD_declare_mapper:
case OMPD_taskloop:
case OMPD_taskloop_simd:
+ case OMPD_master_taskloop:
+ case OMPD_master_taskloop_simd:
+ case OMPD_parallel_master_taskloop:
case OMPD_requires:
case OMPD_unknown:
break;
@@ -1794,6 +1816,20 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
->addFnAttr(llvm::Attribute::Convergent);
break;
}
+ case OMPRTL_NVPTX__kmpc_warp_active_thread_mask: {
+ // Build int32_t __kmpc_warp_active_thread_mask(void);
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.Int32Ty, llvm::None, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_warp_active_thread_mask");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_syncwarp: {
+ // Build void __kmpc_syncwarp(kmp_int32 Mask);
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, CGM.Int32Ty, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_syncwarp");
+ break;
+ }
}
return RTLFn;
}
@@ -1871,6 +1907,19 @@ unsigned CGOpenMPRuntimeNVPTX::getDefaultLocationReserved2Flags() const {
llvm_unreachable("Unknown flags are requested.");
}
+bool CGOpenMPRuntimeNVPTX::tryEmitDeclareVariant(const GlobalDecl &NewGD,
+ const GlobalDecl &OldGD,
+ llvm::GlobalValue *OrigAddr,
+ bool IsForDefinition) {
+ // Emit the function in OldGD with the body from NewGD, if NewGD is defined.
+ auto *NewFD = cast<FunctionDecl>(NewGD.getDecl());
+ if (NewFD->isDefined()) {
+ CGM.emitOpenMPDeviceFunctionRedefinition(OldGD, NewGD, OrigAddr);
+ return true;
+ }
+ return false;
+}
+
CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM)
: CGOpenMPRuntime(CGM, "_", "$") {
if (!CGM.getLangOpts().OpenMPIsDevice)
@@ -2030,7 +2079,7 @@ llvm::Function *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction(
auto I = Rt.FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first;
I->getSecond().GlobalRecord = GlobalizedRD;
I->getSecond().MappedParams =
- llvm::make_unique<CodeGenFunction::OMPMapVars>();
+ std::make_unique<CodeGenFunction::OMPMapVars>();
DeclToAddrMapTy &Data = I->getSecond().LocalVarData;
for (const auto &Pair : MappedDeclsFields) {
assert(Pair.getFirst()->isCanonicalDecl() &&
@@ -2414,9 +2463,8 @@ void CGOpenMPRuntimeNVPTX::emitTeamsCall(CodeGenFunction &CGF,
if (!CGF.HaveInsertPoint())
return;
- Address ZeroAddr = CGF.CreateMemTemp(
- CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
- /*Name*/ ".zero.addr");
+ Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
+ /*Name=*/".zero.addr");
CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer());
@@ -2445,16 +2493,19 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall(
// Force inline this outlined function at its call site.
Fn->setLinkage(llvm::GlobalValue::InternalLinkage);
- Address ZeroAddr = CGF.CreateMemTemp(CGF.getContext().getIntTypeForBitwidth(
- /*DestWidth=*/32, /*Signed=*/1),
- ".zero.addr");
+ Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
+ /*Name=*/".zero.addr");
CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
// ThreadId for serialized parallels is 0.
Address ThreadIDAddr = ZeroAddr;
- auto &&CodeGen = [this, Fn, CapturedVars, Loc, ZeroAddr, &ThreadIDAddr](
+ auto &&CodeGen = [this, Fn, CapturedVars, Loc, &ThreadIDAddr](
CodeGenFunction &CGF, PrePostActionTy &Action) {
Action.Enter(CGF);
+ Address ZeroAddr =
+ CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
+ /*Name=*/".bound.zero.addr");
+ CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
OutlinedFnArgs.push_back(ZeroAddr.getPointer());
@@ -2611,17 +2662,19 @@ void CGOpenMPRuntimeNVPTX::emitSPMDParallelCall(
//
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
- Address ZeroAddr = CGF.CreateMemTemp(CGF.getContext().getIntTypeForBitwidth(
- /*DestWidth=*/32, /*Signed=*/1),
- ".zero.addr");
+ Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
+ /*Name=*/".zero.addr");
CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
// ThreadId for serialized parallels is 0.
Address ThreadIDAddr = ZeroAddr;
- auto &&CodeGen = [this, OutlinedFn, CapturedVars, Loc, ZeroAddr,
- &ThreadIDAddr](CodeGenFunction &CGF,
- PrePostActionTy &Action) {
+ auto &&CodeGen = [this, OutlinedFn, CapturedVars, Loc, &ThreadIDAddr](
+ CodeGenFunction &CGF, PrePostActionTy &Action) {
Action.Enter(CGF);
+ Address ZeroAddr =
+ CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
+ /*Name=*/".bound.zero.addr");
+ CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
OutlinedFnArgs.push_back(ZeroAddr.getPointer());
@@ -2669,8 +2722,9 @@ void CGOpenMPRuntimeNVPTX::syncCTAThreads(CodeGenFunction &CGF) {
llvm::ConstantPointerNull::get(
cast<llvm::PointerType>(getIdentTyPointerTy())),
llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/0, /*isSigned=*/true)};
- CGF.EmitRuntimeCall(
+ llvm::CallInst *Call = CGF.EmitRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier_simple_spmd), Args);
+ Call->setConvergent();
}
void CGOpenMPRuntimeNVPTX::emitBarrierCall(CodeGenFunction &CGF,
@@ -2684,7 +2738,9 @@ void CGOpenMPRuntimeNVPTX::emitBarrierCall(CodeGenFunction &CGF,
unsigned Flags = getDefaultFlagsForBarriers(Kind);
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
getThreadID(CGF, Loc)};
- CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier), Args);
+ llvm::CallInst *Call = CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier), Args);
+ Call->setConvergent();
}
void CGOpenMPRuntimeNVPTX::emitCriticalRegion(
@@ -2697,6 +2753,9 @@ void CGOpenMPRuntimeNVPTX::emitCriticalRegion(
llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.critical.body");
llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.critical.exit");
+ // Get the mask of active threads in the warp.
+ llvm::Value *Mask = CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_warp_active_thread_mask));
// Fetch team-local id of the thread.
llvm::Value *ThreadID = getNVPTXThreadID(CGF);
@@ -2737,8 +2796,9 @@ void CGOpenMPRuntimeNVPTX::emitCriticalRegion(
// Block waits for all threads in current team to finish then increments the
// counter variable and returns to the loop.
CGF.EmitBlock(SyncBB);
- emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false,
- /*ForceSimpleCall=*/true);
+ // Reconverge active threads in the warp.
+ (void)CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_syncwarp), Mask);
llvm::Value *IncCounterVal =
CGF.Builder.CreateNSWAdd(CounterVal, CGF.Builder.getInt32(1));
@@ -4239,7 +4299,7 @@ void CGOpenMPRuntimeNVPTX::emitReduction(
}
llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
QualType ReductionArrayTy =
- C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
+ C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
/*IndexTypeQuals=*/0);
Address ReductionList =
CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
@@ -4515,9 +4575,8 @@ llvm::Function *CGOpenMPRuntimeNVPTX::createParallelDataSharingWrapper(
const auto *RD = CS.getCapturedRecordDecl();
auto CurField = RD->field_begin();
- Address ZeroAddr = CGF.CreateMemTemp(
- CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
- /*Name*/ ".zero.addr");
+ Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
+ /*Name=*/".zero.addr");
CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
// Get the array of arguments.
SmallVector<llvm::Value *, 8> Args;
@@ -4634,7 +4693,7 @@ void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF,
return;
auto I = FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first;
I->getSecond().MappedParams =
- llvm::make_unique<CodeGenFunction::OMPMapVars>();
+ std::make_unique<CodeGenFunction::OMPMapVars>();
I->getSecond().GlobalRecord = GlobalizedVarsRecord;
I->getSecond().EscapedParameters.insert(
VarChecker.getEscapedParameters().begin(),
@@ -4700,7 +4759,7 @@ Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF,
/*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
CGM.getContext().getTargetAddressSpace(LangAS::cuda_constant));
CharUnits Align = CGM.getContext().getDeclAlign(VD);
- GV->setAlignment(Align.getQuantity());
+ GV->setAlignment(Align.getAsAlign());
return Address(GV, Align);
}
case OMPAllocateDeclAttr::OMPPTeamMemAlloc: {
@@ -4712,7 +4771,7 @@ Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF,
/*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared));
CharUnits Align = CGM.getContext().getDeclAlign(VD);
- GV->setAlignment(Align.getQuantity());
+ GV->setAlignment(Align.getAsAlign());
return Address(GV, Align);
}
case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
@@ -4723,7 +4782,7 @@ Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF,
llvm::GlobalValue::InternalLinkage,
llvm::Constant::getNullValue(VarTy), VD->getName());
CharUnits Align = CGM.getContext().getDeclAlign(VD);
- GV->setAlignment(Align.getQuantity());
+ GV->setAlignment(Align.getAsAlign());
return Address(GV, Align);
}
}
@@ -5026,7 +5085,7 @@ void CGOpenMPRuntimeNVPTX::clear() {
Size = llvm::alignTo(Size, RecAlignment);
llvm::APInt ArySize(/*numBits=*/64, Size);
QualType SubTy = C.getConstantArrayType(
- C.CharTy, ArySize, ArrayType::Normal, /*IndexTypeQuals=*/0);
+ C.CharTy, ArySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
const bool UseSharedMemory = Size <= SharedMemorySize;
auto *Field =
FieldDecl::Create(C, UseSharedMemory ? SharedStaticRD : StaticRD,
@@ -5053,7 +5112,7 @@ void CGOpenMPRuntimeNVPTX::clear() {
if (!SharedStaticRD->field_empty()) {
llvm::APInt ArySize(/*numBits=*/64, SharedMemorySize);
QualType SubTy = C.getConstantArrayType(
- C.CharTy, ArySize, ArrayType::Normal, /*IndexTypeQuals=*/0);
+ C.CharTy, ArySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
auto *Field = FieldDecl::Create(
C, SharedStaticRD, SourceLocation(), SourceLocation(), nullptr, SubTy,
C.getTrivialTypeSourceInfo(SubTy, SourceLocation()),
@@ -5086,11 +5145,12 @@ void CGOpenMPRuntimeNVPTX::clear() {
std::pair<unsigned, unsigned> SMsBlockPerSM = getSMsBlocksPerSM(CGM);
llvm::APInt Size1(32, SMsBlockPerSM.second);
QualType Arr1Ty =
- C.getConstantArrayType(StaticTy, Size1, ArrayType::Normal,
+ C.getConstantArrayType(StaticTy, Size1, nullptr, ArrayType::Normal,
/*IndexTypeQuals=*/0);
llvm::APInt Size2(32, SMsBlockPerSM.first);
- QualType Arr2Ty = C.getConstantArrayType(Arr1Ty, Size2, ArrayType::Normal,
- /*IndexTypeQuals=*/0);
+ QualType Arr2Ty =
+ C.getConstantArrayType(Arr1Ty, Size2, nullptr, ArrayType::Normal,
+ /*IndexTypeQuals=*/0);
llvm::Type *LLVMArr2Ty = CGM.getTypes().ConvertTypeForMem(Arr2Ty);
// FIXME: nvlink does not handle weak linkage correctly (object with the
// different size are reported as erroneous).
diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
index e7fd458e7271..0f78627c95e6 100644
--- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
@@ -193,6 +193,18 @@ protected:
/// Full/Lightweight runtime mode. Used for better optimization.
unsigned getDefaultLocationReserved2Flags() const override;
+ /// Tries to emit declare variant function for \p OldGD from \p NewGD.
+ /// \param OrigAddr LLVM IR value for \p OldGD.
+ /// \param IsForDefinition true, if requested emission for the definition of
+ /// \p OldGD.
+ /// \returns true if it was able to emit a definition function for \p OldGD,
+ /// which points to \p NewGD.
+ /// NVPTX backend does not support global aliases, so just use the function,
+ /// emitted for \p NewGD instead of \p OldGD.
+ bool tryEmitDeclareVariant(const GlobalDecl &NewGD, const GlobalDecl &OldGD,
+ llvm::GlobalValue *OrigAddr,
+ bool IsForDefinition) override;
+
public:
explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM);
void clear() override;
diff --git a/lib/CodeGen/CGStmt.cpp b/lib/CodeGen/CGStmt.cpp
index dd0dea5b94a0..bb2629f89d3d 100644
--- a/lib/CodeGen/CGStmt.cpp
+++ b/lib/CodeGen/CGStmt.cpp
@@ -281,6 +281,17 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) {
case Stmt::OMPTaskLoopSimdDirectiveClass:
EmitOMPTaskLoopSimdDirective(cast<OMPTaskLoopSimdDirective>(*S));
break;
+ case Stmt::OMPMasterTaskLoopDirectiveClass:
+ EmitOMPMasterTaskLoopDirective(cast<OMPMasterTaskLoopDirective>(*S));
+ break;
+ case Stmt::OMPMasterTaskLoopSimdDirectiveClass:
+ EmitOMPMasterTaskLoopSimdDirective(
+ cast<OMPMasterTaskLoopSimdDirective>(*S));
+ break;
+ case Stmt::OMPParallelMasterTaskLoopDirectiveClass:
+ EmitOMPParallelMasterTaskLoopDirective(
+ cast<OMPParallelMasterTaskLoopDirective>(*S));
+ break;
case Stmt::OMPDistributeDirectiveClass:
EmitOMPDistributeDirective(cast<OMPDistributeDirective>(*S));
break;
@@ -1846,11 +1857,9 @@ llvm::Value* CodeGenFunction::EmitAsmInput(
InputExpr->EvaluateAsRValue(EVResult, getContext(), true);
llvm::APSInt IntResult;
- if (!EVResult.Val.toIntegralConstant(IntResult, InputExpr->getType(),
- getContext()))
- llvm_unreachable("Invalid immediate constant!");
-
- return llvm::ConstantInt::get(getLLVMContext(), IntResult);
+ if (EVResult.Val.toIntegralConstant(IntResult, InputExpr->getType(),
+ getContext()))
+ return llvm::ConstantInt::get(getLLVMContext(), IntResult);
}
Expr::EvalResult Result;
@@ -1986,6 +1995,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
std::vector<llvm::Type *> ResultTruncRegTypes;
std::vector<llvm::Type *> ArgTypes;
std::vector<llvm::Value*> Args;
+ llvm::BitVector ResultTypeRequiresCast;
// Keep track of inout constraints.
std::string InOutConstraints;
@@ -2024,13 +2034,23 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
// If this is a register output, then make the inline asm return it
// by-value. If this is a memory result, return the value by-reference.
- if (!Info.allowsMemory() && hasScalarEvaluationKind(OutExpr->getType())) {
+ bool isScalarizableAggregate =
+ hasAggregateEvaluationKind(OutExpr->getType());
+ if (!Info.allowsMemory() && (hasScalarEvaluationKind(OutExpr->getType()) ||
+ isScalarizableAggregate)) {
Constraints += "=" + OutputConstraint;
ResultRegQualTys.push_back(OutExpr->getType());
ResultRegDests.push_back(Dest);
- ResultRegTypes.push_back(ConvertTypeForMem(OutExpr->getType()));
- ResultTruncRegTypes.push_back(ResultRegTypes.back());
-
+ ResultTruncRegTypes.push_back(ConvertTypeForMem(OutExpr->getType()));
+ if (Info.allowsRegister() && isScalarizableAggregate) {
+ ResultTypeRequiresCast.push_back(true);
+ unsigned Size = getContext().getTypeSize(OutExpr->getType());
+ llvm::Type *ConvTy = llvm::IntegerType::get(getLLVMContext(), Size);
+ ResultRegTypes.push_back(ConvTy);
+ } else {
+ ResultTypeRequiresCast.push_back(false);
+ ResultRegTypes.push_back(ResultTruncRegTypes.back());
+ }
// If this output is tied to an input, and if the input is larger, then
// we need to set the actual result type of the inline asm node to be the
// same as the input type.
@@ -2064,8 +2084,8 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
// Update largest vector width for any vector types.
if (auto *VT = dyn_cast<llvm::VectorType>(ResultRegTypes.back()))
- LargestVectorWidth = std::max(LargestVectorWidth,
- VT->getPrimitiveSizeInBits());
+ LargestVectorWidth = std::max((uint64_t)LargestVectorWidth,
+ VT->getPrimitiveSizeInBits().getFixedSize());
} else {
ArgTypes.push_back(Dest.getAddress().getType());
Args.push_back(Dest.getPointer());
@@ -2089,8 +2109,8 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
// Update largest vector width for any vector types.
if (auto *VT = dyn_cast<llvm::VectorType>(Arg->getType()))
- LargestVectorWidth = std::max(LargestVectorWidth,
- VT->getPrimitiveSizeInBits());
+ LargestVectorWidth = std::max((uint64_t)LargestVectorWidth,
+ VT->getPrimitiveSizeInBits().getFixedSize());
if (Info.allowsRegister())
InOutConstraints += llvm::utostr(i);
else
@@ -2176,8 +2196,8 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
// Update largest vector width for any vector types.
if (auto *VT = dyn_cast<llvm::VectorType>(Arg->getType()))
- LargestVectorWidth = std::max(LargestVectorWidth,
- VT->getPrimitiveSizeInBits());
+ LargestVectorWidth = std::max((uint64_t)LargestVectorWidth,
+ VT->getPrimitiveSizeInBits().getFixedSize());
ArgTypes.push_back(Arg->getType());
Args.push_back(Arg);
@@ -2273,6 +2293,9 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
assert(RegResults.size() == ResultRegTypes.size());
assert(RegResults.size() == ResultTruncRegTypes.size());
assert(RegResults.size() == ResultRegDests.size());
+ // ResultRegDests can be also populated by addReturnRegisterOutputs() above,
+ // in which case its size may grow.
+ assert(ResultTypeRequiresCast.size() <= ResultRegDests.size());
for (unsigned i = 0, e = RegResults.size(); i != e; ++i) {
llvm::Value *Tmp = RegResults[i];
@@ -2302,7 +2325,24 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
}
}
- EmitStoreThroughLValue(RValue::get(Tmp), ResultRegDests[i]);
+ LValue Dest = ResultRegDests[i];
+ // ResultTypeRequiresCast elements correspond to the first
+ // ResultTypeRequiresCast.size() elements of RegResults.
+ if ((i < ResultTypeRequiresCast.size()) && ResultTypeRequiresCast[i]) {
+ unsigned Size = getContext().getTypeSize(ResultRegQualTys[i]);
+ Address A = Builder.CreateBitCast(Dest.getAddress(),
+ ResultRegTypes[i]->getPointerTo());
+ QualType Ty = getContext().getIntTypeForBitwidth(Size, /*Signed*/ false);
+ if (Ty.isNull()) {
+ const Expr *OutExpr = S.getOutputExpr(i);
+ CGM.Error(
+ OutExpr->getExprLoc(),
+ "impossible constraint in asm: can't store value into a register");
+ return;
+ }
+ Dest = MakeAddrLValue(A, Ty);
+ }
+ EmitStoreThroughLValue(RValue::get(Tmp), Dest);
}
}
diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp
index e8fbca5108ad..6ece69d51daf 100644
--- a/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/lib/CodeGen/CGStmtOpenMP.cpp
@@ -120,12 +120,46 @@ public:
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
CodeGenFunction::OMPMapVars PreCondVars;
+ llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
for (const auto *E : S.counters()) {
const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
+ EmittedAsPrivate.insert(VD->getCanonicalDecl());
(void)PreCondVars.setVarAddr(
CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
}
+ // Mark private vars as undefs.
+ for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
+ for (const Expr *IRef : C->varlists()) {
+ const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
+ if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
+ (void)PreCondVars.setVarAddr(
+ CGF, OrigVD,
+ Address(llvm::UndefValue::get(
+ CGF.ConvertTypeForMem(CGF.getContext().getPointerType(
+ OrigVD->getType().getNonReferenceType()))),
+ CGF.getContext().getDeclAlign(OrigVD)));
+ }
+ }
+ }
(void)PreCondVars.apply(CGF);
+ // Emit init, __range and __end variables for C++ range loops.
+ const Stmt *Body =
+ S.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
+ for (unsigned Cnt = 0; Cnt < S.getCollapsedNumber(); ++Cnt) {
+ Body = Body->IgnoreContainers();
+ if (auto *For = dyn_cast<ForStmt>(Body)) {
+ Body = For->getBody();
+ } else {
+ assert(isa<CXXForRangeStmt>(Body) &&
+ "Expected canonical for loop or range-based for loop.");
+ auto *CXXFor = cast<CXXForRangeStmt>(Body);
+ if (const Stmt *Init = CXXFor->getInit())
+ CGF.EmitStmt(Init);
+ CGF.EmitStmt(CXXFor->getRangeStmt());
+ CGF.EmitStmt(CXXFor->getEndStmt());
+ Body = CXXFor->getBody();
+ }
+ }
if (const auto *PreInits = cast_or_null<DeclStmt>(S.getPreInits())) {
for (const auto *I : PreInits->decls())
CGF.EmitVarDecl(cast<VarDecl>(*I));
@@ -1324,6 +1358,31 @@ void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
// On a continue in the body, jump to the end.
JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue");
BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
+ for (const Expr *E : D.finals_conditions()) {
+ if (!E)
+ continue;
+ // Check that loop counter in non-rectangular nest fits into the iteration
+ // space.
+ llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next");
+ EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(),
+ getProfileCount(D.getBody()));
+ EmitBlock(NextBB);
+ }
+ // Emit loop variables for C++ range loops.
+ const Stmt *Body =
+ D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
+ for (unsigned Cnt = 0; Cnt < D.getCollapsedNumber(); ++Cnt) {
+ Body = Body->IgnoreContainers();
+ if (auto *For = dyn_cast<ForStmt>(Body)) {
+ Body = For->getBody();
+ } else {
+ assert(isa<CXXForRangeStmt>(Body) &&
+ "Expected canonical for loop or range-based for loop.");
+ auto *CXXFor = cast<CXXForRangeStmt>(Body);
+ EmitStmt(CXXFor->getLoopVarStmt());
+ Body = CXXFor->getBody();
+ }
+ }
// Emit loop body.
EmitStmt(D.getBody());
// The end (updates/cleanups).
@@ -1460,14 +1519,14 @@ static void emitAlignedClause(CodeGenFunction &CGF,
if (!CGF.HaveInsertPoint())
return;
for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
- unsigned ClauseAlignment = 0;
+ llvm::APInt ClauseAlignment(64, 0);
if (const Expr *AlignmentExpr = Clause->getAlignment()) {
auto *AlignmentCI =
cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
- ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
+ ClauseAlignment = AlignmentCI->getValue();
}
for (const Expr *E : Clause->varlists()) {
- unsigned Alignment = ClauseAlignment;
+ llvm::APInt Alignment(ClauseAlignment);
if (Alignment == 0) {
// OpenMP [2.8.1, Description]
// If no optional parameter is specified, implementation-defined default
@@ -1478,12 +1537,13 @@ static void emitAlignedClause(CodeGenFunction &CGF,
E->getType()->getPointeeType()))
.getQuantity();
}
- assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
+ assert((Alignment == 0 || Alignment.isPowerOf2()) &&
"alignment is not power of 2");
if (Alignment != 0) {
llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
CGF.EmitAlignmentAssumption(
- PtrValue, E, /*No second loc needed*/ SourceLocation(), Alignment);
+ PtrValue, E, /*No second loc needed*/ SourceLocation(),
+ llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment));
}
}
}
@@ -1553,8 +1613,28 @@ static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
CGF.EmitIgnoredExpr(I);
}
}
+ // Create temp loop control variables with their init values to support
+ // non-rectangular loops.
+ CodeGenFunction::OMPMapVars PreCondVars;
+ for (const Expr * E: S.dependent_counters()) {
+ if (!E)
+ continue;
+ assert(!E->getType().getNonReferenceType()->isRecordType() &&
+ "dependent counter must not be an iterator.");
+ const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
+ Address CounterAddr =
+ CGF.CreateMemTemp(VD->getType().getNonReferenceType());
+ (void)PreCondVars.setVarAddr(CGF, VD, CounterAddr);
+ }
+ (void)PreCondVars.apply(CGF);
+ for (const Expr *E : S.dependent_inits()) {
+ if (!E)
+ continue;
+ CGF.EmitIgnoredExpr(E);
+ }
// Check that loop is executed at least one time.
CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
+ PreCondVars.restore(CGF);
}
void CodeGenFunction::EmitOMPLinearClause(
@@ -3044,7 +3124,8 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
Data.NumberOfParts);
- OMPLexicalScope Scope(*this, S);
+ OMPLexicalScope Scope(*this, S, llvm::None,
+ !isOpenMPParallelDirective(S.getDirectiveKind()));
TaskGen(*this, OutlinedFn, Data);
}
@@ -3112,7 +3193,7 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
QualType BaseAndPointersType = getContext().getConstantArrayType(
- getContext().VoidPtrTy, ArrSize, ArrayType::Normal,
+ getContext().VoidPtrTy, ArrSize, nullptr, ArrayType::Normal,
/*IndexTypeQuals=*/0);
BPVD = createImplicitFirstprivateForType(
getContext(), Data, BaseAndPointersType, CD, S.getBeginLoc());
@@ -3120,7 +3201,7 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
getContext(), Data, BaseAndPointersType, CD, S.getBeginLoc());
QualType SizesType = getContext().getConstantArrayType(
getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1),
- ArrSize, ArrayType::Normal,
+ ArrSize, nullptr, ArrayType::Normal,
/*IndexTypeQuals=*/0);
SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
S.getBeginLoc());
@@ -3991,6 +4072,8 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
case OMPC_reverse_offload:
case OMPC_dynamic_allocators:
case OMPC_atomic_default_mem_order:
+ case OMPC_device_type:
+ case OMPC_match:
llvm_unreachable("Clause is not allowed in 'omp atomic'.");
}
}
@@ -4090,18 +4173,21 @@ static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
IsOffloadEntry, CodeGen);
OMPLexicalScope Scope(CGF, S, OMPD_task);
- auto &&SizeEmitter = [](CodeGenFunction &CGF, const OMPLoopDirective &D) {
- OMPLoopScope(CGF, D);
- // Emit calculation of the iterations count.
- llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations());
- NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty,
- /*isSigned=*/false);
- return NumIterations;
+ auto &&SizeEmitter =
+ [IsOffloadEntry](CodeGenFunction &CGF,
+ const OMPLoopDirective &D) -> llvm::Value * {
+ if (IsOffloadEntry) {
+ OMPLoopScope(CGF, D);
+ // Emit calculation of the iterations count.
+ llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations());
+ NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty,
+ /*isSigned=*/false);
+ return NumIterations;
+ }
+ return nullptr;
};
- if (IsOffloadEntry)
- CGM.getOpenMPRuntime().emitTargetNumIterationsCall(CGF, S, Device,
- SizeEmitter);
- CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device);
+ CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
+ SizeEmitter);
}
static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
@@ -5025,6 +5111,42 @@ void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
EmitOMPTaskLoopBasedDirective(S);
}
+void CodeGenFunction::EmitOMPMasterTaskLoopDirective(
+ const OMPMasterTaskLoopDirective &S) {
+ auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ Action.Enter(CGF);
+ EmitOMPTaskLoopBasedDirective(S);
+ };
+ OMPLexicalScope Scope(*this, S, llvm::None, /*EmitPreInitStmt=*/false);
+ CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
+}
+
+void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
+ const OMPMasterTaskLoopSimdDirective &S) {
+ auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ Action.Enter(CGF);
+ EmitOMPTaskLoopBasedDirective(S);
+ };
+ OMPLexicalScope Scope(*this, S, llvm::None, /*EmitPreInitStmt=*/false);
+ CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
+}
+
+void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
+ const OMPParallelMasterTaskLoopDirective &S) {
+ auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
+ PrePostActionTy &Action) {
+ Action.Enter(CGF);
+ CGF.EmitOMPTaskLoopBasedDirective(S);
+ };
+ OMPLexicalScope Scope(CGF, S, llvm::None, /*EmitPreInitStmt=*/false);
+ CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
+ S.getBeginLoc());
+ };
+ emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop, CodeGen,
+ emitEmptyBoundParameters);
+}
+
// Generate the instructions for '#pragma omp target update' directive.
void CodeGenFunction::EmitOMPTargetUpdateDirective(
const OMPTargetUpdateDirective &S) {
diff --git a/lib/CodeGen/CGVTables.cpp b/lib/CodeGen/CGVTables.cpp
index 3cb3d3544838..f9f25e7e57ad 100644
--- a/lib/CodeGen/CGVTables.cpp
+++ b/lib/CodeGen/CGVTables.cpp
@@ -157,7 +157,7 @@ CodeGenFunction::GenerateVarArgsThunk(llvm::Function *Fn,
const CGFunctionInfo &FnInfo,
GlobalDecl GD, const ThunkInfo &Thunk) {
const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl());
- const FunctionProtoType *FPT = MD->getType()->getAs<FunctionProtoType>();
+ const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>();
QualType ResultType = FPT->getReturnType();
// Get the original function
@@ -166,6 +166,15 @@ CodeGenFunction::GenerateVarArgsThunk(llvm::Function *Fn,
llvm::Value *Callee = CGM.GetAddrOfFunction(GD, Ty, /*ForVTable=*/true);
llvm::Function *BaseFn = cast<llvm::Function>(Callee);
+ // Cloning can't work if we don't have a definition. The Microsoft ABI may
+ // require thunks when a definition is not available. Emit an error in these
+ // cases.
+ if (!MD->isDefined()) {
+ CGM.ErrorUnsupported(MD, "return-adjusting thunk with variadic arguments");
+ return Fn;
+ }
+ assert(!BaseFn->isDeclaration() && "cannot clone undefined variadic method");
+
// Clone to thunk.
llvm::ValueToValueMapTy VMap;
@@ -201,6 +210,8 @@ CodeGenFunction::GenerateVarArgsThunk(llvm::Function *Fn,
Builder.SetInsertPoint(&*ThisStore);
llvm::Value *AdjustedThisPtr =
CGM.getCXXABI().performThisAdjustment(*this, ThisPtr, Thunk.This);
+ AdjustedThisPtr = Builder.CreateBitCast(AdjustedThisPtr,
+ ThisStore->getOperand(0)->getType());
ThisStore->setOperand(0, AdjustedThisPtr);
if (!Thunk.Return.isEmpty()) {
@@ -231,7 +242,6 @@ void CodeGenFunction::StartThunk(llvm::Function *Fn, GlobalDecl GD,
// Build FunctionArgs.
const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl());
QualType ThisType = MD->getThisType();
- const FunctionProtoType *FPT = MD->getType()->getAs<FunctionProtoType>();
QualType ResultType;
if (IsUnprototyped)
ResultType = CGM.getContext().VoidTy;
@@ -240,7 +250,7 @@ void CodeGenFunction::StartThunk(llvm::Function *Fn, GlobalDecl GD,
else if (CGM.getCXXABI().hasMostDerivedReturn(GD))
ResultType = CGM.getContext().VoidPtrTy;
else
- ResultType = FPT->getReturnType();
+ ResultType = MD->getType()->castAs<FunctionProtoType>()->getReturnType();
FunctionArgList FunctionArgs;
// Create the implicit 'this' parameter declaration.
@@ -291,14 +301,17 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::FunctionCallee Callee,
*this, LoadCXXThisAddress(), Thunk->This)
: LoadCXXThis();
- if (CurFnInfo->usesInAlloca() || IsUnprototyped) {
- // We don't handle return adjusting thunks, because they require us to call
- // the copy constructor. For now, fall through and pretend the return
- // adjustment was empty so we don't crash.
+ // If perfect forwarding is required for a variadic method, a method using
+ // inalloca, or an unprototyped thunk, use musttail. Emit an error if this
+ // thunk requires a return adjustment, since that is impossible with musttail.
+ if (CurFnInfo->usesInAlloca() || CurFnInfo->isVariadic() || IsUnprototyped) {
if (Thunk && !Thunk->Return.isEmpty()) {
if (IsUnprototyped)
CGM.ErrorUnsupported(
MD, "return-adjusting thunk with incomplete parameter type");
+ else if (CurFnInfo->isVariadic())
+ llvm_unreachable("shouldn't try to emit musttail return-adjusting "
+ "thunks for variadic functions");
else
CGM.ErrorUnsupported(
MD, "non-trivial argument copy for return-adjusting thunk");
@@ -549,16 +562,32 @@ llvm::Constant *CodeGenVTables::maybeEmitThunk(GlobalDecl GD,
CGM.SetLLVMFunctionAttributesForDefinition(GD.getDecl(), ThunkFn);
+ // Thunks for variadic methods are special because in general variadic
+ // arguments cannot be perfectly forwarded. In the general case, clang
+ // implements such thunks by cloning the original function body. However, for
+ // thunks with no return adjustment on targets that support musttail, we can
+ // use musttail to perfectly forward the variadic arguments.
+ bool ShouldCloneVarArgs = false;
if (!IsUnprototyped && ThunkFn->isVarArg()) {
- // Varargs thunks are special; we can't just generate a call because
- // we can't copy the varargs. Our implementation is rather
- // expensive/sucky at the moment, so don't generate the thunk unless
- // we have to.
- // FIXME: Do something better here; GenerateVarArgsThunk is extremely ugly.
+ ShouldCloneVarArgs = true;
+ if (TI.Return.isEmpty()) {
+ switch (CGM.getTriple().getArch()) {
+ case llvm::Triple::x86_64:
+ case llvm::Triple::x86:
+ case llvm::Triple::aarch64:
+ ShouldCloneVarArgs = false;
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ if (ShouldCloneVarArgs) {
if (UseAvailableExternallyLinkage)
return ThunkFn;
- ThunkFn = CodeGenFunction(CGM).GenerateVarArgsThunk(ThunkFn, FnInfo, GD,
- TI);
+ ThunkFn =
+ CodeGenFunction(CGM).GenerateVarArgsThunk(ThunkFn, FnInfo, GD, TI);
} else {
// Normal thunk body generation.
CodeGenFunction(CGM).generateThunk(ThunkFn, FnInfo, GD, TI, IsUnprototyped);
@@ -779,7 +808,7 @@ CodeGenVTables::GenerateConstructionVTable(const CXXRecordDecl *RD,
assert(!VTable->isDeclaration() && "Shouldn't set properties on declaration");
CGM.setGVProperties(VTable, RD);
- CGM.EmitVTableTypeMetadata(VTable, *VTLayout.get());
+ CGM.EmitVTableTypeMetadata(RD, VTable, *VTLayout.get());
return VTable;
}
@@ -1010,7 +1039,32 @@ bool CodeGenModule::HasHiddenLTOVisibility(const CXXRecordDecl *RD) {
return true;
}
-void CodeGenModule::EmitVTableTypeMetadata(llvm::GlobalVariable *VTable,
+llvm::GlobalObject::VCallVisibility
+CodeGenModule::GetVCallVisibilityLevel(const CXXRecordDecl *RD) {
+ LinkageInfo LV = RD->getLinkageAndVisibility();
+ llvm::GlobalObject::VCallVisibility TypeVis;
+ if (!isExternallyVisible(LV.getLinkage()))
+ TypeVis = llvm::GlobalObject::VCallVisibilityTranslationUnit;
+ else if (HasHiddenLTOVisibility(RD))
+ TypeVis = llvm::GlobalObject::VCallVisibilityLinkageUnit;
+ else
+ TypeVis = llvm::GlobalObject::VCallVisibilityPublic;
+
+ for (auto B : RD->bases())
+ if (B.getType()->getAsCXXRecordDecl()->isDynamicClass())
+ TypeVis = std::min(TypeVis,
+ GetVCallVisibilityLevel(B.getType()->getAsCXXRecordDecl()));
+
+ for (auto B : RD->vbases())
+ if (B.getType()->getAsCXXRecordDecl()->isDynamicClass())
+ TypeVis = std::min(TypeVis,
+ GetVCallVisibilityLevel(B.getType()->getAsCXXRecordDecl()));
+
+ return TypeVis;
+}
+
+void CodeGenModule::EmitVTableTypeMetadata(const CXXRecordDecl *RD,
+ llvm::GlobalVariable *VTable,
const VTableLayout &VTLayout) {
if (!getCodeGenOpts().LTOUnit)
return;
@@ -1070,4 +1124,10 @@ void CodeGenModule::EmitVTableTypeMetadata(llvm::GlobalVariable *VTable,
VTable->addTypeMetadata((PointerWidth * I).getQuantity(), MD);
}
}
+
+ if (getCodeGenOpts().VirtualFunctionElimination) {
+ llvm::GlobalObject::VCallVisibility TypeVis = GetVCallVisibilityLevel(RD);
+ if (TypeVis != llvm::GlobalObject::VCallVisibilityPublic)
+ VTable->addVCallVisibilityMetadata(TypeVis);
+ }
}
diff --git a/lib/CodeGen/CodeGenAction.cpp b/lib/CodeGen/CodeGenAction.cpp
index 0ae9ea427d65..87bda4a0fc2c 100644
--- a/lib/CodeGen/CodeGenAction.cpp
+++ b/lib/CodeGen/CodeGenAction.cpp
@@ -14,7 +14,9 @@
#include "clang/AST/ASTContext.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclGroup.h"
+#include "clang/Basic/DiagnosticFrontend.h"
#include "clang/Basic/FileManager.h"
+#include "clang/Basic/LangStandard.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/CodeGen/BackendUtil.h"
@@ -37,6 +39,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/YAMLTraits.h"
@@ -228,6 +231,7 @@ namespace clang {
void HandleTranslationUnit(ASTContext &C) override {
{
+ llvm::TimeTraceScope TimeScope("Frontend", StringRef(""));
PrettyStackTraceString CrashInfo("Per-file LLVM IR generation");
if (FrontendTimesIsEnabled) {
LLVMIRGenerationRefCount += 1;
@@ -260,7 +264,7 @@ namespace clang {
std::unique_ptr<DiagnosticHandler> OldDiagnosticHandler =
Ctx.getDiagnosticHandler();
- Ctx.setDiagnosticHandler(llvm::make_unique<ClangDiagnosticHandler>(
+ Ctx.setDiagnosticHandler(std::make_unique<ClangDiagnosticHandler>(
CodeGenOpts, this));
Expected<std::unique_ptr<llvm::ToolOutputFile>> OptRecordFileOrErr =
@@ -362,6 +366,9 @@ namespace clang {
bool StackSizeDiagHandler(const llvm::DiagnosticInfoStackSize &D);
/// Specialized handler for unsupported backend feature diagnostic.
void UnsupportedDiagHandler(const llvm::DiagnosticInfoUnsupported &D);
+ /// Specialized handler for misexpect warnings.
+ /// Note that misexpect remarks are emitted through ORE
+ void MisExpectDiagHandler(const llvm::DiagnosticInfoMisExpect &D);
/// Specialized handlers for optimization remarks.
/// Note that these handlers only accept remarks and they always handle
/// them.
@@ -561,13 +568,13 @@ const FullSourceLoc BackendConsumer::getBestLocationFromDebugLoc(
if (D.isLocationAvailable()) {
D.getLocation(Filename, Line, Column);
if (Line > 0) {
- const FileEntry *FE = FileMgr.getFile(Filename);
+ auto FE = FileMgr.getFile(Filename);
if (!FE)
FE = FileMgr.getFile(D.getAbsolutePath());
if (FE) {
// If -gcolumn-info was not used, Column will be 0. This upsets the
// source manager, so pass 1 if Column is not set.
- DILoc = SourceMgr.translateFileLineCol(FE, Line, Column ? Column : 1);
+ DILoc = SourceMgr.translateFileLineCol(*FE, Line, Column ? Column : 1);
}
}
BadDebugInfo = DILoc.isInvalid();
@@ -614,6 +621,25 @@ void BackendConsumer::UnsupportedDiagHandler(
<< Filename << Line << Column;
}
+void BackendConsumer::MisExpectDiagHandler(
+ const llvm::DiagnosticInfoMisExpect &D) {
+ StringRef Filename;
+ unsigned Line, Column;
+ bool BadDebugInfo = false;
+ FullSourceLoc Loc =
+ getBestLocationFromDebugLoc(D, BadDebugInfo, Filename, Line, Column);
+
+ Diags.Report(Loc, diag::warn_profile_data_misexpect) << D.getMsg().str();
+
+ if (BadDebugInfo)
+ // If we were not able to translate the file:line:col information
+ // back to a SourceLocation, at least emit a note stating that
+ // we could not translate this location. This can happen in the
+ // case of #line directives.
+ Diags.Report(Loc, diag::note_fe_backend_invalid_loc)
+ << Filename << Line << Column;
+}
+
void BackendConsumer::EmitOptimizationMessage(
const llvm::DiagnosticInfoOptimizationBase &D, unsigned DiagID) {
// We only support warnings and remarks.
@@ -784,6 +810,9 @@ void BackendConsumer::DiagnosticHandlerImpl(const DiagnosticInfo &DI) {
case llvm::DK_Unsupported:
UnsupportedDiagHandler(cast<DiagnosticInfoUnsupported>(DI));
return;
+ case llvm::DK_MisExpect:
+ MisExpectDiagHandler(cast<DiagnosticInfoMisExpect>(DI));
+ return;
default:
// Plugin IDs are not bound to any value as they are set dynamically.
ComputeDiagRemarkID(Severity, backend_plugin, DiagID);
@@ -914,7 +943,7 @@ CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
if (CI.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo &&
CI.getCodeGenOpts().MacroDebugInfo) {
std::unique_ptr<PPCallbacks> Callbacks =
- llvm::make_unique<MacroPPCallbacks>(BEConsumer->getCodeGenerator(),
+ std::make_unique<MacroPPCallbacks>(BEConsumer->getCodeGenerator(),
CI.getPreprocessor());
CI.getPreprocessor().addPPCallbacks(std::move(Callbacks));
}
@@ -975,7 +1004,7 @@ CodeGenAction::loadModule(MemoryBufferRef MBRef) {
// the file was already processed by indexing and will be passed to the
// linker using merged object file.
if (!Bm) {
- auto M = llvm::make_unique<llvm::Module>("empty", *VMContext);
+ auto M = std::make_unique<llvm::Module>("empty", *VMContext);
M->setTargetTriple(CI.getTargetOpts().Triple);
return M;
}
@@ -1014,7 +1043,7 @@ CodeGenAction::loadModule(MemoryBufferRef MBRef) {
void CodeGenAction::ExecuteAction() {
// If this is an IR file, we have to treat it specially.
- if (getCurrentFileKind().getLanguage() == InputKind::LLVM_IR) {
+ if (getCurrentFileKind().getLanguage() == Language::LLVM_IR) {
BackendAction BA = static_cast<BackendAction>(Act);
CompilerInstance &CI = getCompilerInstance();
std::unique_ptr<raw_pwrite_stream> OS =
diff --git a/lib/CodeGen/CodeGenFunction.cpp b/lib/CodeGen/CodeGenFunction.cpp
index eafe26674434..3f9a52ab7638 100644
--- a/lib/CodeGen/CodeGenFunction.cpp
+++ b/lib/CodeGen/CodeGenFunction.cpp
@@ -47,13 +47,10 @@ static bool shouldEmitLifetimeMarkers(const CodeGenOptions &CGOpts,
if (CGOpts.DisableLifetimeMarkers)
return false;
- // Disable lifetime markers in msan builds.
- // FIXME: Remove this when msan works with lifetime markers.
- if (LangOpts.Sanitize.has(SanitizerKind::Memory))
- return false;
-
- // Asan uses markers for use-after-scope checks.
- if (CGOpts.SanitizeAddressUseAfterScope)
+ // Sanitizers may use markers.
+ if (CGOpts.SanitizeAddressUseAfterScope ||
+ LangOpts.Sanitize.has(SanitizerKind::HWAddress) ||
+ LangOpts.Sanitize.has(SanitizerKind::Memory))
return true;
// For now, only in optimized builds.
@@ -197,7 +194,7 @@ TypeEvaluationKind CodeGenFunction::getEvaluationKind(QualType type) {
#define NON_CANONICAL_TYPE(name, parent) case Type::name:
#define DEPENDENT_TYPE(name, parent) case Type::name:
#define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(name, parent) case Type::name:
-#include "clang/AST/TypeNodes.def"
+#include "clang/AST/TypeNodes.inc"
llvm_unreachable("non-canonical or dependent type in IR-generation");
case Type::Auto:
@@ -434,13 +431,13 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) {
// Scan function arguments for vector width.
for (llvm::Argument &A : CurFn->args())
if (auto *VT = dyn_cast<llvm::VectorType>(A.getType()))
- LargestVectorWidth = std::max(LargestVectorWidth,
- VT->getPrimitiveSizeInBits());
+ LargestVectorWidth = std::max((uint64_t)LargestVectorWidth,
+ VT->getPrimitiveSizeInBits().getFixedSize());
// Update vector width based on return type.
if (auto *VT = dyn_cast<llvm::VectorType>(CurFn->getReturnType()))
- LargestVectorWidth = std::max(LargestVectorWidth,
- VT->getPrimitiveSizeInBits());
+ LargestVectorWidth = std::max((uint64_t)LargestVectorWidth,
+ VT->getPrimitiveSizeInBits().getFixedSize());
// Add the required-vector-width attribute. This contains the max width from:
// 1. min-vector-width attribute used in the source program.
@@ -732,6 +729,15 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
SanOpts.Mask &= ~SanitizerKind::CFIUnrelatedCast;
}
+ // Ignore null checks in coroutine functions since the coroutines passes
+ // are not aware of how to move the extra UBSan instructions across the split
+ // coroutine boundaries.
+ if (D && SanOpts.has(SanitizerKind::Null))
+ if (const auto *FD = dyn_cast<FunctionDecl>(D))
+ if (FD->getBody() &&
+ FD->getBody()->getStmtClass() == Stmt::CoroutineBodyStmtClass)
+ SanOpts.Mask &= ~SanitizerKind::Null;
+
// Apply xray attributes to the function (as a string, for now)
if (D) {
if (const auto *XRayAttr = D->getAttr<XRayInstrumentAttr>()) {
@@ -762,6 +768,9 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
if (CGM.getCodeGenOpts().ProfileSampleAccurate)
Fn->addFnAttr("profile-sample-accurate");
+ if (D && D->hasAttr<CFICanonicalJumpTableAttr>())
+ Fn->addFnAttr("cfi-canonical-jump-table");
+
if (getLangOpts().OpenCL) {
// Add metadata for a kernel function.
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
@@ -1662,7 +1671,7 @@ CodeGenFunction::EmitNullInitialization(Address DestPtr, QualType Ty) {
llvm::GlobalVariable::PrivateLinkage,
NullConstant, Twine());
CharUnits NullAlign = DestPtr.getAlignment();
- NullVariable->setAlignment(NullAlign.getQuantity());
+ NullVariable->setAlignment(NullAlign.getAsAlign());
Address SrcPtr(Builder.CreateBitCast(NullVariable, Builder.getInt8PtrTy()),
NullAlign);
@@ -1862,7 +1871,7 @@ void CodeGenFunction::EmitVariablyModifiedType(QualType type) {
#define NON_CANONICAL_TYPE(Class, Base)
#define DEPENDENT_TYPE(Class, Base) case Type::Class:
#define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class, Base)
-#include "clang/AST/TypeNodes.def"
+#include "clang/AST/TypeNodes.inc"
llvm_unreachable("unexpected dependent type!");
// These types are never variably-modified.
@@ -2048,24 +2057,9 @@ void CodeGenFunction::EmitAlignmentAssumption(llvm::Value *PtrValue,
}
void CodeGenFunction::EmitAlignmentAssumption(llvm::Value *PtrValue,
- QualType Ty, SourceLocation Loc,
- SourceLocation AssumptionLoc,
- unsigned Alignment,
- llvm::Value *OffsetValue) {
- llvm::Value *TheCheck;
- llvm::Instruction *Assumption = Builder.CreateAlignmentAssumption(
- CGM.getDataLayout(), PtrValue, Alignment, OffsetValue, &TheCheck);
- if (SanOpts.has(SanitizerKind::Alignment)) {
- llvm::Value *AlignmentVal = llvm::ConstantInt::get(IntPtrTy, Alignment);
- EmitAlignmentAssumptionCheck(PtrValue, Ty, Loc, AssumptionLoc, AlignmentVal,
- OffsetValue, TheCheck, Assumption);
- }
-}
-
-void CodeGenFunction::EmitAlignmentAssumption(llvm::Value *PtrValue,
const Expr *E,
SourceLocation AssumptionLoc,
- unsigned Alignment,
+ llvm::Value *Alignment,
llvm::Value *OffsetValue) {
if (auto *CE = dyn_cast<CastExpr>(E))
E = CE->getSubExprAsWritten();
@@ -2194,7 +2188,7 @@ void CodeGenFunction::checkTargetFeatures(SourceLocation Loc,
// Get the current enclosing function if it exists. If it doesn't
// we can't check the target features anyhow.
- const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(CurFuncDecl);
+ const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(CurCodeDecl);
if (!FD)
return;
diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h
index c3060d1fb351..99bc85ba3773 100644
--- a/lib/CodeGen/CodeGenFunction.h
+++ b/lib/CodeGen/CodeGenFunction.h
@@ -1034,7 +1034,7 @@ public:
assert(isInConditionalBranch());
llvm::BasicBlock *block = OutermostConditional->getStartingBlock();
auto store = new llvm::StoreInst(value, addr.getPointer(), &block->back());
- store->setAlignment(addr.getAlignment().getQuantity());
+ store->setAlignment(addr.getAlignment().getAsAlign());
}
/// An RAII object to record that we're evaluating a statement
@@ -2829,13 +2829,8 @@ public:
llvm::Value *Alignment,
llvm::Value *OffsetValue = nullptr);
- void EmitAlignmentAssumption(llvm::Value *PtrValue, QualType Ty,
- SourceLocation Loc, SourceLocation AssumptionLoc,
- unsigned Alignment,
- llvm::Value *OffsetValue = nullptr);
-
void EmitAlignmentAssumption(llvm::Value *PtrValue, const Expr *E,
- SourceLocation AssumptionLoc, unsigned Alignment,
+ SourceLocation AssumptionLoc, llvm::Value *Alignment,
llvm::Value *OffsetValue = nullptr);
//===--------------------------------------------------------------------===//
@@ -3160,6 +3155,11 @@ public:
void EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S);
void EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S);
void EmitOMPTaskLoopSimdDirective(const OMPTaskLoopSimdDirective &S);
+ void EmitOMPMasterTaskLoopDirective(const OMPMasterTaskLoopDirective &S);
+ void
+ EmitOMPMasterTaskLoopSimdDirective(const OMPMasterTaskLoopSimdDirective &S);
+ void EmitOMPParallelMasterTaskLoopDirective(
+ const OMPParallelMasterTaskLoopDirective &S);
void EmitOMPDistributeDirective(const OMPDistributeDirective &S);
void EmitOMPDistributeParallelForDirective(
const OMPDistributeParallelForDirective &S);
@@ -3760,6 +3760,7 @@ public:
llvm::Value *vectorWrapScalar16(llvm::Value *Op);
llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
llvm::Triple::ArchType Arch);
+ llvm::Value *EmitBPFBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *BuildVector(ArrayRef<llvm::Value*> Ops);
llvm::Value *EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp
index 6ff72ec045e6..b05a58848e82 100644
--- a/lib/CodeGen/CodeGenModule.cpp
+++ b/lib/CodeGen/CodeGenModule.cpp
@@ -414,12 +414,7 @@ void CodeGenModule::Release() {
OpenMPRuntime->emitRequiresDirectiveRegFun()) {
AddGlobalCtor(OpenMPRequiresDirectiveRegFun, 0);
}
- if (llvm::Function *OpenMPRegistrationFunction =
- OpenMPRuntime->emitRegistrationFunction()) {
- auto ComdatKey = OpenMPRegistrationFunction->hasComdat() ?
- OpenMPRegistrationFunction : nullptr;
- AddGlobalCtor(OpenMPRegistrationFunction, 0, ComdatKey);
- }
+ OpenMPRuntime->createOffloadEntriesAndInfoMetadata();
OpenMPRuntime->clear();
}
if (PGOReader) {
@@ -535,6 +530,12 @@ void CodeGenModule::Release() {
getModule().addModuleFlag(llvm::Module::Override, "Cross-DSO CFI", 1);
}
+ if (LangOpts.Sanitize.has(SanitizerKind::CFIICall)) {
+ getModule().addModuleFlag(llvm::Module::Override,
+ "CFI Canonical Jump Tables",
+ CodeGenOpts.SanitizeCfiCanonicalJumpTables);
+ }
+
if (CodeGenOpts.CFProtectionReturn &&
Target.checkCFProtectionReturnSupported(getDiags())) {
// Indicate that we want to instrument return control flow protection.
@@ -1176,7 +1177,7 @@ void CodeGenModule::EmitCtorList(CtorList &Fns, const char *GlobalName) {
// The LTO linker doesn't seem to like it when we set an alignment
// on appending variables. Take it off as a workaround.
- list->setAlignment(0);
+ list->setAlignment(llvm::None);
Fns.clear();
}
@@ -1590,11 +1591,11 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
unsigned alignment = D->getMaxAlignment() / Context.getCharWidth();
if (alignment)
- F->setAlignment(alignment);
+ F->setAlignment(llvm::Align(alignment));
if (!D->hasAttr<AlignedAttr>())
if (LangOpts.FunctionAlignment)
- F->setAlignment(1 << LangOpts.FunctionAlignment);
+ F->setAlignment(llvm::Align(1ull << LangOpts.FunctionAlignment));
// Some C++ ABIs require 2-byte alignment for member functions, in order to
// reserve a bit for differentiating between virtual and non-virtual member
@@ -1602,13 +1603,20 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
// member function, set its alignment accordingly.
if (getTarget().getCXXABI().areMemberFunctionsAligned()) {
if (F->getAlignment() < 2 && isa<CXXMethodDecl>(D))
- F->setAlignment(2);
+ F->setAlignment(llvm::Align(2));
}
- // In the cross-dso CFI mode, we want !type attributes on definitions only.
- if (CodeGenOpts.SanitizeCfiCrossDso)
- if (auto *FD = dyn_cast<FunctionDecl>(D))
- CreateFunctionTypeMetadataForIcall(FD, F);
+ // In the cross-dso CFI mode with canonical jump tables, we want !type
+ // attributes on definitions only.
+ if (CodeGenOpts.SanitizeCfiCrossDso &&
+ CodeGenOpts.SanitizeCfiCanonicalJumpTables) {
+ if (auto *FD = dyn_cast<FunctionDecl>(D)) {
+ // Skip available_externally functions. They won't be codegen'ed in the
+ // current module anyway.
+ if (getContext().GetGVALinkageForFunction(FD) != GVA_AvailableExternally)
+ CreateFunctionTypeMetadataForIcall(FD, F);
+ }
+ }
// Emit type metadata on member functions for member function pointer checks.
// These are only ever necessary on definitions; we're guaranteed that the
@@ -1704,6 +1712,8 @@ void CodeGenModule::setNonAliasAttributes(GlobalDecl GD,
GV->addAttribute("data-section", SA->getName());
if (auto *SA = D->getAttr<PragmaClangRodataSectionAttr>())
GV->addAttribute("rodata-section", SA->getName());
+ if (auto *SA = D->getAttr<PragmaClangRelroSectionAttr>())
+ GV->addAttribute("relro-section", SA->getName());
}
if (auto *F = dyn_cast<llvm::Function>(GO)) {
@@ -1765,14 +1775,6 @@ void CodeGenModule::CreateFunctionTypeMetadataForIcall(const FunctionDecl *FD,
if (isa<CXXMethodDecl>(FD) && !cast<CXXMethodDecl>(FD)->isStatic())
return;
- // Additionally, if building with cross-DSO support...
- if (CodeGenOpts.SanitizeCfiCrossDso) {
- // Skip available_externally functions. They won't be codegen'ed in the
- // current module anyway.
- if (getContext().GetGVALinkageForFunction(FD) == GVA_AvailableExternally)
- return;
- }
-
llvm::Metadata *MD = CreateMetadataIdentifierForType(FD->getType());
F->addTypeMetadata(0, MD);
F->addTypeMetadata(0, CreateMetadataIdentifierGeneralized(FD->getType()));
@@ -1849,8 +1851,11 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F,
F->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
// Don't emit entries for function declarations in the cross-DSO mode. This
- // is handled with better precision by the receiving DSO.
- if (!CodeGenOpts.SanitizeCfiCrossDso)
+ // is handled with better precision by the receiving DSO. But if jump tables
+ // are non-canonical then we need type metadata in order to produce the local
+ // jump table.
+ if (!CodeGenOpts.SanitizeCfiCrossDso ||
+ !CodeGenOpts.SanitizeCfiCanonicalJumpTables)
CreateFunctionTypeMetadataForIcall(FD, F);
if (getLangOpts().OpenMP && FD->hasAttr<OMPDeclareSimdDeclAttr>())
@@ -2114,6 +2119,10 @@ void CodeGenModule::EmitDeferred() {
if (!GV->isDeclaration())
continue;
+ // If this is OpenMP, check if it is legal to emit this global normally.
+ if (LangOpts.OpenMP && OpenMPRuntime && OpenMPRuntime->emitTargetGlobal(D))
+ continue;
+
// Otherwise, emit the definition and move on to the next one.
EmitGlobalDefinition(D, GV);
@@ -2310,11 +2319,20 @@ bool CodeGenModule::MustBeEmitted(const ValueDecl *Global) {
}
bool CodeGenModule::MayBeEmittedEagerly(const ValueDecl *Global) {
- if (const auto *FD = dyn_cast<FunctionDecl>(Global))
+ if (const auto *FD = dyn_cast<FunctionDecl>(Global)) {
if (FD->getTemplateSpecializationKind() == TSK_ImplicitInstantiation)
// Implicit template instantiations may change linkage if they are later
// explicitly instantiated, so they should not be emitted eagerly.
return false;
+ // In OpenMP 5.0 a function may be marked as device_type(nohost), so we
+ // should not emit it eagerly unless we are sure that the function must be
+ // emitted on the host.
+ if (LangOpts.OpenMP >= 50 && !LangOpts.OpenMPSimd &&
+ !LangOpts.OpenMPIsDevice &&
+ !OMPDeclareTargetDeclAttr::getDeviceType(FD) &&
+ !FD->isUsed(/*CheckUsedAttr=*/false) && !FD->isReferenced())
+ return false;
+ }
if (const auto *VD = dyn_cast<VarDecl>(Global))
if (Context.getInlineVariableDefinitionKind(VD) ==
ASTContext::InlineVariableDefinitionKind::WeakUnknown)
@@ -2437,8 +2455,7 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
}
if (LangOpts.OpenMP) {
- // If this is OpenMP device, check if it is legal to emit this global
- // normally.
+ // If this is OpenMP, check if it is legal to emit this global normally.
if (OpenMPRuntime && OpenMPRuntime->emitTargetGlobal(GD))
return;
if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(Global)) {
@@ -2512,6 +2529,11 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
return;
}
+ // Check if this must be emitted as declare variant.
+ if (LangOpts.OpenMP && isa<FunctionDecl>(Global) && OpenMPRuntime &&
+ OpenMPRuntime->emitDeclareVariant(GD, /*IsForDefinition=*/false))
+ return;
+
// If we're deferring emission of a C++ variable with an
// initializer, remember the order in which it appeared in the file.
if (getLangOpts().CPlusPlus && isa<VarDecl>(Global) &&
@@ -2717,6 +2739,50 @@ void CodeGenModule::EmitMultiVersionFunctionDefinition(GlobalDecl GD,
EmitGlobalFunctionDefinition(GD, GV);
}
+void CodeGenModule::emitOpenMPDeviceFunctionRedefinition(
+ GlobalDecl OldGD, GlobalDecl NewGD, llvm::GlobalValue *GV) {
+ assert(getLangOpts().OpenMP && getLangOpts().OpenMPIsDevice &&
+ OpenMPRuntime && "Expected OpenMP device mode.");
+ const auto *D = cast<FunctionDecl>(OldGD.getDecl());
+
+ // Compute the function info and LLVM type.
+ const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(OldGD);
+ llvm::FunctionType *Ty = getTypes().GetFunctionType(FI);
+
+ // Get or create the prototype for the function.
+ if (!GV || (GV->getType()->getElementType() != Ty)) {
+ GV = cast<llvm::GlobalValue>(GetOrCreateLLVMFunction(
+ getMangledName(OldGD), Ty, GlobalDecl(), /*ForVTable=*/false,
+ /*DontDefer=*/true, /*IsThunk=*/false, llvm::AttributeList(),
+ ForDefinition));
+ SetFunctionAttributes(OldGD, cast<llvm::Function>(GV),
+ /*IsIncompleteFunction=*/false,
+ /*IsThunk=*/false);
+ }
+ // We need to set linkage and visibility on the function before
+ // generating code for it because various parts of IR generation
+ // want to propagate this information down (e.g. to local static
+ // declarations).
+ auto *Fn = cast<llvm::Function>(GV);
+ setFunctionLinkage(OldGD, Fn);
+
+ // FIXME: this is redundant with part of
+ // setFunctionDefinitionAttributes
+ setGVProperties(Fn, OldGD);
+
+ MaybeHandleStaticInExternC(D, Fn);
+
+ maybeSetTrivialComdat(*D, *Fn);
+
+ CodeGenFunction(*this).GenerateCode(NewGD, Fn, FI);
+
+ setNonAliasAttributes(OldGD, Fn);
+ SetLLVMFunctionAttributesForDefinition(D, Fn);
+
+ if (D->hasAttr<AnnotateAttr>())
+ AddGlobalAnnotations(D, Fn);
+}
+
void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) {
const auto *D = cast<ValueDecl>(GD.getDecl());
@@ -2816,11 +2882,13 @@ void CodeGenModule::emitMultiVersionFunctions() {
llvm::Function *ResolverFunc;
const TargetInfo &TI = getTarget();
- if (TI.supportsIFunc() || FD->isTargetMultiVersion())
+ if (TI.supportsIFunc() || FD->isTargetMultiVersion()) {
ResolverFunc = cast<llvm::Function>(
GetGlobalValue((getMangledName(GD) + ".resolver").str()));
- else
+ ResolverFunc->setLinkage(llvm::Function::WeakODRLinkage);
+ } else {
ResolverFunc = cast<llvm::Function>(GetGlobalValue(getMangledName(GD)));
+ }
if (supportsCOMDAT())
ResolverFunc->setComdat(
@@ -2864,6 +2932,10 @@ void CodeGenModule::emitCPUDispatchDefinition(GlobalDecl GD) {
auto *ResolverFunc = cast<llvm::Function>(GetOrCreateLLVMFunction(
ResolverName, ResolverType, ResolverGD, /*ForVTable=*/false));
+ ResolverFunc->setLinkage(llvm::Function::WeakODRLinkage);
+ if (supportsCOMDAT())
+ ResolverFunc->setComdat(
+ getModule().getOrInsertComdat(ResolverFunc->getName()));
SmallVector<CodeGenFunction::MultiVersionResolverOption, 10> Options;
const TargetInfo &Target = getTarget();
@@ -2928,6 +3000,21 @@ void CodeGenModule::emitCPUDispatchDefinition(GlobalDecl GD) {
CodeGenFunction CGF(*this);
CGF.EmitMultiVersionResolver(ResolverFunc, Options);
+
+ if (getTarget().supportsIFunc()) {
+ std::string AliasName = getMangledNameImpl(
+ *this, GD, FD, /*OmitMultiVersionMangling=*/true);
+ llvm::Constant *AliasFunc = GetGlobalValue(AliasName);
+ if (!AliasFunc) {
+ auto *IFunc = cast<llvm::GlobalIFunc>(GetOrCreateLLVMFunction(
+ AliasName, DeclTy, GD, /*ForVTable=*/false, /*DontDefer=*/true,
+ /*IsThunk=*/false, llvm::AttributeList(), NotForDefinition));
+ auto *GA = llvm::GlobalAlias::create(
+ DeclTy, 0, getFunctionLinkage(GD), AliasName, IFunc, &getModule());
+ GA->setLinkage(llvm::Function::WeakODRLinkage);
+ SetCommonAttributes(GD, GA);
+ }
+ }
}
/// If a dispatcher for the specified mangled name is not in the module, create
@@ -2964,7 +3051,7 @@ llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver(
MangledName + ".resolver", ResolverType, GlobalDecl{},
/*ForVTable=*/false);
llvm::GlobalIFunc *GIF = llvm::GlobalIFunc::create(
- DeclTy, 0, llvm::Function::ExternalLinkage, "", Resolver, &getModule());
+ DeclTy, 0, llvm::Function::WeakODRLinkage, "", Resolver, &getModule());
GIF->setName(ResolverName);
SetCommonAttributes(FD, GIF);
@@ -3010,6 +3097,10 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction(
EmitGlobal(GDDef);
}
}
+ // Check if this must be emitted as declare variant and emit reference to
+ // the declare variant function.
+ if (LangOpts.OpenMP && OpenMPRuntime)
+ (void)OpenMPRuntime->emitDeclareVariant(GD, /*IsForDefinition=*/true);
if (FD->isMultiVersion()) {
const auto *TA = FD->getAttr<TargetAttr>();
@@ -3398,7 +3489,7 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName,
// handling.
GV->setConstant(isTypeConstant(D->getType(), false));
- GV->setAlignment(getContext().getDeclAlign(D).getQuantity());
+ GV->setAlignment(getContext().getDeclAlign(D).getAsAlign());
setLinkageForGV(GV, D);
@@ -3455,7 +3546,8 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName,
// Make a new global with the correct type, this is now guaranteed
// to work.
auto *NewGV = cast<llvm::GlobalVariable>(
- GetAddrOfGlobalVar(D, InitType, IsForDefinition));
+ GetAddrOfGlobalVar(D, InitType, IsForDefinition)
+ ->stripPointerCasts());
// Erase the old global, since it is no longer used.
GV->eraseFromParent();
@@ -3548,7 +3640,7 @@ llvm::GlobalVariable *CodeGenModule::CreateOrReplaceCXXRuntimeVariable(
!GV->hasAvailableExternallyLinkage())
GV->setComdat(TheModule.getOrInsertComdat(GV->getName()));
- GV->setAlignment(Alignment);
+ GV->setAlignment(llvm::MaybeAlign(Alignment));
return GV;
}
@@ -3768,9 +3860,9 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
return;
llvm::Constant *Init = nullptr;
- CXXRecordDecl *RD = ASTTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl();
bool NeedsGlobalCtor = false;
- bool NeedsGlobalDtor = RD && !RD->hasTrivialDestructor();
+ bool NeedsGlobalDtor =
+ D->needsDestruction(getContext()) == QualType::DK_cxx_destructor;
const VarDecl *InitDecl;
const Expr *InitExpr = D->getAnyInitializer(InitDecl);
@@ -3837,14 +3929,8 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
llvm::Constant *Entry =
GetAddrOfGlobalVar(D, InitType, ForDefinition_t(!IsTentative));
- // Strip off a bitcast if we got one back.
- if (auto *CE = dyn_cast<llvm::ConstantExpr>(Entry)) {
- assert(CE->getOpcode() == llvm::Instruction::BitCast ||
- CE->getOpcode() == llvm::Instruction::AddrSpaceCast ||
- // All zero index gep.
- CE->getOpcode() == llvm::Instruction::GetElementPtr);
- Entry = CE->getOperand(0);
- }
+ // Strip off pointer casts if we got them.
+ Entry = Entry->stripPointerCasts();
// Entry is now either a Function or GlobalVariable.
auto *GV = dyn_cast<llvm::GlobalVariable>(Entry);
@@ -3867,7 +3953,8 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
// Make a new global with the correct type, this is now guaranteed to work.
GV = cast<llvm::GlobalVariable>(
- GetAddrOfGlobalVar(D, InitType, ForDefinition_t(!IsTentative)));
+ GetAddrOfGlobalVar(D, InitType, ForDefinition_t(!IsTentative))
+ ->stripPointerCasts());
// Replace all uses of the old global with the new global
llvm::Constant *NewPtrForOldDecl =
@@ -3944,8 +4031,7 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
GV->setConstant(true);
}
- GV->setAlignment(getContext().getDeclAlign(D).getQuantity());
-
+ GV->setAlignment(getContext().getDeclAlign(D).getAsAlign());
// On Darwin, if the normal linkage of a C++ thread_local variable is
// LinkOnce or Weak, we keep the normal linkage to prevent multiple
@@ -4025,6 +4111,7 @@ static bool isVarDeclStrongDefinition(const ASTContext &Context,
// If no specialized section name is applicable, it will resort to default.
if (D->hasAttr<PragmaClangBSSSectionAttr>() ||
D->hasAttr<PragmaClangDataSectionAttr>() ||
+ D->hasAttr<PragmaClangRelroSectionAttr>() ||
D->hasAttr<PragmaClangRodataSectionAttr>())
return true;
@@ -4286,6 +4373,11 @@ void CodeGenModule::HandleCXXStaticMemberVarInstantiation(VarDecl *VD) {
void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD,
llvm::GlobalValue *GV) {
+ // Check if this must be emitted as declare variant.
+ if (LangOpts.OpenMP && OpenMPRuntime &&
+ OpenMPRuntime->emitDeclareVariant(GD, /*IsForDefinition=*/true))
+ return;
+
const auto *D = cast<FunctionDecl>(GD.getDecl());
// Compute the function info and LLVM type.
@@ -4355,17 +4447,22 @@ void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) {
// Create a reference to the named value. This ensures that it is emitted
// if a deferred decl.
llvm::Constant *Aliasee;
- if (isa<llvm::FunctionType>(DeclTy))
+ llvm::GlobalValue::LinkageTypes LT;
+ if (isa<llvm::FunctionType>(DeclTy)) {
Aliasee = GetOrCreateLLVMFunction(AA->getAliasee(), DeclTy, GD,
/*ForVTable=*/false);
- else
+ LT = getFunctionLinkage(GD);
+ } else {
Aliasee = GetOrCreateLLVMGlobal(AA->getAliasee(),
llvm::PointerType::getUnqual(DeclTy),
/*D=*/nullptr);
+ LT = getLLVMLinkageVarDefinition(cast<VarDecl>(GD.getDecl()),
+ D->getType().isConstQualified());
+ }
// Create the new alias itself, but don't set a name yet.
- auto *GA = llvm::GlobalAlias::create(
- DeclTy, 0, llvm::Function::ExternalLinkage, "", Aliasee, &getModule());
+ auto *GA =
+ llvm::GlobalAlias::create(DeclTy, 0, LT, "", Aliasee, &getModule());
if (Entry) {
if (GA->getAliasee() == Entry) {
@@ -4634,7 +4731,7 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) {
// of the string is via this class initializer.
CharUnits Align = isUTF16 ? Context.getTypeAlignInChars(Context.ShortTy)
: Context.getTypeAlignInChars(Context.CharTy);
- GV->setAlignment(Align.getQuantity());
+ GV->setAlignment(Align.getAsAlign());
// FIXME: We set the section explicitly to avoid a bug in ld64 224.1.
// Without it LLVM can merge the string with a non unnamed_addr one during
@@ -4669,7 +4766,10 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) {
}
Fields.addInt(LengthTy, StringLength);
- CharUnits Alignment = getPointerAlign();
+ // Swift ABI requires 8-byte alignment to ensure that the _Atomic(uint64_t) is
+ // properly aligned on 32-bit platforms.
+ CharUnits Alignment =
+ IsSwiftABI ? Context.toCharUnitsFromBits(64) : getPointerAlign();
// The struct.
GV = Fields.finishAndCreateGlobal("_unnamed_cfstring_", Alignment,
@@ -4709,7 +4809,7 @@ QualType CodeGenModule::getObjCFastEnumerationStateType() {
Context.getPointerType(Context.getObjCIdType()),
Context.getPointerType(Context.UnsignedLongTy),
Context.getConstantArrayType(Context.UnsignedLongTy,
- llvm::APInt(32, 5), ArrayType::Normal, 0)
+ llvm::APInt(32, 5), nullptr, ArrayType::Normal, 0)
};
for (size_t i = 0; i < 4; ++i) {
@@ -4784,7 +4884,7 @@ GenerateStringLiteral(llvm::Constant *C, llvm::GlobalValue::LinkageTypes LT,
auto *GV = new llvm::GlobalVariable(
M, C->getType(), !CGM.getLangOpts().WritableStrings, LT, C, GlobalName,
nullptr, llvm::GlobalVariable::NotThreadLocal, AddrSpace);
- GV->setAlignment(Alignment.getQuantity());
+ GV->setAlignment(Alignment.getAsAlign());
GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
if (GV->isWeakForLinker()) {
assert(CGM.supportsCOMDAT() && "Only COFF uses weak string literals");
@@ -4808,7 +4908,7 @@ CodeGenModule::GetAddrOfConstantStringFromLiteral(const StringLiteral *S,
Entry = &ConstantStringMap[C];
if (auto GV = *Entry) {
if (Alignment.getQuantity() > GV->getAlignment())
- GV->setAlignment(Alignment.getQuantity());
+ GV->setAlignment(Alignment.getAsAlign());
return ConstantAddress(castStringLiteralToDefaultAddressSpace(*this, GV),
Alignment);
}
@@ -4871,7 +4971,7 @@ ConstantAddress CodeGenModule::GetAddrOfConstantCString(
Entry = &ConstantStringMap[C];
if (auto GV = *Entry) {
if (Alignment.getQuantity() > GV->getAlignment())
- GV->setAlignment(Alignment.getQuantity());
+ GV->setAlignment(Alignment.getAsAlign());
return ConstantAddress(castStringLiteralToDefaultAddressSpace(*this, GV),
Alignment);
}
@@ -4916,14 +5016,13 @@ ConstantAddress CodeGenModule::GetAddrOfGlobalTemporary(
VD, E->getManglingNumber(), Out);
APValue *Value = nullptr;
- if (E->getStorageDuration() == SD_Static) {
- // We might have a cached constant initializer for this temporary. Note
- // that this might have a different value from the value computed by
- // evaluating the initializer if the surrounding constant expression
- // modifies the temporary.
+ if (E->getStorageDuration() == SD_Static && VD && VD->evaluateValue()) {
+ // If the initializer of the extending declaration is a constant
+ // initializer, we should have a cached constant initializer for this
+ // temporary. Note that this might have a different value from the value
+ // computed by evaluating the initializer if the surrounding constant
+ // expression modifies the temporary.
Value = getContext().getMaterializedTemporaryValue(E, false);
- if (Value && Value->isAbsent())
- Value = nullptr;
}
// Try evaluating it now, it might have a constant initializer.
@@ -4974,7 +5073,7 @@ ConstantAddress CodeGenModule::GetAddrOfGlobalTemporary(
/*InsertBefore=*/nullptr, llvm::GlobalVariable::NotThreadLocal, TargetAS);
if (emitter) emitter->finalize(GV);
setGVProperties(GV, VD);
- GV->setAlignment(Align.getQuantity());
+ GV->setAlignment(Align.getAsAlign());
if (supportsCOMDAT() && GV->isWeakForLinker())
GV->setComdat(TheModule.getOrInsertComdat(GV->getName()));
if (VD->getTLSKind())
@@ -5083,7 +5182,9 @@ void CodeGenModule::EmitObjCIvarInitializations(ObjCImplementationDecl *D) {
// EmitLinkageSpec - Emit all declarations in a linkage spec.
void CodeGenModule::EmitLinkageSpec(const LinkageSpecDecl *LSD) {
if (LSD->getLanguage() != LinkageSpecDecl::lang_c &&
- LSD->getLanguage() != LinkageSpecDecl::lang_cxx) {
+ LSD->getLanguage() != LinkageSpecDecl::lang_cxx &&
+ LSD->getLanguage() != LinkageSpecDecl::lang_cxx_11 &&
+ LSD->getLanguage() != LinkageSpecDecl::lang_cxx_14) {
ErrorUnsupported(LSD, "linkage spec");
return;
}
@@ -5804,7 +5905,7 @@ void CodeGenModule::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap,
llvm::SanitizerStatReport &CodeGenModule::getSanStats() {
if (!SanStats)
- SanStats = llvm::make_unique<llvm::SanitizerStatReport>(&getModule());
+ SanStats = std::make_unique<llvm::SanitizerStatReport>(&getModule());
return *SanStats;
}
diff --git a/lib/CodeGen/CodeGenModule.h b/lib/CodeGen/CodeGenModule.h
index 95964afed4ec..73f81adae35f 100644
--- a/lib/CodeGen/CodeGenModule.h
+++ b/lib/CodeGen/CodeGenModule.h
@@ -1270,13 +1270,26 @@ public:
/// \param D Requires declaration
void EmitOMPRequiresDecl(const OMPRequiresDecl *D);
+ /// Emits the definition of \p OldGD function with body from \p NewGD.
+ /// Required for proper handling of declare variant directive on the GPU.
+ void emitOpenMPDeviceFunctionRedefinition(GlobalDecl OldGD, GlobalDecl NewGD,
+ llvm::GlobalValue *GV);
+
/// Returns whether the given record has hidden LTO visibility and therefore
/// may participate in (single-module) CFI and whole-program vtable
/// optimization.
bool HasHiddenLTOVisibility(const CXXRecordDecl *RD);
+ /// Returns the vcall visibility of the given type. This is the scope in which
+ /// a virtual function call could be made which ends up being dispatched to a
+ /// member function of this class. This scope can be wider than the visibility
+ /// of the class itself when the class has a more-visible dynamic base class.
+ llvm::GlobalObject::VCallVisibility
+ GetVCallVisibilityLevel(const CXXRecordDecl *RD);
+
/// Emit type metadata for the given vtable using the given layout.
- void EmitVTableTypeMetadata(llvm::GlobalVariable *VTable,
+ void EmitVTableTypeMetadata(const CXXRecordDecl *RD,
+ llvm::GlobalVariable *VTable,
const VTableLayout &VTLayout);
/// Generate a cross-DSO type identifier for MD.
diff --git a/lib/CodeGen/CodeGenPGO.cpp b/lib/CodeGen/CodeGenPGO.cpp
index d10a321dc3d7..e525abe979e3 100644
--- a/lib/CodeGen/CodeGenPGO.cpp
+++ b/lib/CodeGen/CodeGenPGO.cpp
@@ -980,7 +980,7 @@ void CodeGenPGO::loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader,
return;
}
ProfRecord =
- llvm::make_unique<llvm::InstrProfRecord>(std::move(RecordExpected.get()));
+ std::make_unique<llvm::InstrProfRecord>(std::move(RecordExpected.get()));
RegionCounts = ProfRecord->Counts;
}
diff --git a/lib/CodeGen/CodeGenPGO.h b/lib/CodeGen/CodeGenPGO.h
index 2e740f789243..a3778b549910 100644
--- a/lib/CodeGen/CodeGenPGO.h
+++ b/lib/CodeGen/CodeGenPGO.h
@@ -41,8 +41,8 @@ private:
public:
CodeGenPGO(CodeGenModule &CGM)
- : CGM(CGM), NumValueSites({{0}}), NumRegionCounters(0), FunctionHash(0),
- CurrentRegionCount(0) {}
+ : CGM(CGM), FuncNameVar(nullptr), NumValueSites({{0}}),
+ NumRegionCounters(0), FunctionHash(0), CurrentRegionCount(0) {}
/// Whether or not we have PGO region data for the current function. This is
/// false both when we have no data at all and when our data has been
diff --git a/lib/CodeGen/CodeGenTypes.cpp b/lib/CodeGen/CodeGenTypes.cpp
index 79b29b3d916f..a458811d7a30 100644
--- a/lib/CodeGen/CodeGenTypes.cpp
+++ b/lib/CodeGen/CodeGenTypes.cpp
@@ -135,8 +135,8 @@ isSafeToConvert(const RecordDecl *RD, CodeGenTypes &CGT,
// the class.
if (const CXXRecordDecl *CRD = dyn_cast<CXXRecordDecl>(RD)) {
for (const auto &I : CRD->bases())
- if (!isSafeToConvert(I.getType()->getAs<RecordType>()->getDecl(),
- CGT, AlreadyChecked))
+ if (!isSafeToConvert(I.getType()->castAs<RecordType>()->getDecl(), CGT,
+ AlreadyChecked))
return false;
}
@@ -402,7 +402,7 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
#define NON_CANONICAL_TYPE(Class, Base) case Type::Class:
#define DEPENDENT_TYPE(Class, Base) case Type::Class:
#define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class, Base) case Type::Class:
-#include "clang/AST/TypeNodes.def"
+#include "clang/AST/TypeNodes.inc"
llvm_unreachable("Non-canonical or dependent types aren't possible.");
case Type::Builtin: {
@@ -512,6 +512,22 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
ResultType = CGM.getOpenCLRuntime().convertOpenCLSpecificType(Ty);
break;
+ // TODO: real CodeGen support for SVE types requires more infrastructure
+ // to be added first. Report an error until then.
+#define SVE_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
+#include "clang/Basic/AArch64SVEACLETypes.def"
+ {
+ unsigned DiagID = CGM.getDiags().getCustomDiagID(
+ DiagnosticsEngine::Error,
+ "cannot yet generate code for SVE type '%0'");
+ auto *BT = cast<BuiltinType>(Ty);
+ auto Name = BT->getName(CGM.getContext().getPrintingPolicy());
+ CGM.getDiags().Report(DiagID) << Name;
+ // Return something safe.
+ ResultType = llvm::IntegerType::get(getLLVMContext(), 32);
+ break;
+ }
+
case BuiltinType::Dependent:
#define BUILTIN_TYPE(Id, SingletonId)
#define PLACEHOLDER_TYPE(Id, SingletonId) \
@@ -728,8 +744,7 @@ llvm::StructType *CodeGenTypes::ConvertRecordDeclType(const RecordDecl *RD) {
if (const CXXRecordDecl *CRD = dyn_cast<CXXRecordDecl>(RD)) {
for (const auto &I : CRD->bases()) {
if (I.isVirtual()) continue;
-
- ConvertRecordDeclType(I.getType()->getAs<RecordType>()->getDecl());
+ ConvertRecordDeclType(I.getType()->castAs<RecordType>()->getDecl());
}
}
diff --git a/lib/CodeGen/ConstantInitBuilder.cpp b/lib/CodeGen/ConstantInitBuilder.cpp
index 40b1607b5626..2d63d88020be 100644
--- a/lib/CodeGen/ConstantInitBuilder.cpp
+++ b/lib/CodeGen/ConstantInitBuilder.cpp
@@ -79,7 +79,7 @@ ConstantInitBuilderBase::createGlobal(llvm::Constant *initializer,
/*insert before*/ nullptr,
llvm::GlobalValue::NotThreadLocal,
addressSpace);
- GV->setAlignment(alignment.getQuantity());
+ GV->setAlignment(alignment.getAsAlign());
resolveSelfReferences(GV);
return GV;
}
diff --git a/lib/CodeGen/CoverageMappingGen.cpp b/lib/CodeGen/CoverageMappingGen.cpp
index 6d18027f16a8..a6f6e38d5f14 100644
--- a/lib/CodeGen/CoverageMappingGen.cpp
+++ b/lib/CodeGen/CoverageMappingGen.cpp
@@ -1278,13 +1278,6 @@ std::string getCoverageSection(const CodeGenModule &CGM) {
CGM.getContext().getTargetInfo().getTriple().getObjectFormat());
}
-std::string normalizeFilename(StringRef Filename) {
- llvm::SmallString<256> Path(Filename);
- llvm::sys::fs::make_absolute(Path);
- llvm::sys::path::remove_dots(Path, /*remove_dot_dot=*/true);
- return Path.str().str();
-}
-
} // end anonymous namespace
static void dump(llvm::raw_ostream &OS, StringRef FunctionName,
@@ -1317,6 +1310,24 @@ static void dump(llvm::raw_ostream &OS, StringRef FunctionName,
}
}
+CoverageMappingModuleGen::CoverageMappingModuleGen(
+ CodeGenModule &CGM, CoverageSourceInfo &SourceInfo)
+ : CGM(CGM), SourceInfo(SourceInfo), FunctionRecordTy(nullptr) {
+ // Honor -fdebug-compilation-dir in paths in coverage data. Otherwise, use the
+ // regular working directory when normalizing paths.
+ if (!CGM.getCodeGenOpts().DebugCompilationDir.empty())
+ CWD = CGM.getCodeGenOpts().DebugCompilationDir;
+ else
+ llvm::sys::fs::current_path(CWD);
+}
+
+std::string CoverageMappingModuleGen::normalizeFilename(StringRef Filename) {
+ llvm::SmallString<256> Path(Filename);
+ llvm::sys::fs::make_absolute(CWD, Path);
+ llvm::sys::path::remove_dots(Path, /*remove_dot_dot=*/true);
+ return Path.str().str();
+}
+
void CoverageMappingModuleGen::addFunctionMappingRecord(
llvm::GlobalVariable *NamePtr, StringRef NameValue, uint64_t FuncHash,
const std::string &CoverageMapping, bool IsUsed) {
@@ -1442,7 +1453,7 @@ void CoverageMappingModuleGen::emit() {
CovDataVal, llvm::getCoverageMappingVarName());
CovData->setSection(getCoverageSection(CGM));
- CovData->setAlignment(8);
+ CovData->setAlignment(llvm::Align(8));
// Make sure the data doesn't get deleted.
CGM.addUsedGlobal(CovData);
diff --git a/lib/CodeGen/CoverageMappingGen.h b/lib/CodeGen/CoverageMappingGen.h
index 3bf51f590479..2bdc00e25668 100644
--- a/lib/CodeGen/CoverageMappingGen.h
+++ b/lib/CodeGen/CoverageMappingGen.h
@@ -54,10 +54,14 @@ class CoverageMappingModuleGen {
std::vector<llvm::Constant *> FunctionNames;
llvm::StructType *FunctionRecordTy;
std::vector<std::string> CoverageMappings;
+ SmallString<256> CWD;
+
+ /// Make the filename absolute, remove dots, and normalize slashes to local
+ /// path style.
+ std::string normalizeFilename(StringRef Filename);
public:
- CoverageMappingModuleGen(CodeGenModule &CGM, CoverageSourceInfo &SourceInfo)
- : CGM(CGM), SourceInfo(SourceInfo), FunctionRecordTy(nullptr) {}
+ CoverageMappingModuleGen(CodeGenModule &CGM, CoverageSourceInfo &SourceInfo);
CoverageSourceInfo &getSourceInfo() const {
return SourceInfo;
diff --git a/lib/CodeGen/EHScopeStack.h b/lib/CodeGen/EHScopeStack.h
index 3b0db35d982b..0ed67aabcd62 100644
--- a/lib/CodeGen/EHScopeStack.h
+++ b/lib/CodeGen/EHScopeStack.h
@@ -199,14 +199,14 @@ public:
SavedTuple Saved;
template <std::size_t... Is>
- T restore(CodeGenFunction &CGF, llvm::index_sequence<Is...>) {
+ T restore(CodeGenFunction &CGF, std::index_sequence<Is...>) {
// It's important that the restores are emitted in order. The braced init
// list guarantees that.
return T{DominatingValue<As>::restore(CGF, std::get<Is>(Saved))...};
}
void Emit(CodeGenFunction &CGF, Flags flags) override {
- restore(CGF, llvm::index_sequence_for<As...>()).Emit(CGF, flags);
+ restore(CGF, std::index_sequence_for<As...>()).Emit(CGF, flags);
}
public:
diff --git a/lib/CodeGen/ItaniumCXXABI.cpp b/lib/CodeGen/ItaniumCXXABI.cpp
index 3b2413d960d6..8f9b16470b64 100644
--- a/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/lib/CodeGen/ItaniumCXXABI.cpp
@@ -43,6 +43,10 @@ class ItaniumCXXABI : public CodeGen::CGCXXABI {
/// VTables - All the vtables which have been defined.
llvm::DenseMap<const CXXRecordDecl *, llvm::GlobalVariable *> VTables;
+ /// All the thread wrapper functions that have been used.
+ llvm::SmallVector<std::pair<const VarDecl *, llvm::Function *>, 8>
+ ThreadWrappers;
+
protected:
bool UseARMMethodPtrABI;
bool UseARMGuardVarABI;
@@ -322,7 +326,43 @@ public:
ArrayRef<llvm::Function *> CXXThreadLocalInits,
ArrayRef<const VarDecl *> CXXThreadLocalInitVars) override;
- bool usesThreadWrapperFunction() const override { return true; }
+ /// Determine whether we will definitely emit this variable with a constant
+ /// initializer, either because the language semantics demand it or because
+ /// we know that the initializer is a constant.
+ bool isEmittedWithConstantInitializer(const VarDecl *VD) const {
+ VD = VD->getMostRecentDecl();
+ if (VD->hasAttr<ConstInitAttr>())
+ return true;
+
+ // All later checks examine the initializer specified on the variable. If
+ // the variable is weak, such examination would not be correct.
+ if (VD->isWeak() || VD->hasAttr<SelectAnyAttr>())
+ return false;
+
+ const VarDecl *InitDecl = VD->getInitializingDeclaration();
+ if (!InitDecl)
+ return false;
+
+ // If there's no initializer to run, this is constant initialization.
+ if (!InitDecl->hasInit())
+ return true;
+
+ // If we have the only definition, we don't need a thread wrapper if we
+ // will emit the value as a constant.
+ if (isUniqueGVALinkage(getContext().GetGVALinkageForVariable(VD)))
+ return !VD->needsDestruction(getContext()) && InitDecl->evaluateValue();
+
+ // Otherwise, we need a thread wrapper unless we know that every
+ // translation unit will emit the value as a constant. We rely on
+ // ICE-ness not varying between translation units, which isn't actually
+ // guaranteed by the standard but is necessary for sanity.
+ return InitDecl->isInitKnownICE() && InitDecl->isInitICE();
+ }
+
+ bool usesThreadWrapperFunction(const VarDecl *VD) const override {
+ return !isEmittedWithConstantInitializer(VD) ||
+ VD->needsDestruction(getContext());
+ }
LValue EmitThreadLocalVarDeclLValue(CodeGenFunction &CGF, const VarDecl *VD,
QualType LValType) override;
@@ -415,8 +455,8 @@ public:
class ARMCXXABI : public ItaniumCXXABI {
public:
ARMCXXABI(CodeGen::CodeGenModule &CGM) :
- ItaniumCXXABI(CGM, /* UseARMMethodPtrABI = */ true,
- /* UseARMGuardVarABI = */ true) {}
+ ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true,
+ /*UseARMGuardVarABI=*/true) {}
bool HasThisReturn(GlobalDecl GD) const override {
return (isa<CXXConstructorDecl>(GD.getDecl()) || (
@@ -480,11 +520,11 @@ CodeGen::CGCXXABI *CodeGen::CreateItaniumCXXABI(CodeGenModule &CGM) {
// include the other 32-bit ARM oddities: constructor/destructor return values
// and array cookies.
case TargetCXXABI::GenericAArch64:
- return new ItaniumCXXABI(CGM, /* UseARMMethodPtrABI = */ true,
- /* UseARMGuardVarABI = */ true);
+ return new ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true,
+ /*UseARMGuardVarABI=*/true);
case TargetCXXABI::GenericMIPS:
- return new ItaniumCXXABI(CGM, /* UseARMMethodPtrABI = */ true);
+ return new ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true);
case TargetCXXABI::WebAssembly:
return new WebAssemblyCXXABI(CGM);
@@ -495,8 +535,7 @@ CodeGen::CGCXXABI *CodeGen::CreateItaniumCXXABI(CodeGenModule &CGM) {
// For PNaCl, use ARM-style method pointers so that PNaCl code
// does not assume anything about the alignment of function
// pointers.
- return new ItaniumCXXABI(CGM, /* UseARMMethodPtrABI = */ true,
- /* UseARMGuardVarABI = */ false);
+ return new ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true);
}
return new ItaniumCXXABI(CGM);
@@ -541,8 +580,8 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer(
const FunctionProtoType *FPT =
MPT->getPointeeType()->getAs<FunctionProtoType>();
- const CXXRecordDecl *RD =
- cast<CXXRecordDecl>(MPT->getClass()->getAs<RecordType>()->getDecl());
+ auto *RD =
+ cast<CXXRecordDecl>(MPT->getClass()->castAs<RecordType>()->getDecl());
llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(
CGM.getTypes().arrangeCXXMethodType(RD, FPT, /*FD=*/nullptr));
@@ -605,8 +644,6 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer(
VTableOffset = Builder.CreateTrunc(VTableOffset, CGF.Int32Ty);
VTableOffset = Builder.CreateZExt(VTableOffset, CGM.PtrDiffTy);
}
- // Compute the address of the virtual function pointer.
- llvm::Value *VFPAddr = Builder.CreateGEP(VTable, VTableOffset);
// Check the address of the function pointer if CFI on member function
// pointers is enabled.
@@ -614,44 +651,81 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer(
llvm::Constant *CheckTypeDesc;
bool ShouldEmitCFICheck = CGF.SanOpts.has(SanitizerKind::CFIMFCall) &&
CGM.HasHiddenLTOVisibility(RD);
- if (ShouldEmitCFICheck) {
- CodeGenFunction::SanitizerScope SanScope(&CGF);
-
- CheckSourceLocation = CGF.EmitCheckSourceLocation(E->getBeginLoc());
- CheckTypeDesc = CGF.EmitCheckTypeDescriptor(QualType(MPT, 0));
- llvm::Constant *StaticData[] = {
- llvm::ConstantInt::get(CGF.Int8Ty, CodeGenFunction::CFITCK_VMFCall),
- CheckSourceLocation,
- CheckTypeDesc,
- };
-
- llvm::Metadata *MD =
- CGM.CreateMetadataIdentifierForVirtualMemPtrType(QualType(MPT, 0));
- llvm::Value *TypeId = llvm::MetadataAsValue::get(CGF.getLLVMContext(), MD);
+ bool ShouldEmitVFEInfo = CGM.getCodeGenOpts().VirtualFunctionElimination &&
+ CGM.HasHiddenLTOVisibility(RD);
+ llvm::Value *VirtualFn = nullptr;
- llvm::Value *TypeTest = Builder.CreateCall(
- CGM.getIntrinsic(llvm::Intrinsic::type_test), {VFPAddr, TypeId});
+ {
+ CodeGenFunction::SanitizerScope SanScope(&CGF);
+ llvm::Value *TypeId = nullptr;
+ llvm::Value *CheckResult = nullptr;
+
+ if (ShouldEmitCFICheck || ShouldEmitVFEInfo) {
+ // If doing CFI or VFE, we will need the metadata node to check against.
+ llvm::Metadata *MD =
+ CGM.CreateMetadataIdentifierForVirtualMemPtrType(QualType(MPT, 0));
+ TypeId = llvm::MetadataAsValue::get(CGF.getLLVMContext(), MD);
+ }
- if (CGM.getCodeGenOpts().SanitizeTrap.has(SanitizerKind::CFIMFCall)) {
- CGF.EmitTrapCheck(TypeTest);
+ llvm::Value *VFPAddr = Builder.CreateGEP(VTable, VTableOffset);
+
+ if (ShouldEmitVFEInfo) {
+ // If doing VFE, load from the vtable with a type.checked.load intrinsic
+ // call. Note that we use the GEP to calculate the address to load from
+ // and pass 0 as the offset to the intrinsic. This is because every
+ // vtable slot of the correct type is marked with matching metadata, and
+ // we know that the load must be from one of these slots.
+ llvm::Value *CheckedLoad = Builder.CreateCall(
+ CGM.getIntrinsic(llvm::Intrinsic::type_checked_load),
+ {VFPAddr, llvm::ConstantInt::get(CGM.Int32Ty, 0), TypeId});
+ CheckResult = Builder.CreateExtractValue(CheckedLoad, 1);
+ VirtualFn = Builder.CreateExtractValue(CheckedLoad, 0);
+ VirtualFn = Builder.CreateBitCast(VirtualFn, FTy->getPointerTo(),
+ "memptr.virtualfn");
} else {
- llvm::Value *AllVtables = llvm::MetadataAsValue::get(
- CGM.getLLVMContext(),
- llvm::MDString::get(CGM.getLLVMContext(), "all-vtables"));
- llvm::Value *ValidVtable = Builder.CreateCall(
- CGM.getIntrinsic(llvm::Intrinsic::type_test), {VTable, AllVtables});
- CGF.EmitCheck(std::make_pair(TypeTest, SanitizerKind::CFIMFCall),
- SanitizerHandler::CFICheckFail, StaticData,
- {VTable, ValidVtable});
+ // When not doing VFE, emit a normal load, as it allows more
+ // optimisations than type.checked.load.
+ if (ShouldEmitCFICheck) {
+ CheckResult = Builder.CreateCall(
+ CGM.getIntrinsic(llvm::Intrinsic::type_test),
+ {Builder.CreateBitCast(VFPAddr, CGF.Int8PtrTy), TypeId});
+ }
+ VFPAddr =
+ Builder.CreateBitCast(VFPAddr, FTy->getPointerTo()->getPointerTo());
+ VirtualFn = Builder.CreateAlignedLoad(VFPAddr, CGF.getPointerAlign(),
+ "memptr.virtualfn");
}
+ // Both load paths above must have produced the function pointer, and a
+ // check result whenever either CFI or VFE was requested.
+ assert(VirtualFn && "Virtual function pointer not created!");
+ assert(((!ShouldEmitCFICheck && !ShouldEmitVFEInfo) || CheckResult) &&
+ "Check result required but not created!");
+
+ if (ShouldEmitCFICheck) {
+ // If doing CFI, emit the check.
+ CheckSourceLocation = CGF.EmitCheckSourceLocation(E->getBeginLoc());
+ CheckTypeDesc = CGF.EmitCheckTypeDescriptor(QualType(MPT, 0));
+ llvm::Constant *StaticData[] = {
+ llvm::ConstantInt::get(CGF.Int8Ty, CodeGenFunction::CFITCK_VMFCall),
+ CheckSourceLocation,
+ CheckTypeDesc,
+ };
- FnVirtual = Builder.GetInsertBlock();
- }
+ if (CGM.getCodeGenOpts().SanitizeTrap.has(SanitizerKind::CFIMFCall)) {
+ CGF.EmitTrapCheck(CheckResult);
+ } else {
+ llvm::Value *AllVtables = llvm::MetadataAsValue::get(
+ CGM.getLLVMContext(),
+ llvm::MDString::get(CGM.getLLVMContext(), "all-vtables"));
+ llvm::Value *ValidVtable = Builder.CreateCall(
+ CGM.getIntrinsic(llvm::Intrinsic::type_test), {VTable, AllVtables});
+ CGF.EmitCheck(std::make_pair(CheckResult, SanitizerKind::CFIMFCall),
+ SanitizerHandler::CFICheckFail, StaticData,
+ {VTable, ValidVtable});
+ }
+
+ FnVirtual = Builder.GetInsertBlock();
+ }
+ } // End of sanitizer scope
- // Load the virtual function to call.
- VFPAddr = Builder.CreateBitCast(VFPAddr, FTy->getPointerTo()->getPointerTo());
- llvm::Value *VirtualFn = Builder.CreateAlignedLoad(
- VFPAddr, CGF.getPointerAlign(), "memptr.virtualfn");
CGF.EmitBranch(FnEnd);
// In the non-virtual path, the function pointer is actually a
@@ -1104,7 +1178,7 @@ void ItaniumCXXABI::emitVirtualObjectDelete(CodeGenFunction &CGF,
// Grab the vtable pointer as an intptr_t*.
auto *ClassDecl =
- cast<CXXRecordDecl>(ElementType->getAs<RecordType>()->getDecl());
+ cast<CXXRecordDecl>(ElementType->castAs<RecordType>()->getDecl());
llvm::Value *VTable =
CGF.GetVTablePtr(Ptr, CGF.IntPtrTy->getPointerTo(), ClassDecl);
@@ -1307,7 +1381,7 @@ llvm::Value *ItaniumCXXABI::EmitTypeid(CodeGenFunction &CGF,
Address ThisPtr,
llvm::Type *StdTypeInfoPtrTy) {
auto *ClassDecl =
- cast<CXXRecordDecl>(SrcRecordTy->getAs<RecordType>()->getDecl());
+ cast<CXXRecordDecl>(SrcRecordTy->castAs<RecordType>()->getDecl());
llvm::Value *Value =
CGF.GetVTablePtr(ThisPtr, StdTypeInfoPtrTy->getPointerTo(), ClassDecl);
@@ -1373,7 +1447,7 @@ llvm::Value *ItaniumCXXABI::EmitDynamicCastToVoid(CodeGenFunction &CGF,
llvm::Type *DestLTy = CGF.ConvertType(DestTy);
auto *ClassDecl =
- cast<CXXRecordDecl>(SrcRecordTy->getAs<RecordType>()->getDecl());
+ cast<CXXRecordDecl>(SrcRecordTy->castAs<RecordType>()->getDecl());
// Get the vtable pointer.
llvm::Value *VTable = CGF.GetVTablePtr(ThisAddr, PtrDiffLTy->getPointerTo(),
ClassDecl);
@@ -1595,7 +1669,7 @@ void ItaniumCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT,
EmitFundamentalRTTIDescriptors(RD);
if (!VTable->isDeclarationForLinker())
- CGM.EmitVTableTypeMetadata(VTable, VTLayout);
+ CGM.EmitVTableTypeMetadata(RD, VTable, VTLayout);
}
bool ItaniumCXXABI::isVirtualOffsetNeededForVTableField(
@@ -1755,10 +1829,11 @@ llvm::Value *ItaniumCXXABI::EmitVirtualDestructorCall(
CGCallee Callee = CGCallee::forVirtual(CE, GD, This, Ty);
QualType ThisTy;
- if (CE)
- ThisTy = CE->getImplicitObjectArgument()->getType()->getPointeeType();
- else
+ if (CE) {
+ ThisTy = CE->getObjectType();
+ } else {
ThisTy = D->getDestroyedType();
+ }
CGF.EmitCXXDestructorCall(GD, Callee, This.getPointer(), ThisTy, nullptr,
QualType(), nullptr);
@@ -2154,7 +2229,7 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF,
guard->setVisibility(var->getVisibility());
// If the variable is thread-local, so is its guard variable.
guard->setThreadLocalMode(var->getThreadLocalMode());
- guard->setAlignment(guardAlignment.getQuantity());
+ guard->setAlignment(guardAlignment.getAsAlign());
// The ABI says: "It is suggested that it be emitted in the same COMDAT
// group as the associated data object." In practice, this doesn't work for
@@ -2456,9 +2531,6 @@ ItaniumCXXABI::getOrCreateThreadLocalWrapper(const VarDecl *VD,
CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Wrapper);
- if (VD->hasDefinition())
- CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Wrapper);
-
// Always resolve references to the wrapper at link time.
if (!Wrapper->hasLocalLinkage())
if (!isThreadWrapperReplaceable(VD, CGM) ||
@@ -2471,6 +2543,8 @@ ItaniumCXXABI::getOrCreateThreadLocalWrapper(const VarDecl *VD,
Wrapper->setCallingConv(llvm::CallingConv::CXX_FAST_TLS);
Wrapper->addFnAttr(llvm::Attribute::NoUnwind);
}
+
+ ThreadWrappers.push_back({VD, Wrapper});
return Wrapper;
}
@@ -2508,7 +2582,7 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs(
Guard->setThreadLocal(true);
CharUnits GuardAlign = CharUnits::One();
- Guard->setAlignment(GuardAlign.getQuantity());
+ Guard->setAlignment(GuardAlign.getAsAlign());
CodeGenFunction(CGM).GenerateCXXGlobalInitFunc(
InitFunc, OrderedInits, ConstantAddress(Guard, GuardAlign));
@@ -2519,20 +2593,40 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs(
}
}
- // Emit thread wrappers.
+ // Create declarations for thread wrappers for all thread-local variables
+ // with non-discardable definitions in this translation unit.
for (const VarDecl *VD : CXXThreadLocals) {
+ if (VD->hasDefinition() &&
+ !isDiscardableGVALinkage(getContext().GetGVALinkageForVariable(VD))) {
+ llvm::GlobalValue *GV = CGM.GetGlobalValue(CGM.getMangledName(VD));
+ getOrCreateThreadLocalWrapper(VD, GV);
+ }
+ }
+
+ // Emit all referenced thread wrappers.
+ for (auto VDAndWrapper : ThreadWrappers) {
+ const VarDecl *VD = VDAndWrapper.first;
llvm::GlobalVariable *Var =
cast<llvm::GlobalVariable>(CGM.GetGlobalValue(CGM.getMangledName(VD)));
- llvm::Function *Wrapper = getOrCreateThreadLocalWrapper(VD, Var);
+ llvm::Function *Wrapper = VDAndWrapper.second;
// Some targets require that all access to thread local variables go through
// the thread wrapper. This means that we cannot attempt to create a thread
// wrapper or a thread helper.
- if (isThreadWrapperReplaceable(VD, CGM) && !VD->hasDefinition()) {
- Wrapper->setLinkage(llvm::Function::ExternalLinkage);
- continue;
+ if (!VD->hasDefinition()) {
+ if (isThreadWrapperReplaceable(VD, CGM)) {
+ Wrapper->setLinkage(llvm::Function::ExternalLinkage);
+ continue;
+ }
+
+ // If this isn't a TU in which this variable is defined, the thread
+ // wrapper is discardable.
+ if (Wrapper->getLinkage() == llvm::Function::WeakODRLinkage)
+ Wrapper->setLinkage(llvm::Function::LinkOnceODRLinkage);
}
+ CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Wrapper);
+
// Mangle the name for the thread_local initialization function.
SmallString<256> InitFnName;
{
@@ -2547,7 +2641,10 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs(
// produce a declaration of the initialization function.
llvm::GlobalValue *Init = nullptr;
bool InitIsInitFunc = false;
- if (VD->hasDefinition()) {
+ bool HasConstantInitialization = false;
+ if (!usesThreadWrapperFunction(VD)) {
+ HasConstantInitialization = true;
+ } else if (VD->hasDefinition()) {
InitIsInitFunc = true;
llvm::Function *InitFuncToUse = InitFunc;
if (isTemplateInstantiation(VD->getTemplateSpecializationKind()))
@@ -2576,7 +2673,9 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs(
llvm::LLVMContext &Context = CGM.getModule().getContext();
llvm::BasicBlock *Entry = llvm::BasicBlock::Create(Context, "", Wrapper);
CGBuilderTy Builder(CGM, Entry);
- if (InitIsInitFunc) {
+ if (HasConstantInitialization) {
+ // No dynamic initialization to invoke.
+ } else if (InitIsInitFunc) {
if (Init) {
llvm::CallInst *CallVal = Builder.CreateCall(InitFnTy, Init);
if (isThreadWrapperReplaceable(VD, CGM)) {
@@ -2860,6 +2959,9 @@ static bool TypeInfoIsInStandardLibrary(const BuiltinType *Ty) {
case BuiltinType::OCLClkEvent:
case BuiltinType::OCLQueue:
case BuiltinType::OCLReserveID:
+#define SVE_TYPE(Name, Id, SingletonId) \
+ case BuiltinType::Id:
+#include "clang/Basic/AArch64SVEACLETypes.def"
case BuiltinType::ShortAccum:
case BuiltinType::Accum:
case BuiltinType::LongAccum:
@@ -3033,8 +3135,8 @@ static bool CanUseSingleInheritance(const CXXRecordDecl *RD) {
return false;
// Check that the class is dynamic iff the base is.
- const CXXRecordDecl *BaseDecl =
- cast<CXXRecordDecl>(Base->getType()->getAs<RecordType>()->getDecl());
+ auto *BaseDecl =
+ cast<CXXRecordDecl>(Base->getType()->castAs<RecordType>()->getDecl());
if (!BaseDecl->isEmpty() &&
BaseDecl->isDynamicClass() != RD->isDynamicClass())
return false;
@@ -3061,7 +3163,7 @@ void ItaniumRTTIBuilder::BuildVTablePointer(const Type *Ty) {
#define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class, Base) case Type::Class:
#define NON_CANONICAL_TYPE(Class, Base) case Type::Class:
#define DEPENDENT_TYPE(Class, Base) case Type::Class:
-#include "clang/AST/TypeNodes.def"
+#include "clang/AST/TypeNodes.inc"
llvm_unreachable("Non-canonical and dependent types shouldn't get here");
case Type::LValueReference:
@@ -3307,7 +3409,7 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo(
#define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class, Base) case Type::Class:
#define NON_CANONICAL_TYPE(Class, Base) case Type::Class:
#define DEPENDENT_TYPE(Class, Base) case Type::Class:
-#include "clang/AST/TypeNodes.def"
+#include "clang/AST/TypeNodes.inc"
llvm_unreachable("Non-canonical and dependent types shouldn't get here");
// GCC treats vector types as fundamental types.
@@ -3412,7 +3514,7 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo(
CharUnits Align =
CGM.getContext().toCharUnitsFromBits(CGM.getTarget().getPointerAlign(0));
- GV->setAlignment(Align.getQuantity());
+ GV->setAlignment(Align.getAsAlign());
// The Itanium ABI specifies that type_info objects must be globally
// unique, with one exception: if the type is an incomplete class
@@ -3497,8 +3599,8 @@ static unsigned ComputeVMIClassTypeInfoFlags(const CXXBaseSpecifier *Base,
unsigned Flags = 0;
- const CXXRecordDecl *BaseDecl =
- cast<CXXRecordDecl>(Base->getType()->getAs<RecordType>()->getDecl());
+ auto *BaseDecl =
+ cast<CXXRecordDecl>(Base->getType()->castAs<RecordType>()->getDecl());
if (Base->isVirtual()) {
// Mark the virtual base as seen.
@@ -3596,8 +3698,8 @@ void ItaniumRTTIBuilder::BuildVMIClassTypeInfo(const CXXRecordDecl *RD) {
// The __base_type member points to the RTTI for the base type.
Fields.push_back(ItaniumRTTIBuilder(CXXABI).BuildTypeInfo(Base.getType()));
- const CXXRecordDecl *BaseDecl =
- cast<CXXRecordDecl>(Base.getType()->getAs<RecordType>()->getDecl());
+ auto *BaseDecl =
+ cast<CXXRecordDecl>(Base.getType()->castAs<RecordType>()->getDecl());
int64_t OffsetFlags = 0;
diff --git a/lib/CodeGen/MicrosoftCXXABI.cpp b/lib/CodeGen/MicrosoftCXXABI.cpp
index fa34414de5da..2d8b538bc2ee 100644
--- a/lib/CodeGen/MicrosoftCXXABI.cpp
+++ b/lib/CodeGen/MicrosoftCXXABI.cpp
@@ -386,7 +386,9 @@ public:
ArrayRef<llvm::Function *> CXXThreadLocalInits,
ArrayRef<const VarDecl *> CXXThreadLocalInitVars) override;
- bool usesThreadWrapperFunction() const override { return false; }
+ // Always returns false here; the VarDecl argument is not consulted.
+ bool usesThreadWrapperFunction(const VarDecl *VD) const override {
+ return false;
+ }
LValue EmitThreadLocalVarDeclLValue(CodeGenFunction &CGF, const VarDecl *VD,
QualType LValType) override;
@@ -1208,7 +1210,7 @@ static bool hasDefaultCXXMethodCC(ASTContext &Context,
CallingConv ExpectedCallingConv = Context.getDefaultCallingConvention(
/*IsVariadic=*/false, /*IsCXXMethod=*/true);
CallingConv ActualCallingConv =
- MD->getType()->getAs<FunctionProtoType>()->getCallConv();
+ MD->getType()->castAs<FunctionProtoType>()->getCallConv();
return ExpectedCallingConv == ActualCallingConv;
}
@@ -1921,10 +1923,11 @@ llvm::Value *MicrosoftCXXABI::EmitVirtualDestructorCall(
DtorType == Dtor_Deleting);
QualType ThisTy;
- if (CE)
- ThisTy = CE->getImplicitObjectArgument()->getType()->getPointeeType();
- else
+ if (CE) {
+ ThisTy = CE->getObjectType();
+ } else {
ThisTy = D->getDestroyedType();
+ }
This = adjustThisArgumentForVirtualFunctionCall(CGF, GD, This, true);
RValue RV = CGF.EmitCXXDestructorCall(GD, Callee, This.getPointer(), ThisTy,
@@ -2352,7 +2355,7 @@ static ConstantAddress getInitThreadEpochPtr(CodeGenModule &CGM) {
/*isConstant=*/false, llvm::GlobalVariable::ExternalLinkage,
/*Initializer=*/nullptr, VarName,
/*InsertBefore=*/nullptr, llvm::GlobalVariable::GeneralDynamicTLSModel);
- GV->setAlignment(Align.getQuantity());
+ GV->setAlignment(Align.getAsAlign());
return ConstantAddress(GV, Align);
}
@@ -2495,7 +2498,7 @@ void MicrosoftCXXABI::EmitGuardedInit(CodeGenFunction &CGF, const VarDecl &D,
GV->getLinkage(), Zero, GuardName.str());
GuardVar->setVisibility(GV->getVisibility());
GuardVar->setDLLStorageClass(GV->getDLLStorageClass());
- GuardVar->setAlignment(GuardAlign.getQuantity());
+ GuardVar->setAlignment(GuardAlign.getAsAlign());
if (GuardVar->isWeakForLinker())
GuardVar->setComdat(
CGM.getModule().getOrInsertComdat(GuardVar->getName()));
diff --git a/lib/CodeGen/ModuleBuilder.cpp b/lib/CodeGen/ModuleBuilder.cpp
index 3b4e06045a37..4154f6ebe736 100644
--- a/lib/CodeGen/ModuleBuilder.cpp
+++ b/lib/CodeGen/ModuleBuilder.cpp
@@ -65,6 +65,13 @@ namespace {
private:
SmallVector<FunctionDecl *, 8> DeferredInlineMemberFuncDefs;
+ /// Picks the name to give the llvm::Module: CGO.MainFileName when
+ /// ModuleName is exactly "-" and a main file name is set, otherwise
+ /// ModuleName unchanged.
+ static llvm::StringRef ExpandModuleName(llvm::StringRef ModuleName,
+ const CodeGenOptions &CGO) {
+ if (ModuleName == "-" && !CGO.MainFileName.empty())
+ return CGO.MainFileName;
+ return ModuleName;
+ }
+
public:
CodeGeneratorImpl(DiagnosticsEngine &diags, llvm::StringRef ModuleName,
const HeaderSearchOptions &HSO,
@@ -73,7 +80,8 @@ namespace {
CoverageSourceInfo *CoverageInfo = nullptr)
: Diags(diags), Ctx(nullptr), HeaderSearchOpts(HSO),
PreprocessorOpts(PPO), CodeGenOpts(CGO), HandlingTopLevelDecls(0),
- CoverageInfo(CoverageInfo), M(new llvm::Module(ModuleName, C)) {
+ CoverageInfo(CoverageInfo),
+ M(new llvm::Module(ExpandModuleName(ModuleName, CGO), C)) {
C.setDiscardValueNames(CGO.DiscardValueNames);
}
@@ -121,7 +129,7 @@ namespace {
+ // Creates a new module in context C (asserting that none is currently
+ // held), naming it via ExpandModuleName, then calls Initialize(*Ctx) and
+ // returns the raw pointer; M continues to hold the module.
llvm::Module *StartModule(llvm::StringRef ModuleName,
llvm::LLVMContext &C) {
assert(!M && "Replacing existing Module?");
- M.reset(new llvm::Module(ModuleName, C));
+ M.reset(new llvm::Module(ExpandModuleName(ModuleName, CodeGenOpts), C));
Initialize(*Ctx);
return M.get();
}
@@ -232,6 +240,9 @@ namespace {
if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(Member)) {
if (Ctx->DeclMustBeEmitted(DRD))
Builder->EmitGlobal(DRD);
+ } else if (auto *DMD = dyn_cast<OMPDeclareMapperDecl>(Member)) {
+ if (Ctx->DeclMustBeEmitted(DMD))
+ Builder->EmitGlobal(DMD);
}
}
}
diff --git a/lib/CodeGen/ObjectFilePCHContainerOperations.cpp b/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
index 15a2ab99fdac..284e8022a3c4 100644
--- a/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
+++ b/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
@@ -279,7 +279,7 @@ public:
*M, Ty, /*constant*/ true, llvm::GlobalVariable::InternalLinkage, Data,
"__clang_ast");
// The on-disk hashtable needs to be aligned.
- ASTSym->setAlignment(8);
+ ASTSym->setAlignment(llvm::Align(8));
// Mach-O also needs a segment name.
if (Triple.isOSBinFormatMachO())
@@ -297,7 +297,7 @@ public:
Diags, HeaderSearchOpts, CodeGenOpts, TargetOpts, LangOpts,
Ctx.getTargetInfo().getDataLayout(), M.get(),
BackendAction::Backend_EmitLL,
- llvm::make_unique<llvm::raw_svector_ostream>(Buffer));
+ std::make_unique<llvm::raw_svector_ostream>(Buffer));
llvm::dbgs() << Buffer;
});
@@ -321,7 +321,7 @@ ObjectFilePCHContainerWriter::CreatePCHContainerGenerator(
const std::string &OutputFileName,
std::unique_ptr<llvm::raw_pwrite_stream> OS,
std::shared_ptr<PCHBuffer> Buffer) const {
- return llvm::make_unique<PCHContainerGenerator>(
+ return std::make_unique<PCHContainerGenerator>(
CI, MainFileName, OutputFileName, std::move(OS), Buffer);
}
@@ -335,7 +335,11 @@ ObjectFilePCHContainerReader::ExtractPCH(llvm::MemoryBufferRef Buffer) const {
// Find the clang AST section in the container.
for (auto &Section : OF->sections()) {
StringRef Name;
- Section.getName(Name);
+ if (Expected<StringRef> NameOrErr = Section.getName())
+ Name = *NameOrErr;
+ else
+ consumeError(NameOrErr.takeError());
+
if ((!IsCOFF && Name == "__clangast") || (IsCOFF && Name == "clangast")) {
if (Expected<StringRef> E = Section.getContents())
return *E;
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp
index 5da988fb8a3c..c2c7b8bf653b 100644
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -833,10 +833,13 @@ ABIArgInfo WebAssemblyABIInfo::classifyReturnType(QualType RetTy) const {
Address WebAssemblyABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
- return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*IsIndirect=*/ false,
+ // IsIndirect is set only for aggregate types that are neither empty records
+ // nor single-element structs; every other type keeps IsIndirect == false,
+ // which is what was previously passed unconditionally.
+ bool IsIndirect = isAggregateTypeForABI(Ty) &&
+ !isEmptyRecord(getContext(), Ty, true) &&
+ !isSingleElementStruct(Ty, getContext());
+ return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
getContext().getTypeInfoInChars(Ty),
CharUnits::fromQuantity(4),
- /*AllowHigherAlign=*/ true);
+ /*AllowHigherAlign=*/true);
}
//===----------------------------------------------------------------------===//
@@ -2177,6 +2180,17 @@ class X86_64ABIInfo : public SwiftABIInfo {
return true;
}
+ // GCC classifies vectors of __int128 as memory. Returns true when this
+ // ABI should do the same: the Clang ABI compatibility level must be newer
+ // than Ver9 and the target OS must be Linux or NetBSD.
+ bool passInt128VectorsInMem() const {
+ // Clang <= 9.0 did not do this, so keep the old behaviour when asked to
+ // be ABI-compatible with those releases.
+ if (getContext().getLangOpts().getClangABICompat() <=
+ LangOptions::ClangABI::Ver9)
+ return false;
+
+ const llvm::Triple &T = getTarget().getTriple();
+ return T.isOSLinux() || T.isOSNetBSD();
+ }
+
X86AVXABILevel AVXLevel;
// Some ABIs (e.g. X32 ABI and Native Client OS) use 32 bit pointers on
// 64-bit hardware.
@@ -2657,6 +2671,14 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase,
Hi = Lo;
} else if (Size == 128 ||
(isNamedArg && Size <= getNativeVectorSizeForAVXABI(AVXLevel))) {
+ QualType ElementType = VT->getElementType();
+
+ // gcc passes 256 and 512 bit <X x __int128> vectors in memory. :(
+ if (passInt128VectorsInMem() && Size != 128 &&
+ (ElementType->isSpecificBuiltinType(BuiltinType::Int128) ||
+ ElementType->isSpecificBuiltinType(BuiltinType::UInt128)))
+ return;
+
// Arguments of 256-bits are split into four eightbyte chunks. The
// least significant one belongs to class SSE and all the others to class
// SSEUP. The original Lo and Hi design considers that types can't be
@@ -2787,8 +2809,8 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase,
for (const auto &I : CXXRD->bases()) {
assert(!I.isVirtual() && !I.getType()->isDependentType() &&
"Unexpected base class!");
- const CXXRecordDecl *Base =
- cast<CXXRecordDecl>(I.getType()->getAs<RecordType>()->getDecl());
+ const auto *Base =
+ cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl());
// Classify this field.
//
@@ -2899,6 +2921,11 @@ bool X86_64ABIInfo::IsIllegalVectorType(QualType Ty) const {
unsigned LargestVector = getNativeVectorSizeForAVXABI(AVXLevel);
if (Size <= 64 || Size > LargestVector)
return true;
+ QualType EltTy = VecTy->getElementType();
+ if (passInt128VectorsInMem() &&
+ (EltTy->isSpecificBuiltinType(BuiltinType::Int128) ||
+ EltTy->isSpecificBuiltinType(BuiltinType::UInt128)))
+ return true;
}
return false;
@@ -2973,14 +3000,28 @@ llvm::Type *X86_64ABIInfo::GetByteVectorType(QualType Ty) const {
Ty = QualType(InnerTy, 0);
llvm::Type *IRType = CGT.ConvertType(Ty);
- if (isa<llvm::VectorType>(IRType) ||
- IRType->getTypeID() == llvm::Type::FP128TyID)
+ if (isa<llvm::VectorType>(IRType)) {
+ // Don't pass vXi128 vectors in their native type, the backend can't
+ // legalize them.
+ if (passInt128VectorsInMem() &&
+ IRType->getVectorElementType()->isIntegerTy(128)) {
+ // Use a vXi64 vector.
+ uint64_t Size = getContext().getTypeSize(Ty);
+ return llvm::VectorType::get(llvm::Type::getInt64Ty(getVMContext()),
+ Size / 64);
+ }
+
+ return IRType;
+ }
+
+ if (IRType->getTypeID() == llvm::Type::FP128TyID)
return IRType;
// We couldn't find the preferred IR vector type for 'Ty'.
uint64_t Size = getContext().getTypeSize(Ty);
assert((Size == 128 || Size == 256 || Size == 512) && "Invalid type found!");
+
// Return a LLVM IR vector type based on the size of 'Ty'.
return llvm::VectorType::get(llvm::Type::getDoubleTy(getVMContext()),
Size / 64);
@@ -3030,8 +3071,8 @@ static bool BitsContainNoUserData(QualType Ty, unsigned StartBit,
for (const auto &I : CXXRD->bases()) {
assert(!I.isVirtual() && !I.getType()->isDependentType() &&
"Unexpected base class!");
- const CXXRecordDecl *Base =
- cast<CXXRecordDecl>(I.getType()->getAs<RecordType>()->getDecl());
+ const auto *Base =
+ cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl());
// If the base is after the span we care about, ignore it.
unsigned BaseOffset = Context.toBits(Layout.getBaseClassOffset(Base));
@@ -7909,8 +7950,12 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
const auto *ReqdWGS = M.getLangOpts().OpenCL ?
FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr;
- if (((M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>()) ||
- (M.getLangOpts().HIP && FD->hasAttr<CUDAGlobalAttr>())) &&
+
+ const bool IsOpenCLKernel = M.getLangOpts().OpenCL &&
+ FD->hasAttr<OpenCLKernelAttr>();
+ const bool IsHIPKernel = M.getLangOpts().HIP &&
+ FD->hasAttr<CUDAGlobalAttr>();
+ if ((IsOpenCLKernel || IsHIPKernel) &&
(M.getTriple().getOS() == llvm::Triple::AMDHSA))
F->addFnAttr("amdgpu-implicitarg-num-bytes", "56");
@@ -7936,6 +7981,9 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
} else
assert(Max == 0 && "Max must be zero");
+ } else if (IsOpenCLKernel || IsHIPKernel) {
+ // By default, restrict the maximum size to 256.
+ F->addFnAttr("amdgpu-flat-work-group-size", "1,256");
}
if (const auto *Attr = FD->getAttr<AMDGPUWavesPerEUAttr>()) {
@@ -9188,25 +9236,45 @@ static bool getTypeString(SmallStringEnc &Enc, const Decl *D,
namespace {
+ // ABI classification for RISC-V, built on DefaultABIInfo and parameterised
+ // by the integer register width (XLen) and the FP register width used by
+ // the selected ABI (FLen).
class RISCVABIInfo : public DefaultABIInfo {
private:
- unsigned XLen; // Size of the integer ('x') registers in bits.
+ // Size of the integer ('x') registers in bits.
+ unsigned XLen;
+ // Size of the floating point ('f') registers in bits. Note that the target
+ // ISA might have a wider FLen than the selected ABI (e.g. an RV32IF target
+ // with soft float ABI has FLen==0).
+ unsigned FLen;
static const int NumArgGPRs = 8;
+ static const int NumArgFPRs = 8;
+ // Recursive worker for detectFPCCEligibleStruct; CurOff is the offset of
+ // Ty within the outermost type being examined.
+ bool detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff,
+ llvm::Type *&Field1Ty,
+ CharUnits &Field1Off,
+ llvm::Type *&Field2Ty,
+ CharUnits &Field2Off) const;
public:
- RISCVABIInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen)
- : DefaultABIInfo(CGT), XLen(XLen) {}
+ RISCVABIInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen, unsigned FLen)
+ : DefaultABIInfo(CGT), XLen(XLen), FLen(FLen) {}
// DefaultABIInfo's classifyReturnType and classifyArgumentType are
// non-virtual, but computeInfo is virtual, so we override it.
void computeInfo(CGFunctionInfo &FI) const override;
- ABIArgInfo classifyArgumentType(QualType Ty, bool IsFixed,
- int &ArgGPRsLeft) const;
+ // ArgGPRsLeft and ArgFPRsLeft are in/out counters of the argument
+ // registers still available.
+ ABIArgInfo classifyArgumentType(QualType Ty, bool IsFixed, int &ArgGPRsLeft,
+ int &ArgFPRsLeft) const;
ABIArgInfo classifyReturnType(QualType RetTy) const;
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
ABIArgInfo extendType(QualType Ty) const;
+
+ // Reports whether Ty flattens to a single fp value, fp+fp or int+fp, and
+ // if so describes the field(s) and the registers they need.
+ bool detectFPCCEligibleStruct(QualType Ty, llvm::Type *&Field1Ty,
+ CharUnits &Field1Off, llvm::Type *&Field2Ty,
+ CharUnits &Field2Off, int &NeededArgGPRs,
+ int &NeededArgFPRs) const;
+ // Builds the CoerceAndExpand ABIArgInfo for the field(s) found by
+ // detectFPCCEligibleStruct.
+ ABIArgInfo coerceAndExpandFPCCEligibleStruct(llvm::Type *Field1Ty,
+ CharUnits Field1Off,
+ llvm::Type *Field2Ty,
+ CharUnits Field2Off) const;
};
} // end anonymous namespace
@@ -9228,18 +9296,215 @@ void RISCVABIInfo::computeInfo(CGFunctionInfo &FI) const {
// different for variadic arguments, we must also track whether we are
// examining a vararg or not.
int ArgGPRsLeft = IsRetIndirect ? NumArgGPRs - 1 : NumArgGPRs;
+ int ArgFPRsLeft = FLen ? NumArgFPRs : 0;
int NumFixedArgs = FI.getNumRequiredArgs();
int ArgNum = 0;
for (auto &ArgInfo : FI.arguments()) {
bool IsFixed = ArgNum < NumFixedArgs;
- ArgInfo.info = classifyArgumentType(ArgInfo.type, IsFixed, ArgGPRsLeft);
+ ArgInfo.info =
+ classifyArgumentType(ArgInfo.type, IsFixed, ArgGPRsLeft, ArgFPRsLeft);
ArgNum++;
}
}
+// Returns true if the struct is a potential candidate for the floating point
+// calling convention. If this function returns true, the caller is
+// responsible for checking that if there is only a single field then that
+// field is a float.
+//
+// Ty is flattened recursively; CurOff is the offset of Ty within the
+// outermost type. The first two scalar fields encountered are recorded in
+// Field1Ty/Field1Off and Field2Ty/Field2Off (converted IR type and offset);
+// a third scalar field makes the type ineligible.
+bool RISCVABIInfo::detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff,
+ llvm::Type *&Field1Ty,
+ CharUnits &Field1Off,
+ llvm::Type *&Field2Ty,
+ CharUnits &Field2Off) const {
+ bool IsInt = Ty->isIntegralOrEnumerationType();
+ bool IsFloat = Ty->isRealFloatingType();
+
+ if (IsInt || IsFloat) {
+ uint64_t Size = getContext().getTypeSize(Ty);
+ // An integer wider than an integer register is never eligible.
+ if (IsInt && Size > XLen)
+ return false;
+ // Can't be eligible if larger than the FP registers. Half precision isn't
+ // currently supported on RISC-V and the ABI hasn't been confirmed, so
+ // default to the integer ABI in that case.
+ if (IsFloat && (Size > FLen || Size < 32))
+ return false;
+ // Can't be eligible if an integer type was already found (int+int pairs
+ // are not eligible).
+ if (IsInt && Field1Ty && Field1Ty->isIntegerTy())
+ return false;
+ // Record this scalar in the first free slot.
+ if (!Field1Ty) {
+ Field1Ty = CGT.ConvertType(Ty);
+ Field1Off = CurOff;
+ return true;
+ }
+ if (!Field2Ty) {
+ Field2Ty = CGT.ConvertType(Ty);
+ Field2Off = CurOff;
+ return true;
+ }
+ // Both slots are already taken.
+ return false;
+ }
+
+ // A complex value fills both slots with its element type, so it is only
+ // accepted when nothing has been recorded yet.
+ if (auto CTy = Ty->getAs<ComplexType>()) {
+ if (Field1Ty)
+ return false;
+ QualType EltTy = CTy->getElementType();
+ if (getContext().getTypeSize(EltTy) > FLen)
+ return false;
+ Field1Ty = CGT.ConvertType(EltTy);
+ Field1Off = CurOff;
+ assert(CurOff.isZero() && "Unexpected offset for first field");
+ Field2Ty = Field1Ty;
+ Field2Off = Field1Off + getContext().getTypeSizeInChars(EltTy);
+ return true;
+ }
+
+ // Constant-size arrays are flattened one element at a time, advancing the
+ // running offset by the element size.
+ if (const ConstantArrayType *ATy = getContext().getAsConstantArrayType(Ty)) {
+ uint64_t ArraySize = ATy->getSize().getZExtValue();
+ QualType EltTy = ATy->getElementType();
+ CharUnits EltSize = getContext().getTypeSizeInChars(EltTy);
+ for (uint64_t i = 0; i < ArraySize; ++i) {
+ bool Ret = detectFPCCEligibleStructHelper(EltTy, CurOff, Field1Ty,
+ Field1Off, Field2Ty, Field2Off);
+ if (!Ret)
+ return false;
+ CurOff += EltSize;
+ }
+ return true;
+ }
+
+ if (const auto *RTy = Ty->getAs<RecordType>()) {
+ // Structures with either a non-trivial destructor or a non-trivial
+ // copy constructor are not eligible for the FP calling convention.
+ if (getRecordArgABI(Ty, CGT.getCXXABI()))
+ return false;
+ if (isEmptyRecord(getContext(), Ty, true))
+ return true;
+ const RecordDecl *RD = RTy->getDecl();
+ // Unions aren't eligible unless they're empty (which is caught above).
+ if (RD->isUnion())
+ return false;
+ int ZeroWidthBitFieldCount = 0;
+ for (const FieldDecl *FD : RD->fields()) {
+ const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD);
+ uint64_t FieldOffInBits = Layout.getFieldOffset(FD->getFieldIndex());
+ QualType QTy = FD->getType();
+ if (FD->isBitField()) {
+ unsigned BitWidth = FD->getBitWidthValue(getContext());
+ // Allow a bitfield with a type greater than XLen as long as the
+ // bitwidth is XLen or less.
+ if (getContext().getTypeSize(QTy) > XLen && BitWidth <= XLen)
+ QTy = getContext().getIntTypeForBitwidth(XLen, false);
+ // Zero-width bitfields are only counted here; whether they matter is
+ // decided after the next real field has been examined.
+ if (BitWidth == 0) {
+ ZeroWidthBitFieldCount++;
+ continue;
+ }
+ }
+
+ // Recurse into the field at its offset relative to the outermost type.
+ bool Ret = detectFPCCEligibleStructHelper(
+ QTy, CurOff + getContext().toCharUnitsFromBits(FieldOffInBits),
+ Field1Ty, Field1Off, Field2Ty, Field2Off);
+ if (!Ret)
+ return false;
+
+ // As a quirk of the ABI, zero-width bitfields aren't ignored for fp+fp
+ // or int+fp structs, but are ignored for a struct with an fp field and
+ // any number of zero-width bitfields.
+ if (Field2Ty && ZeroWidthBitFieldCount > 0)
+ return false;
+ }
+ // A non-empty record only qualifies if at least one field was recorded.
+ return Field1Ty != nullptr;
+ }
+
+ // Any other kind of type is not eligible.
+ return false;
+}
+
+// Determine if a struct is eligible for passing according to the floating
+// point calling convention (i.e., when flattened it contains a single fp
+// value, fp+fp, or int+fp of appropriate size).
+//
+// Field1Ty, Field2Ty, NeededArgGPRs and NeededArgFPRs are reset on entry.
+// On success the counters hold one FPR per floating point field and one GPR
+// per other recorded field.
+bool RISCVABIInfo::detectFPCCEligibleStruct(QualType Ty, llvm::Type *&Field1Ty,
+ CharUnits &Field1Off,
+ llvm::Type *&Field2Ty,
+ CharUnits &Field2Off,
+ int &NeededArgGPRs,
+ int &NeededArgFPRs) const {
+ Field1Ty = nullptr;
+ Field2Ty = nullptr;
+ NeededArgGPRs = 0;
+ NeededArgFPRs = 0;
+ bool IsCandidate = detectFPCCEligibleStructHelper(
+ Ty, CharUnits::Zero(), Field1Ty, Field1Off, Field2Ty, Field2Off);
+ // Not really a candidate if we have a single int but no float.
+ if (Field1Ty && !Field2Ty && !Field1Ty->isFloatingPointTy())
+ return false;
+ if (!IsCandidate)
+ return false;
+ // Count the registers needed, field by field.
+ if (Field1Ty && Field1Ty->isFloatingPointTy())
+ NeededArgFPRs++;
+ else if (Field1Ty)
+ NeededArgGPRs++;
+ if (Field2Ty && Field2Ty->isFloatingPointTy())
+ NeededArgFPRs++;
+ else if (Field2Ty)
+ NeededArgGPRs++;
+ return IsCandidate;
+}
+
+// Call getCoerceAndExpand for the two-element flattened struct described by
+// Field1Ty, Field1Off, Field2Ty, Field2Off. This method will create an
+// appropriate coerceToType and unpaddedCoerceToType.
+//
+// Field2Ty may be null, in which case only the first field (preceded by any
+// leading padding) is described and Field2Off is not used.
+ABIArgInfo RISCVABIInfo::coerceAndExpandFPCCEligibleStruct(
+ llvm::Type *Field1Ty, CharUnits Field1Off, llvm::Type *Field2Ty,
+ CharUnits Field2Off) const {
+ SmallVector<llvm::Type *, 3> CoerceElts;
+ SmallVector<llvm::Type *, 2> UnpaddedCoerceElts;
+ // Represent any gap before the first field as an i8 array.
+ if (!Field1Off.isZero())
+ CoerceElts.push_back(llvm::ArrayType::get(
+ llvm::Type::getInt8Ty(getVMContext()), Field1Off.getQuantity()));
+
+ CoerceElts.push_back(Field1Ty);
+ UnpaddedCoerceElts.push_back(Field1Ty);
+
+ // Single-field case: the coerce struct is marked packed exactly when
+ // leading padding was inserted, and the unpadded type is the bare field
+ // type.
+ if (!Field2Ty) {
+ return ABIArgInfo::getCoerceAndExpand(
+ llvm::StructType::get(getVMContext(), CoerceElts, !Field1Off.isZero()),
+ UnpaddedCoerceElts[0]);
+ }
+
+ CharUnits Field2Align =
+ CharUnits::fromQuantity(getDataLayout().getABITypeAlignment(Field2Ty));
+ CharUnits Field1Size =
+ CharUnits::fromQuantity(getDataLayout().getTypeStoreSize(Field1Ty));
+ // Field1's store size rounded up to Field2's ABI alignment.
+ CharUnits Field2OffNoPadNoPack = Field1Size.alignTo(Field2Align);
+
+ // Explicit padding is needed when Field2 lies beyond that rounded-up
+ // position; otherwise, when its offset differs from Field2Align and lies
+ // past Field1's store size, pad from the end of Field1 instead.
+ CharUnits Padding = CharUnits::Zero();
+ if (Field2Off > Field2OffNoPadNoPack)
+ Padding = Field2Off - Field2OffNoPadNoPack;
+ else if (Field2Off != Field2Align && Field2Off > Field1Size)
+ Padding = Field2Off - Field1Size;
+
+ // The struct must be packed if Field2 is not at a multiple of its ABI
+ // alignment.
+ bool IsPacked = !Field2Off.isMultipleOf(Field2Align);
+
+ if (!Padding.isZero())
+ CoerceElts.push_back(llvm::ArrayType::get(
+ llvm::Type::getInt8Ty(getVMContext()), Padding.getQuantity()));
+
+ CoerceElts.push_back(Field2Ty);
+ UnpaddedCoerceElts.push_back(Field2Ty);
+
+ auto CoerceToType =
+ llvm::StructType::get(getVMContext(), CoerceElts, IsPacked);
+ auto UnpaddedCoerceToType =
+ llvm::StructType::get(getVMContext(), UnpaddedCoerceElts, IsPacked);
+
+ return ABIArgInfo::getCoerceAndExpand(CoerceToType, UnpaddedCoerceToType);
+}
+
ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed,
- int &ArgGPRsLeft) const {
+ int &ArgGPRsLeft,
+ int &ArgFPRsLeft) const {
assert(ArgGPRsLeft <= NumArgGPRs && "Arg GPR tracking underflow");
Ty = useFirstFieldIfTransparentUnion(Ty);
@@ -9257,6 +9522,42 @@ ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed,
return ABIArgInfo::getIgnore();
uint64_t Size = getContext().getTypeSize(Ty);
+
+ // Pass floating point values via FPRs if possible.
+ if (IsFixed && Ty->isFloatingType() && FLen >= Size && ArgFPRsLeft) {
+ ArgFPRsLeft--;
+ return ABIArgInfo::getDirect();
+ }
+
+ // Complex types for the hard float ABI must be passed direct rather than
+ // using CoerceAndExpand.
+ if (IsFixed && Ty->isComplexType() && FLen && ArgFPRsLeft >= 2) {
+ QualType EltTy = Ty->castAs<ComplexType>()->getElementType();
+ if (getContext().getTypeSize(EltTy) <= FLen) {
+ ArgFPRsLeft -= 2;
+ return ABIArgInfo::getDirect();
+ }
+ }
+
+ if (IsFixed && FLen && Ty->isStructureOrClassType()) {
+ llvm::Type *Field1Ty = nullptr;
+ llvm::Type *Field2Ty = nullptr;
+ CharUnits Field1Off = CharUnits::Zero();
+ CharUnits Field2Off = CharUnits::Zero();
+ int NeededArgGPRs;
+ int NeededArgFPRs;
+ bool IsCandidate =
+ detectFPCCEligibleStruct(Ty, Field1Ty, Field1Off, Field2Ty, Field2Off,
+ NeededArgGPRs, NeededArgFPRs);
+ if (IsCandidate && NeededArgGPRs <= ArgGPRsLeft &&
+ NeededArgFPRs <= ArgFPRsLeft) {
+ ArgGPRsLeft -= NeededArgGPRs;
+ ArgFPRsLeft -= NeededArgFPRs;
+ return coerceAndExpandFPCCEligibleStruct(Field1Ty, Field1Off, Field2Ty,
+ Field2Off);
+ }
+ }
+
uint64_t NeededAlign = getContext().getTypeAlign(Ty);
bool MustUseStack = false;
// Determine the number of GPRs needed to pass the current argument
@@ -9315,10 +9616,12 @@ ABIArgInfo RISCVABIInfo::classifyReturnType(QualType RetTy) const {
return ABIArgInfo::getIgnore();
int ArgGPRsLeft = 2;
+ int ArgFPRsLeft = FLen ? 2 : 0;
// The rules for return and argument types are the same, so defer to
// classifyArgumentType.
- return classifyArgumentType(RetTy, /*IsFixed=*/true, ArgGPRsLeft);
+ return classifyArgumentType(RetTy, /*IsFixed=*/true, ArgGPRsLeft,
+ ArgFPRsLeft);
}
Address RISCVABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
@@ -9353,8 +9656,9 @@ ABIArgInfo RISCVABIInfo::extendType(QualType Ty) const {
namespace {
class RISCVTargetCodeGenInfo : public TargetCodeGenInfo {
public:
- RISCVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen)
- : TargetCodeGenInfo(new RISCVABIInfo(CGT, XLen)) {}
+ RISCVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen,
+ unsigned FLen)
+ : TargetCodeGenInfo(new RISCVABIInfo(CGT, XLen, FLen)) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &CGM) const override {
@@ -9460,7 +9764,8 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
case llvm::Triple::ppc:
return SetCGInfo(
- new PPC32TargetCodeGenInfo(Types, CodeGenOpts.FloatABI == "soft"));
+ new PPC32TargetCodeGenInfo(Types, CodeGenOpts.FloatABI == "soft" ||
+ getTarget().hasFeature("spe")));
case llvm::Triple::ppc64:
if (Triple.isOSBinFormatELF()) {
PPC64_SVR4_ABIInfo::ABIKind Kind = PPC64_SVR4_ABIInfo::ELFv1;
@@ -9493,9 +9798,16 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
return SetCGInfo(new MSP430TargetCodeGenInfo(Types));
case llvm::Triple::riscv32:
- return SetCGInfo(new RISCVTargetCodeGenInfo(Types, 32));
- case llvm::Triple::riscv64:
- return SetCGInfo(new RISCVTargetCodeGenInfo(Types, 64));
+ case llvm::Triple::riscv64: {
+ StringRef ABIStr = getTarget().getABI();
+ unsigned XLen = getTarget().getPointerWidth(0);
+ unsigned ABIFLen = 0;
+ if (ABIStr.endswith("f"))
+ ABIFLen = 32;
+ else if (ABIStr.endswith("d"))
+ ABIFLen = 64;
+ return SetCGInfo(new RISCVTargetCodeGenInfo(Types, XLen, ABIFLen));
+ }
case llvm::Triple::systemz: {
bool HasVector = getTarget().getABI() == "vector";
@@ -9642,7 +9954,7 @@ llvm::Function *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel(
Builder.SetInsertPoint(BB);
unsigned BlockAlign = CGF.CGM.getDataLayout().getPrefTypeAlignment(BlockTy);
auto *BlockPtr = Builder.CreateAlloca(BlockTy, nullptr);
- BlockPtr->setAlignment(BlockAlign);
+ BlockPtr->setAlignment(llvm::MaybeAlign(BlockAlign));
Builder.CreateAlignedStore(F->arg_begin(), BlockPtr, BlockAlign);
auto *Cast = Builder.CreatePointerCast(BlockPtr, InvokeFT->getParamType(0));
llvm::SmallVector<llvm::Value *, 2> Args;