aboutsummaryrefslogtreecommitdiffstats
path: root/lib/CodeGen
diff options
context:
space:
mode:
author Dimitry Andric <dim@FreeBSD.org> 2019-01-19 10:04:05 +0000
committer Dimitry Andric <dim@FreeBSD.org> 2019-01-19 10:04:05 +0000
commit 676fbe8105eeb6ff4bb2ed261cb212fcfdbe7b63 (patch)
tree 02a1ac369cb734d0abfa5000dd86e5b7797e6a74 /lib/CodeGen
parent c7e70c433efc6953dc3888b9fbf9f3512d7da2b0 (diff)
download src-676fbe8105eeb6ff4bb2ed261cb212fcfdbe7b63.tar.gz
src-676fbe8105eeb6ff4bb2ed261cb212fcfdbe7b63.zip
Vendor import of clang trunk r351319 (just before the release_80 branch point)
vendor/clang/clang-trunk-r351319
Notes
Notes: svn path=/vendor/clang/dist/; revision=343173
Notes: svn path=/vendor/clang/clang-trunk-r351319/; revision=343174; tag=vendor/clang/clang-trunk-r351319
Diffstat (limited to 'lib/CodeGen')
-rw-r--r-- lib/CodeGen/BackendUtil.cpp 129
-rw-r--r-- lib/CodeGen/CGAtomic.cpp 28
-rw-r--r-- lib/CodeGen/CGBlocks.cpp 886
-rw-r--r-- lib/CodeGen/CGBlocks.h 10
-rw-r--r-- lib/CodeGen/CGBuiltin.cpp 1787
-rw-r--r-- lib/CodeGen/CGCUDANV.cpp 33
-rw-r--r-- lib/CodeGen/CGCXX.cpp 4
-rw-r--r-- lib/CodeGen/CGCXXABI.cpp 2
-rw-r--r-- lib/CodeGen/CGCall.cpp 128
-rw-r--r-- lib/CodeGen/CGCall.h 14
-rw-r--r-- lib/CodeGen/CGClass.cpp 33
-rw-r--r-- lib/CodeGen/CGCleanup.cpp 4
-rw-r--r-- lib/CodeGen/CGCoroutine.cpp 14
-rw-r--r-- lib/CodeGen/CGDebugInfo.cpp 505
-rw-r--r-- lib/CodeGen/CGDebugInfo.h 64
-rw-r--r-- lib/CodeGen/CGDecl.cpp 568
-rw-r--r-- lib/CodeGen/CGDeclCXX.cpp 103
-rw-r--r-- lib/CodeGen/CGException.cpp 45
-rw-r--r-- lib/CodeGen/CGExpr.cpp 159
-rw-r--r-- lib/CodeGen/CGExprAgg.cpp 9
-rw-r--r-- lib/CodeGen/CGExprCXX.cpp 36
-rw-r--r-- lib/CodeGen/CGExprComplex.cpp 8
-rw-r--r-- lib/CodeGen/CGExprConstant.cpp 46
-rw-r--r-- lib/CodeGen/CGExprScalar.cpp 455
-rw-r--r-- lib/CodeGen/CGLoopInfo.cpp 129
-rw-r--r-- lib/CodeGen/CGLoopInfo.h 33
-rw-r--r-- lib/CodeGen/CGNonTrivialStruct.cpp 31
-rw-r--r-- lib/CodeGen/CGObjC.cpp 511
-rw-r--r-- lib/CodeGen/CGObjCGNU.cpp 335
-rw-r--r-- lib/CodeGen/CGObjCMac.cpp 84
-rw-r--r-- lib/CodeGen/CGObjCRuntime.cpp 60
-rw-r--r-- lib/CodeGen/CGObjCRuntime.h 7
-rw-r--r-- lib/CodeGen/CGOpenCLRuntime.cpp 40
-rw-r--r-- lib/CodeGen/CGOpenCLRuntime.h 5
-rw-r--r-- lib/CodeGen/CGOpenMPRuntime.cpp 1079
-rw-r--r-- lib/CodeGen/CGOpenMPRuntime.h 117
-rw-r--r-- lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp 2371
-rw-r--r-- lib/CodeGen/CGOpenMPRuntimeNVPTX.h 99
-rw-r--r-- lib/CodeGen/CGRecordLayoutBuilder.cpp 2
-rw-r--r-- lib/CodeGen/CGStmt.cpp 54
-rw-r--r-- lib/CodeGen/CGStmtOpenMP.cpp 443
-rw-r--r-- lib/CodeGen/CGVTT.cpp 6
-rw-r--r-- lib/CodeGen/CGVTables.cpp 27
-rw-r--r-- lib/CodeGen/CGValue.h 5
-rw-r--r-- lib/CodeGen/CMakeLists.txt 1
-rw-r--r-- lib/CodeGen/CodeGenABITypes.cpp 1
-rw-r--r-- lib/CodeGen/CodeGenAction.cpp 17
-rw-r--r-- lib/CodeGen/CodeGenFunction.cpp 325
-rw-r--r-- lib/CodeGen/CodeGenFunction.h 179
-rw-r--r-- lib/CodeGen/CodeGenModule.cpp 648
-rw-r--r-- lib/CodeGen/CodeGenModule.h 50
-rw-r--r-- lib/CodeGen/CodeGenPGO.cpp 11
-rw-r--r-- lib/CodeGen/CodeGenPGO.h 1
-rw-r--r-- lib/CodeGen/CodeGenTBAA.cpp 2
-rw-r--r-- lib/CodeGen/CodeGenTypes.cpp 3
-rw-r--r-- lib/CodeGen/CodeGenTypes.h 1
-rw-r--r-- lib/CodeGen/ConstantEmitter.h 3
-rw-r--r-- lib/CodeGen/CoverageMappingGen.cpp 107
-rw-r--r-- lib/CodeGen/CoverageMappingGen.h 1
-rw-r--r-- lib/CodeGen/ItaniumCXXABI.cpp 103
-rw-r--r-- lib/CodeGen/MacroPPCallbacks.cpp 17
-rw-r--r-- lib/CodeGen/MacroPPCallbacks.h 5
-rw-r--r-- lib/CodeGen/MicrosoftCXXABI.cpp 22
-rw-r--r-- lib/CodeGen/ModuleBuilder.cpp 34
-rw-r--r-- lib/CodeGen/ObjectFilePCHContainerOperations.cpp 5
-rw-r--r-- lib/CodeGen/SwiftCallingConv.cpp 40
-rw-r--r-- lib/CodeGen/TargetInfo.cpp 331
-rw-r--r-- lib/CodeGen/VarBypassDetector.cpp 2
-rw-r--r-- lib/CodeGen/VarBypassDetector.h 1
69 files changed, 9104 insertions, 3314 deletions
diff --git a/lib/CodeGen/BackendUtil.cpp b/lib/CodeGen/BackendUtil.cpp
index 415bd9626220..b927acabac59 100644
--- a/lib/CodeGen/BackendUtil.cpp
+++ b/lib/CodeGen/BackendUtil.cpp
@@ -8,10 +8,10 @@
//===----------------------------------------------------------------------===//
#include "clang/CodeGen/BackendUtil.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/TargetOptions.h"
-#include "clang/Frontend/CodeGenOptions.h"
#include "clang/Frontend/FrontendDiagnostic.h"
#include "clang/Frontend/Utils.h"
#include "clang/Lex/HeaderSearchOptions.h"
@@ -37,6 +37,7 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Passes/PassBuilder.h"
+#include "llvm/Support/BuryPointer.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
@@ -54,10 +55,13 @@
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Instrumentation/BoundsChecking.h"
#include "llvm/Transforms/Instrumentation/GCOVProfiler.h"
+#include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
+#include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
#include "llvm/Transforms/ObjCARC.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
#include "llvm/Transforms/Utils/NameAnonGlobals.h"
#include "llvm/Transforms/Utils/SymbolRewriter.h"
#include <memory>
@@ -235,11 +239,12 @@ static void addAddressSanitizerPasses(const PassManagerBuilder &Builder,
const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts();
bool Recover = CGOpts.SanitizeRecover.has(SanitizerKind::Address);
bool UseAfterScope = CGOpts.SanitizeAddressUseAfterScope;
+ bool UseOdrIndicator = CGOpts.SanitizeAddressUseOdrIndicator;
bool UseGlobalsGC = asanUseGlobalsGC(T, CGOpts);
PM.add(createAddressSanitizerFunctionPass(/*CompileKernel*/ false, Recover,
UseAfterScope));
PM.add(createAddressSanitizerModulePass(/*CompileKernel*/ false, Recover,
- UseGlobalsGC));
+ UseGlobalsGC, UseOdrIndicator));
}
static void addKernelAddressSanitizerPasses(const PassManagerBuilder &Builder,
@@ -247,7 +252,8 @@ static void addKernelAddressSanitizerPasses(const PassManagerBuilder &Builder,
PM.add(createAddressSanitizerFunctionPass(
/*CompileKernel*/ true, /*Recover*/ true, /*UseAfterScope*/ false));
PM.add(createAddressSanitizerModulePass(
- /*CompileKernel*/ true, /*Recover*/ true));
+ /*CompileKernel*/ true, /*Recover*/ true, /*UseGlobalsGC*/ true,
+ /*UseOdrIndicator*/ false));
}
static void addHWAddressSanitizerPasses(const PassManagerBuilder &Builder,
@@ -265,14 +271,15 @@ static void addKernelHWAddressSanitizerPasses(const PassManagerBuilder &Builder,
/*CompileKernel*/ true, /*Recover*/ true));
}
-static void addMemorySanitizerPass(const PassManagerBuilder &Builder,
- legacy::PassManagerBase &PM) {
+static void addGeneralOptsForMemorySanitizer(const PassManagerBuilder &Builder,
+ legacy::PassManagerBase &PM,
+ bool CompileKernel) {
const PassManagerBuilderWrapper &BuilderWrapper =
static_cast<const PassManagerBuilderWrapper&>(Builder);
const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts();
int TrackOrigins = CGOpts.SanitizeMemoryTrackOrigins;
bool Recover = CGOpts.SanitizeRecover.has(SanitizerKind::Memory);
- PM.add(createMemorySanitizerPass(TrackOrigins, Recover));
+ PM.add(createMemorySanitizerLegacyPassPass(TrackOrigins, Recover, CompileKernel));
// MemorySanitizer inserts complex instrumentation that mostly follows
// the logic of the original code, but operates on "shadow" values.
@@ -287,9 +294,19 @@ static void addMemorySanitizerPass(const PassManagerBuilder &Builder,
}
}
+static void addMemorySanitizerPass(const PassManagerBuilder &Builder,
+ legacy::PassManagerBase &PM) {
+ addGeneralOptsForMemorySanitizer(Builder, PM, /*CompileKernel*/ false);
+}
+
+static void addKernelMemorySanitizerPass(const PassManagerBuilder &Builder,
+ legacy::PassManagerBase &PM) {
+ addGeneralOptsForMemorySanitizer(Builder, PM, /*CompileKernel*/ true);
+}
+
static void addThreadSanitizerPass(const PassManagerBuilder &Builder,
legacy::PassManagerBase &PM) {
- PM.add(createThreadSanitizerPass());
+ PM.add(createThreadSanitizerLegacyPassPass());
}
static void addDataFlowSanitizerPass(const PassManagerBuilder &Builder,
@@ -368,6 +385,7 @@ static CodeGenOpt::Level getCGOptLevel(const CodeGenOptions &CodeGenOpts) {
static Optional<llvm::CodeModel::Model>
getCodeModel(const CodeGenOptions &CodeGenOpts) {
unsigned CodeModel = llvm::StringSwitch<unsigned>(CodeGenOpts.CodeModel)
+ .Case("tiny", llvm::CodeModel::Tiny)
.Case("small", llvm::CodeModel::Small)
.Case("kernel", llvm::CodeModel::Kernel)
.Case("medium", llvm::CodeModel::Medium)
@@ -416,7 +434,7 @@ static void initTargetOptions(llvm::TargetOptions &Options,
switch (LangOpts.getDefaultFPContractMode()) {
case LangOptions::FPC_Off:
// Preserve any contraction performed by the front-end. (Strict performs
- // splitting of the muladd instrinsic in the backend.)
+ // splitting of the muladd intrinsic in the backend.)
Options.AllowFPOpFusion = llvm::FPOpFusion::Standard;
break;
case LangOptions::FPC_On:
@@ -456,7 +474,7 @@ static void initTargetOptions(llvm::TargetOptions &Options,
Options.EmitStackSizeSection = CodeGenOpts.StackSizeSection;
Options.EmitAddrsig = CodeGenOpts.Addrsig;
- if (CodeGenOpts.EnableSplitDwarf)
+ if (CodeGenOpts.getSplitDwarfMode() != CodeGenOptions::NoFission)
Options.MCOptions.SplitDwarfFile = CodeGenOpts.SplitDwarfFile;
Options.MCOptions.MCRelaxAll = CodeGenOpts.RelaxAll;
Options.MCOptions.MCSaveTempLabels = CodeGenOpts.SaveTempLabels;
@@ -491,6 +509,8 @@ static Optional<GCOVOptions> getGCOVOptions(const CodeGenOptions &CodeGenOpts) {
Options.UseCfgChecksum = CodeGenOpts.CoverageExtraChecksum;
Options.NoRedZone = CodeGenOpts.DisableRedZone;
Options.FunctionNamesInData = !CodeGenOpts.CoverageNoFunctionNamesInData;
+ Options.Filter = CodeGenOpts.ProfileFilterFiles;
+ Options.Exclude = CodeGenOpts.ProfileExcludeFiles;
Options.ExitBlockBeforeBody = CodeGenOpts.CoverageExitBlockBeforeBody;
return Options;
}
@@ -613,6 +633,13 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
addMemorySanitizerPass);
}
+ if (LangOpts.Sanitize.has(SanitizerKind::KernelMemory)) {
+ PMBuilder.addExtension(PassManagerBuilder::EP_OptimizerLast,
+ addKernelMemorySanitizerPass);
+ PMBuilder.addExtension(PassManagerBuilder::EP_EnabledOnOptLevel0,
+ addKernelMemorySanitizerPass);
+ }
+
if (LangOpts.Sanitize.has(SanitizerKind::Thread)) {
PMBuilder.addExtension(PassManagerBuilder::EP_OptimizerLast,
addThreadSanitizerPass);
@@ -653,6 +680,11 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
InstrProfOptions Options;
Options.NoRedZone = CodeGenOpts.DisableRedZone;
Options.InstrProfileOutput = CodeGenOpts.InstrProfileOutput;
+
+ // TODO: Surface the option to emit atomic profile counter increments at
+ // the driver level.
+ Options.Atomic = LangOpts.Sanitize.has(SanitizerKind::Thread);
+
MPM.add(createInstrProfilingLegacyPass(Options));
}
if (CodeGenOpts.hasProfileIRInstr()) {
@@ -777,12 +809,14 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
break;
case Backend_EmitBC:
- if (CodeGenOpts.PrepareForThinLTO) {
+ if (CodeGenOpts.PrepareForThinLTO && !CodeGenOpts.DisableLLVMPasses) {
if (!CodeGenOpts.ThinLinkBitcodeFile.empty()) {
ThinLinkOS = openOutputFile(CodeGenOpts.ThinLinkBitcodeFile);
if (!ThinLinkOS)
return;
}
+ TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit",
+ CodeGenOpts.EnableSplitLTOUnit);
PerModulePasses.add(createWriteThinLTOBitcodePass(
*OS, ThinLinkOS ? &ThinLinkOS->os() : nullptr));
} else {
@@ -790,14 +824,18 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
// targets
bool EmitLTOSummary =
(CodeGenOpts.PrepareForLTO &&
+ !CodeGenOpts.DisableLLVMPasses &&
llvm::Triple(TheModule->getTargetTriple()).getVendor() !=
llvm::Triple::Apple);
- if (EmitLTOSummary && !TheModule->getModuleFlag("ThinLTO"))
- TheModule->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0));
+ if (EmitLTOSummary) {
+ if (!TheModule->getModuleFlag("ThinLTO"))
+ TheModule->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0));
+ TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit",
+ CodeGenOpts.EnableSplitLTOUnit);
+ }
- PerModulePasses.add(
- createBitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists,
- EmitLTOSummary));
+ PerModulePasses.add(createBitcodeWriterPass(
+ *OS, CodeGenOpts.EmitLLVMUseLists, EmitLTOSummary));
}
break;
@@ -807,7 +845,8 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
break;
default:
- if (!CodeGenOpts.SplitDwarfFile.empty()) {
+ if (!CodeGenOpts.SplitDwarfFile.empty() &&
+ (CodeGenOpts.getSplitDwarfMode() == CodeGenOptions::SplitFileFission)) {
DwoOS = openOutputFile(CodeGenOpts.SplitDwarfFile);
if (!DwoOS)
return;
@@ -905,18 +944,21 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
PGOOpt = PGOOptions(CodeGenOpts.InstrProfileOutput.empty()
? DefaultProfileGenName
: CodeGenOpts.InstrProfileOutput,
- "", "", true, CodeGenOpts.DebugInfoForProfiling);
+ "", "", "", true,
+ CodeGenOpts.DebugInfoForProfiling);
else if (CodeGenOpts.hasProfileIRUse())
// -fprofile-use.
- PGOOpt = PGOOptions("", CodeGenOpts.ProfileInstrumentUsePath, "", false,
+ PGOOpt = PGOOptions("", CodeGenOpts.ProfileInstrumentUsePath, "",
+ CodeGenOpts.ProfileRemappingFile, false,
CodeGenOpts.DebugInfoForProfiling);
else if (!CodeGenOpts.SampleProfileFile.empty())
// -fprofile-sample-use
- PGOOpt = PGOOptions("", "", CodeGenOpts.SampleProfileFile, false,
+ PGOOpt = PGOOptions("", "", CodeGenOpts.SampleProfileFile,
+ CodeGenOpts.ProfileRemappingFile, false,
CodeGenOpts.DebugInfoForProfiling);
else if (CodeGenOpts.DebugInfoForProfiling)
// -fdebug-info-for-profiling
- PGOOpt = PGOOptions("", "", "", false, true);
+ PGOOpt = PGOOptions("", "", "", "", false, true);
PassBuilder PB(TM.get(), PGOOpt);
@@ -961,9 +1003,11 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
if (LangOpts.Sanitize.has(SanitizerKind::LocalBounds))
MPM.addPass(createModuleToFunctionPassAdaptor(BoundsCheckingPass()));
- // Lastly, add a semantically necessary pass for LTO.
- if (IsLTO || IsThinLTO)
+ // Lastly, add semantically necessary passes for LTO.
+ if (IsLTO || IsThinLTO) {
+ MPM.addPass(CanonicalizeAliasesPass());
MPM.addPass(NameAnonGlobalPass());
+ }
} else {
// Map our optimization levels into one of the distinct levels used to
// configure the pipeline.
@@ -984,10 +1028,12 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
if (IsThinLTO) {
MPM = PB.buildThinLTOPreLinkDefaultPipeline(
Level, CodeGenOpts.DebugPassManager);
+ MPM.addPass(CanonicalizeAliasesPass());
MPM.addPass(NameAnonGlobalPass());
} else if (IsLTO) {
MPM = PB.buildLTOPreLinkDefaultPipeline(Level,
CodeGenOpts.DebugPassManager);
+ MPM.addPass(CanonicalizeAliasesPass());
MPM.addPass(NameAnonGlobalPass());
} else {
MPM = PB.buildPerModuleDefaultPipeline(Level,
@@ -1008,12 +1054,14 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
break;
case Backend_EmitBC:
- if (CodeGenOpts.PrepareForThinLTO) {
+ if (CodeGenOpts.PrepareForThinLTO && !CodeGenOpts.DisableLLVMPasses) {
if (!CodeGenOpts.ThinLinkBitcodeFile.empty()) {
ThinLinkOS = openOutputFile(CodeGenOpts.ThinLinkBitcodeFile);
if (!ThinLinkOS)
return;
}
+ TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit",
+ CodeGenOpts.EnableSplitLTOUnit);
MPM.addPass(ThinLTOBitcodeWriterPass(*OS, ThinLinkOS ? &ThinLinkOS->os()
: nullptr));
} else {
@@ -1021,13 +1069,17 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
// targets
bool EmitLTOSummary =
(CodeGenOpts.PrepareForLTO &&
+ !CodeGenOpts.DisableLLVMPasses &&
llvm::Triple(TheModule->getTargetTriple()).getVendor() !=
llvm::Triple::Apple);
- if (EmitLTOSummary && !TheModule->getModuleFlag("ThinLTO"))
- TheModule->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0));
-
- MPM.addPass(BitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists,
- EmitLTOSummary));
+ if (EmitLTOSummary) {
+ if (!TheModule->getModuleFlag("ThinLTO"))
+ TheModule->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0));
+ TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit",
+ CodeGenOpts.EnableSplitLTOUnit);
+ }
+ MPM.addPass(
+ BitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists, EmitLTOSummary));
}
break;
@@ -1104,6 +1156,7 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M,
const LangOptions &LOpts,
std::unique_ptr<raw_pwrite_stream> OS,
std::string SampleProfile,
+ std::string ProfileRemapping,
BackendAction Action) {
StringMap<DenseMap<GlobalValue::GUID, GlobalValueSummary *>>
ModuleToDefinedGVSummaries;
@@ -1121,15 +1174,14 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M,
continue;
auto GUID = GlobalList.first;
- assert(GlobalList.second.SummaryList.size() == 1 &&
- "Expected individual combined index to have one summary per GUID");
- auto &Summary = GlobalList.second.SummaryList[0];
- // Skip the summaries for the importing module. These are included to
- // e.g. record required linkage changes.
- if (Summary->modulePath() == M->getModuleIdentifier())
- continue;
- // Add an entry to provoke importing by thinBackend.
- ImportList[Summary->modulePath()].insert(GUID);
+ for (auto &Summary : GlobalList.second.SummaryList) {
+ // Skip the summaries for the importing module. These are included to
+ // e.g. record required linkage changes.
+ if (Summary->modulePath() == M->getModuleIdentifier())
+ continue;
+ // Add an entry to provoke importing by thinBackend.
+ ImportList[Summary->modulePath()].insert(GUID);
+ }
}
std::vector<std::unique_ptr<llvm::MemoryBuffer>> OwnedImports;
@@ -1176,6 +1228,7 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M,
Conf.CGOptLevel = getCGOptLevel(CGOpts);
initTargetOptions(Conf.Options, CGOpts, TOpts, LOpts, HeaderOpts);
Conf.SampleProfile = std::move(SampleProfile);
+ Conf.ProfileRemapping = std::move(ProfileRemapping);
Conf.UseNewPM = CGOpts.ExperimentalNewPassManager;
Conf.DebugPassManager = CGOpts.DebugPassManager;
Conf.RemarksWithHotness = CGOpts.DiagnosticsWithHotness;
@@ -1242,7 +1295,7 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags,
if (!CombinedIndex->skipModuleByDistributedBackend()) {
runThinLTOBackend(CombinedIndex.get(), M, HeaderOpts, CGOpts, TOpts,
LOpts, std::move(OS), CGOpts.SampleProfileFile,
- Action);
+ CGOpts.ProfileRemappingFile, Action);
return;
}
// Distributed indexing detected that nothing from the module is needed
diff --git a/lib/CodeGen/CGAtomic.cpp b/lib/CodeGen/CGAtomic.cpp
index b34bcdc1fc38..24056a449def 100644
--- a/lib/CodeGen/CGAtomic.cpp
+++ b/lib/CodeGen/CGAtomic.cpp
@@ -18,7 +18,7 @@
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/CodeGen/CGFunctionInfo.h"
-#include "clang/Sema/SemaDiagnostic.h"
+#include "clang/Frontend/FrontendDiagnostic.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Intrinsics.h"
@@ -765,11 +765,15 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
std::tie(sizeChars, alignChars) = getContext().getTypeInfoInChars(AtomicTy);
uint64_t Size = sizeChars.getQuantity();
unsigned MaxInlineWidthInBits = getTarget().getMaxAtomicInlineWidth();
- bool UseLibcall = ((Ptr.getAlignment() % sizeChars) != 0 ||
- getContext().toBits(sizeChars) > MaxInlineWidthInBits);
- if (UseLibcall)
- CGM.getDiags().Report(E->getLocStart(), diag::warn_atomic_op_misaligned);
+ bool Oversized = getContext().toBits(sizeChars) > MaxInlineWidthInBits;
+ bool Misaligned = (Ptr.getAlignment() % sizeChars) != 0;
+ bool UseLibcall = Misaligned | Oversized;
+
+ if (UseLibcall) {
+ CGM.getDiags().Report(E->getBeginLoc(), diag::warn_atomic_op_misaligned)
+ << !Oversized;
+ }
llvm::Value *Order = EmitScalarExpr(E->getOrder());
llvm::Value *Scope =
@@ -923,6 +927,15 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
UseOptimizedLibcall = true;
break;
+ case AtomicExpr::AO__atomic_load:
+ case AtomicExpr::AO__atomic_store:
+ case AtomicExpr::AO__atomic_exchange:
+ case AtomicExpr::AO__atomic_compare_exchange:
+ // Use the generic version if we don't know that the operand will be
+ // suitably aligned for the optimized version.
+ if (Misaligned)
+ break;
+ LLVM_FALLTHROUGH;
case AtomicExpr::AO__c11_atomic_load:
case AtomicExpr::AO__c11_atomic_store:
case AtomicExpr::AO__c11_atomic_exchange:
@@ -934,14 +947,11 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
case AtomicExpr::AO__atomic_load_n:
- case AtomicExpr::AO__atomic_load:
case AtomicExpr::AO__atomic_store_n:
- case AtomicExpr::AO__atomic_store:
case AtomicExpr::AO__atomic_exchange_n:
- case AtomicExpr::AO__atomic_exchange:
case AtomicExpr::AO__atomic_compare_exchange_n:
- case AtomicExpr::AO__atomic_compare_exchange:
// Only use optimized library calls for sizes for which they exist.
+ // FIXME: Size == 16 optimized library functions exist too.
if (Size == 1 || Size == 2 || Size == 4 || Size == 8)
UseOptimizedLibcall = true;
break;
diff --git a/lib/CodeGen/CGBlocks.cpp b/lib/CodeGen/CGBlocks.cpp
index 8269b5b229a2..fa3c3ee8610c 100644
--- a/lib/CodeGen/CGBlocks.cpp
+++ b/lib/CodeGen/CGBlocks.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "CGBlocks.h"
+#include "CGCXXABI.h"
#include "CGDebugInfo.h"
#include "CGObjCRuntime.h"
#include "CGOpenCLRuntime.h"
@@ -25,6 +26,7 @@
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Module.h"
+#include "llvm/Support/ScopedPrinter.h"
#include <algorithm>
#include <cstdio>
@@ -34,8 +36,8 @@ using namespace CodeGen;
CGBlockInfo::CGBlockInfo(const BlockDecl *block, StringRef name)
: Name(name), CXXThisIndex(0), CanBeGlobal(false), NeedsCopyDispose(false),
HasCXXObject(false), UsesStret(false), HasCapturedVariableLayout(false),
- LocalAddress(Address::invalid()), StructureType(nullptr), Block(block),
- DominatingIP(nullptr) {
+ CapturesNonExternalType(false), LocalAddress(Address::invalid()),
+ StructureType(nullptr), Block(block), DominatingIP(nullptr) {
// Skip asm prefix, if any. 'name' is usually taken directly from
// the mangled name of the enclosing function.
@@ -63,6 +65,110 @@ static llvm::Constant *buildDisposeHelper(CodeGenModule &CGM,
return CodeGenFunction(CGM).GenerateDestroyHelperFunction(blockInfo);
}
+namespace {
+
+/// Represents a type of copy/destroy operation that should be performed for an
+/// entity that's captured by a block.
+enum class BlockCaptureEntityKind {
+ CXXRecord, // Copy or destroy
+ ARCWeak,
+ ARCStrong,
+ NonTrivialCStruct,
+ BlockObject, // Assign or release
+ None
+};
+
+/// Represents a captured entity that requires extra operations in order for
+/// this entity to be copied or destroyed correctly.
+struct BlockCaptureManagedEntity {
+ BlockCaptureEntityKind CopyKind, DisposeKind;
+ BlockFieldFlags CopyFlags, DisposeFlags;
+ const BlockDecl::Capture *CI;
+ const CGBlockInfo::Capture *Capture;
+
+ BlockCaptureManagedEntity(BlockCaptureEntityKind CopyType,
+ BlockCaptureEntityKind DisposeType,
+ BlockFieldFlags CopyFlags,
+ BlockFieldFlags DisposeFlags,
+ const BlockDecl::Capture &CI,
+ const CGBlockInfo::Capture &Capture)
+ : CopyKind(CopyType), DisposeKind(DisposeType), CopyFlags(CopyFlags),
+ DisposeFlags(DisposeFlags), CI(&CI), Capture(&Capture) {}
+
+ bool operator<(const BlockCaptureManagedEntity &Other) const {
+ return Capture->getOffset() < Other.Capture->getOffset();
+ }
+};
+
+enum class CaptureStrKind {
+ // String for the copy helper.
+ CopyHelper,
+ // String for the dispose helper.
+ DisposeHelper,
+ // Merge the strings for the copy helper and dispose helper.
+ Merged
+};
+
+} // end anonymous namespace
+
+static void findBlockCapturedManagedEntities(
+ const CGBlockInfo &BlockInfo, const LangOptions &LangOpts,
+ SmallVectorImpl<BlockCaptureManagedEntity> &ManagedCaptures);
+
+static std::string getBlockCaptureStr(const BlockCaptureManagedEntity &E,
+ CaptureStrKind StrKind,
+ CharUnits BlockAlignment,
+ CodeGenModule &CGM);
+
+static std::string getBlockDescriptorName(const CGBlockInfo &BlockInfo,
+ CodeGenModule &CGM) {
+ std::string Name = "__block_descriptor_";
+ Name += llvm::to_string(BlockInfo.BlockSize.getQuantity()) + "_";
+
+ if (BlockInfo.needsCopyDisposeHelpers()) {
+ if (CGM.getLangOpts().Exceptions)
+ Name += "e";
+ if (CGM.getCodeGenOpts().ObjCAutoRefCountExceptions)
+ Name += "a";
+ Name += llvm::to_string(BlockInfo.BlockAlign.getQuantity()) + "_";
+
+ SmallVector<BlockCaptureManagedEntity, 4> ManagedCaptures;
+ findBlockCapturedManagedEntities(BlockInfo, CGM.getContext().getLangOpts(),
+ ManagedCaptures);
+
+ for (const BlockCaptureManagedEntity &E : ManagedCaptures) {
+ Name += llvm::to_string(E.Capture->getOffset().getQuantity());
+
+ if (E.CopyKind == E.DisposeKind) {
+ // If CopyKind and DisposeKind are the same, merge the capture
+ // information.
+ assert(E.CopyKind != BlockCaptureEntityKind::None &&
+ "shouldn't see BlockCaptureManagedEntity that is None");
+ Name += getBlockCaptureStr(E, CaptureStrKind::Merged,
+ BlockInfo.BlockAlign, CGM);
+ } else {
+ // If CopyKind and DisposeKind are not the same, which can happen when
+ // either Kind is None or the captured object is a __strong block,
+ // concatenate the copy and dispose strings.
+ Name += getBlockCaptureStr(E, CaptureStrKind::CopyHelper,
+ BlockInfo.BlockAlign, CGM);
+ Name += getBlockCaptureStr(E, CaptureStrKind::DisposeHelper,
+ BlockInfo.BlockAlign, CGM);
+ }
+ }
+ Name += "_";
+ }
+
+ std::string TypeAtEncoding =
+ CGM.getContext().getObjCEncodingForBlock(BlockInfo.getBlockExpr());
+ /// Replace occurrences of '@' with '\1'. '@' is reserved on ELF platforms as
+ /// a separator between symbol name and symbol version.
+ std::replace(TypeAtEncoding.begin(), TypeAtEncoding.end(), '@', '\1');
+ Name += "e" + llvm::to_string(TypeAtEncoding.size()) + "_" + TypeAtEncoding;
+ Name += "l" + CGM.getObjCRuntime().getRCBlockLayoutStr(CGM, BlockInfo);
+ return Name;
+}
+
/// buildBlockDescriptor - Build the block descriptor meta-data for a block.
/// buildBlockDescriptor is accessed from 5th field of the Block_literal
/// meta-data and contains stationary information about the block literal.
@@ -72,7 +178,7 @@ static llvm::Constant *buildDisposeHelper(CodeGenModule &CGM,
/// unsigned long reserved;
/// unsigned long size; // size of Block_literal metadata in bytes.
/// void *copy_func_helper_decl; // optional copy helper.
-/// void *destroy_func_decl; // optioanl destructor helper.
+/// void *destroy_func_decl; // optional destructor helper.
/// void *block_method_encoding_address; // @encode for block literal signature.
/// void *block_layout_info; // encoding of captured block variables.
/// };
@@ -91,6 +197,19 @@ static llvm::Constant *buildBlockDescriptor(CodeGenModule &CGM,
else
i8p = CGM.VoidPtrTy;
+ std::string descName;
+
+ // If an equivalent block descriptor global variable exists, return it.
+ if (C.getLangOpts().ObjC &&
+ CGM.getLangOpts().getGC() == LangOptions::NonGC) {
+ descName = getBlockDescriptorName(blockInfo, CGM);
+ if (llvm::GlobalValue *desc = CGM.getModule().getNamedValue(descName))
+ return llvm::ConstantExpr::getBitCast(desc,
+ CGM.getBlockDescriptorType());
+ }
+
+ // If there isn't an equivalent block descriptor global variable, create a new
+ // one.
ConstantInitBuilder builder(CGM);
auto elements = builder.beginStruct();
@@ -104,12 +223,20 @@ static llvm::Constant *buildBlockDescriptor(CodeGenModule &CGM,
elements.addInt(ulong, blockInfo.BlockSize.getQuantity());
// Optional copy/dispose helpers.
+ bool hasInternalHelper = false;
if (blockInfo.needsCopyDisposeHelpers()) {
// copy_func_helper_decl
- elements.add(buildCopyHelper(CGM, blockInfo));
+ llvm::Constant *copyHelper = buildCopyHelper(CGM, blockInfo);
+ elements.add(copyHelper);
// destroy_func_decl
- elements.add(buildDisposeHelper(CGM, blockInfo));
+ llvm::Constant *disposeHelper = buildDisposeHelper(CGM, blockInfo);
+ elements.add(disposeHelper);
+
+ if (cast<llvm::Function>(copyHelper->getOperand(0))->hasInternalLinkage() ||
+ cast<llvm::Function>(disposeHelper->getOperand(0))
+ ->hasInternalLinkage())
+ hasInternalHelper = true;
}
// Signature. Mandatory ObjC-style method descriptor @encode sequence.
@@ -119,7 +246,7 @@ static llvm::Constant *buildBlockDescriptor(CodeGenModule &CGM,
CGM.GetAddrOfConstantCString(typeAtEncoding).getPointer(), i8p));
// GC layout.
- if (C.getLangOpts().ObjC1) {
+ if (C.getLangOpts().ObjC) {
if (CGM.getLangOpts().getGC() != LangOptions::NonGC)
elements.add(CGM.getObjCRuntime().BuildGCBlockLayout(CGM, blockInfo));
else
@@ -132,12 +259,26 @@ static llvm::Constant *buildBlockDescriptor(CodeGenModule &CGM,
if (C.getLangOpts().OpenCL)
AddrSpace = C.getTargetAddressSpace(LangAS::opencl_constant);
+ llvm::GlobalValue::LinkageTypes linkage;
+ if (descName.empty()) {
+ linkage = llvm::GlobalValue::InternalLinkage;
+ descName = "__block_descriptor_tmp";
+ } else if (hasInternalHelper) {
+ // If either the copy helper or the dispose helper has internal linkage,
+ // the block descriptor must have internal linkage too.
+ linkage = llvm::GlobalValue::InternalLinkage;
+ } else {
+ linkage = llvm::GlobalValue::LinkOnceODRLinkage;
+ }
+
llvm::GlobalVariable *global =
- elements.finishAndCreateGlobal("__block_descriptor_tmp",
- CGM.getPointerAlign(),
- /*constant*/ true,
- llvm::GlobalValue::InternalLinkage,
- AddrSpace);
+ elements.finishAndCreateGlobal(descName, CGM.getPointerAlign(),
+ /*constant*/ true, linkage, AddrSpace);
+
+ if (linkage == llvm::GlobalValue::LinkOnceODRLinkage) {
+ global->setVisibility(llvm::GlobalValue::HiddenVisibility);
+ global->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+ }
return llvm::ConstantExpr::getBitCast(global, CGM.getBlockDescriptorType());
}
@@ -308,12 +449,25 @@ static void initializeForBlockHeader(CodeGenModule &CGM, CGBlockInfo &info,
assert(elementTypes.empty());
if (CGM.getLangOpts().OpenCL) {
- // The header is basically 'struct { int; int;
+ // The header is basically 'struct { int; int; generic void *;
// custom_fields; }'. Assert that struct is packed.
+ auto GenericAS =
+ CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic);
+ auto GenPtrAlign =
+ CharUnits::fromQuantity(CGM.getTarget().getPointerAlign(GenericAS) / 8);
+ auto GenPtrSize =
+ CharUnits::fromQuantity(CGM.getTarget().getPointerWidth(GenericAS) / 8);
+ assert(CGM.getIntSize() <= GenPtrSize);
+ assert(CGM.getIntAlign() <= GenPtrAlign);
+ assert((2 * CGM.getIntSize()).isMultipleOf(GenPtrAlign));
elementTypes.push_back(CGM.IntTy); /* total size */
elementTypes.push_back(CGM.IntTy); /* align */
- unsigned Offset = 2 * CGM.getIntSize().getQuantity();
- unsigned BlockAlign = CGM.getIntAlign().getQuantity();
+ elementTypes.push_back(
+ CGM.getOpenCLRuntime()
+ .getGenericVoidPointerType()); /* invoke function */
+ unsigned Offset =
+ 2 * CGM.getIntSize().getQuantity() + GenPtrSize.getQuantity();
+ unsigned BlockAlign = GenPtrAlign.getQuantity();
if (auto *Helper =
CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
for (auto I : Helper->getCustomFieldTypes()) /* custom fields */ {
@@ -355,7 +509,11 @@ static QualType getCaptureFieldType(const CodeGenFunction &CGF,
return CGF.BlockInfo->getCapture(VD).fieldType();
if (auto *FD = CGF.LambdaCaptureFields.lookup(VD))
return FD->getType();
- return VD->getType();
+ // If the captured variable is a non-escaping __block variable, the field
+ // type is the reference type. If the variable is a __block variable that
+ // already has a reference type, the field type is the variable's type.
+ return VD->isNonEscapingByref() ?
+ CGF.getContext().getLValueReferenceType(VD->getType()) : VD->getType();
}
/// Compute the layout of the given block. Attempts to lay the block
@@ -378,7 +536,7 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF,
info.CanBeGlobal = true;
return;
}
- else if (C.getLangOpts().ObjC1 &&
+ else if (C.getLangOpts().ObjC &&
CGM.getLangOpts().getGC() == LangOptions::NonGC)
info.HasCapturedVariableLayout = true;
@@ -393,7 +551,7 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF,
if (block->capturesCXXThis()) {
assert(CGF && CGF->CurFuncDecl && isa<CXXMethodDecl>(CGF->CurFuncDecl) &&
"Can't capture 'this' outside a method");
- QualType thisType = cast<CXXMethodDecl>(CGF->CurFuncDecl)->getThisType(C);
+ QualType thisType = cast<CXXMethodDecl>(CGF->CurFuncDecl)->getThisType();
// Theoretically, this could be in a different address space, so
// don't assume standard pointer size/align.
@@ -411,7 +569,7 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF,
for (const auto &CI : block->captures()) {
const VarDecl *variable = CI.getVariable();
- if (CI.isByRef()) {
+ if (CI.isEscapingByref()) {
// We have to copy/dispose of the __block reference.
info.NeedsCopyDispose = true;
@@ -419,6 +577,10 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF,
CharUnits align = CGM.getPointerAlign();
maxFieldAlign = std::max(maxFieldAlign, align);
+ // Since a __block variable cannot be captured by lambdas, its type and
+ // the capture field type should always match.
+ assert(getCaptureFieldType(*CGF, CI) == variable->getType() &&
+ "capture type differs from the variable type");
layout.push_back(BlockLayoutChunk(align, CGM.getPointerSize(),
Qualifiers::OCL_None, &CI,
CGM.VoidPtrTy, variable->getType()));
@@ -432,10 +594,11 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF,
continue;
}
+ QualType VT = getCaptureFieldType(*CGF, CI);
+
// If we have a lifetime qualifier, honor it for capture purposes.
// That includes *not* copying it if it's __unsafe_unretained.
- Qualifiers::ObjCLifetime lifetime =
- variable->getType().getObjCLifetime();
+ Qualifiers::ObjCLifetime lifetime = VT.getObjCLifetime();
if (lifetime) {
switch (lifetime) {
case Qualifiers::OCL_None: llvm_unreachable("impossible");
@@ -449,10 +612,10 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF,
}
// Block pointers require copy/dispose. So do Objective-C pointers.
- } else if (variable->getType()->isObjCRetainableType()) {
+ } else if (VT->isObjCRetainableType()) {
// But honor the inert __unsafe_unretained qualifier, which doesn't
// actually make it into the type system.
- if (variable->getType()->isObjCInertUnsafeUnretainedType()) {
+ if (VT->isObjCInertUnsafeUnretainedType()) {
lifetime = Qualifiers::OCL_ExplicitNone;
} else {
info.NeedsCopyDispose = true;
@@ -464,27 +627,27 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF,
} else if (CI.hasCopyExpr()) {
info.NeedsCopyDispose = true;
info.HasCXXObject = true;
+ if (!VT->getAsCXXRecordDecl()->isExternallyVisible())
+ info.CapturesNonExternalType = true;
// So do C structs that require non-trivial copy construction or
// destruction.
- } else if (variable->getType().isNonTrivialToPrimitiveCopy() ==
- QualType::PCK_Struct ||
- variable->getType().isDestructedType() ==
- QualType::DK_nontrivial_c_struct) {
+ } else if (VT.isNonTrivialToPrimitiveCopy() == QualType::PCK_Struct ||
+ VT.isDestructedType() == QualType::DK_nontrivial_c_struct) {
info.NeedsCopyDispose = true;
// And so do types with destructors.
} else if (CGM.getLangOpts().CPlusPlus) {
- if (const CXXRecordDecl *record =
- variable->getType()->getAsCXXRecordDecl()) {
+ if (const CXXRecordDecl *record = VT->getAsCXXRecordDecl()) {
if (!record->hasTrivialDestructor()) {
info.HasCXXObject = true;
info.NeedsCopyDispose = true;
+ if (!record->isExternallyVisible())
+ info.CapturesNonExternalType = true;
}
}
}
- QualType VT = getCaptureFieldType(*CGF, CI);
CharUnits size = C.getTypeSizeInChars(VT);
CharUnits align = C.getDeclAlign(variable);
@@ -699,10 +862,12 @@ static void enterBlockScope(CodeGenFunction &CGF, BlockDecl *block) {
/// Enter a full-expression with a non-trivial number of objects to
/// clean up. This is in this file because, at the moment, the only
/// kind of cleanup object is a BlockDecl*.
-void CodeGenFunction::enterNonTrivialFullExpression(const ExprWithCleanups *E) {
- assert(E->getNumObjects() != 0);
- for (const ExprWithCleanups::CleanupObject &C : E->getObjects())
- enterBlockScope(*this, C);
+void CodeGenFunction::enterNonTrivialFullExpression(const FullExpr *E) {
+ if (const auto EWC = dyn_cast<ExprWithCleanups>(E)) {
+ assert(EWC->getNumObjects() != 0);
+ for (const ExprWithCleanups::CleanupObject &C : EWC->getObjects())
+ enterBlockScope(*this, C);
+ }
}
/// Find the layout for the given block in a linked list and remove it.
@@ -759,12 +924,20 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr) {
llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
bool IsOpenCL = CGM.getContext().getLangOpts().OpenCL;
+ auto GenVoidPtrTy =
+ IsOpenCL ? CGM.getOpenCLRuntime().getGenericVoidPointerType() : VoidPtrTy;
+ LangAS GenVoidPtrAddr = IsOpenCL ? LangAS::opencl_generic : LangAS::Default;
+ auto GenVoidPtrSize = CharUnits::fromQuantity(
+ CGM.getTarget().getPointerWidth(
+ CGM.getContext().getTargetAddressSpace(GenVoidPtrAddr)) /
+ 8);
// Using the computed layout, generate the actual block function.
bool isLambdaConv = blockInfo.getBlockDecl()->isConversionFromLambda();
CodeGenFunction BlockCGF{CGM, true};
BlockCGF.SanOpts = SanOpts;
auto *InvokeFn = BlockCGF.GenerateBlockFunction(
CurGD, blockInfo, LocalDeclMap, isLambdaConv, blockInfo.CanBeGlobal);
+ auto *blockFn = llvm::ConstantExpr::getPointerCast(InvokeFn, GenVoidPtrTy);
// If there is nothing to capture, we can emit this as a global block.
if (blockInfo.CanBeGlobal)
@@ -840,12 +1013,11 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
llvm::ConstantInt::get(IntTy, blockInfo.BlockAlign.getQuantity()),
getIntSize(), "block.align");
}
- if (!IsOpenCL) {
- addHeaderField(llvm::ConstantExpr::getBitCast(InvokeFn, VoidPtrTy),
- getPointerSize(), "block.invoke");
+ addHeaderField(blockFn, GenVoidPtrSize, "block.invoke");
+ if (!IsOpenCL)
addHeaderField(descriptor, getPointerSize(), "block.descriptor");
- } else if (auto *Helper =
- CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
+ else if (auto *Helper =
+ CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
for (auto I : Helper->getCustomFieldValues(*this, blockInfo)) {
addHeaderField(
I.first,
@@ -889,7 +1061,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
// The lambda capture in a lambda's conversion-to-block-pointer is
// special; we'll simply emit it directly.
src = Address::invalid();
- } else if (CI.isByRef()) {
+ } else if (CI.isEscapingByref()) {
if (BlockInfo && CI.isNested()) {
// We need to use the capture from the enclosing block.
const CGBlockInfo::Capture &enclosingCapture =
@@ -906,7 +1078,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
src = I->second;
}
} else {
- DeclRefExpr declRef(const_cast<VarDecl *>(variable),
+ DeclRefExpr declRef(getContext(), const_cast<VarDecl *>(variable),
/*RefersToEnclosingVariableOrCapture*/ CI.isNested(),
type.getNonReferenceType(), VK_LValue,
SourceLocation());
@@ -917,7 +1089,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
// the block field. There's no need to chase the forwarding
// pointer at this point, since we're building something that will
// live a shorter life than the stack byref anyway.
- if (CI.isByRef()) {
+ if (CI.isEscapingByref()) {
// Get a void* that points to the byref struct.
llvm::Value *byrefPointer;
if (CI.isNested())
@@ -980,7 +1152,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
// We use one of these or the other depending on whether the
// reference is nested.
- DeclRefExpr declRef(const_cast<VarDecl *>(variable),
+ DeclRefExpr declRef(getContext(), const_cast<VarDecl *>(variable),
/*RefersToEnclosingVariableOrCapture*/ CI.isNested(),
type, VK_LValue, SourceLocation());
@@ -1049,23 +1221,38 @@ llvm::Type *CodeGenModule::getBlockDescriptorType() {
}
llvm::Type *CodeGenModule::getGenericBlockLiteralType() {
- assert(!getLangOpts().OpenCL && "OpenCL does not need this");
-
if (GenericBlockLiteralType)
return GenericBlockLiteralType;
llvm::Type *BlockDescPtrTy = getBlockDescriptorType();
- // struct __block_literal_generic {
- // void *__isa;
- // int __flags;
- // int __reserved;
- // void (*__invoke)(void *);
- // struct __block_descriptor *__descriptor;
- // };
- GenericBlockLiteralType =
- llvm::StructType::create("struct.__block_literal_generic", VoidPtrTy,
- IntTy, IntTy, VoidPtrTy, BlockDescPtrTy);
+ if (getLangOpts().OpenCL) {
+ // struct __opencl_block_literal_generic {
+ // int __size;
+ // int __align;
+ // __generic void *__invoke;
+ // /* custom fields */
+ // };
+ SmallVector<llvm::Type *, 8> StructFields(
+ {IntTy, IntTy, getOpenCLRuntime().getGenericVoidPointerType()});
+ if (auto *Helper = getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
+ for (auto I : Helper->getCustomFieldTypes())
+ StructFields.push_back(I);
+ }
+ GenericBlockLiteralType = llvm::StructType::create(
+ StructFields, "struct.__opencl_block_literal_generic");
+ } else {
+ // struct __block_literal_generic {
+ // void *__isa;
+ // int __flags;
+ // int __reserved;
+ // void (*__invoke)(void *);
+ // struct __block_descriptor *__descriptor;
+ // };
+ GenericBlockLiteralType =
+ llvm::StructType::create("struct.__block_literal_generic", VoidPtrTy,
+ IntTy, IntTy, VoidPtrTy, BlockDescPtrTy);
+ }
return GenericBlockLiteralType;
}
@@ -1076,21 +1263,27 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E,
E->getCallee()->getType()->getAs<BlockPointerType>();
llvm::Value *BlockPtr = EmitScalarExpr(E->getCallee());
- llvm::Value *FuncPtr;
- if (!CGM.getLangOpts().OpenCL) {
- // Get a pointer to the generic block literal.
- llvm::Type *BlockLiteralTy =
- llvm::PointerType::get(CGM.getGenericBlockLiteralType(), 0);
+ // Get a pointer to the generic block literal.
+ // For OpenCL we generate generic AS void ptr to be able to reuse the same
+ // block definition for blocks with captures generated as private AS local
+ // variables and without captures generated as global AS program scope
+ // variables.
+ unsigned AddrSpace = 0;
+ if (getLangOpts().OpenCL)
+ AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_generic);
- // Bitcast the callee to a block literal.
- BlockPtr =
- Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal");
+ llvm::Type *BlockLiteralTy =
+ llvm::PointerType::get(CGM.getGenericBlockLiteralType(), AddrSpace);
- // Get the function pointer from the literal.
- FuncPtr =
- Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr, 3);
- }
+ // Bitcast the callee to a block literal.
+ BlockPtr =
+ Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal");
+
+ // Get the function pointer from the literal.
+ llvm::Value *FuncPtr =
+ Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr,
+ CGM.getLangOpts().OpenCL ? 2 : 3);
// Add the block literal.
CallArgList Args;
@@ -1113,11 +1306,7 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E,
EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments());
// Load the function.
- llvm::Value *Func;
- if (CGM.getLangOpts().OpenCL)
- Func = CGM.getOpenCLRuntime().getInvokeFunction(E->getCallee());
- else
- Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign());
+ llvm::Value *Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign());
const FunctionType *FuncTy = FnType->castAs<FunctionType>();
const CGFunctionInfo &FnInfo =
@@ -1136,8 +1325,7 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E,
return EmitCall(FnInfo, Callee, ReturnValue, Args);
}
-Address CodeGenFunction::GetAddrOfBlockDecl(const VarDecl *variable,
- bool isByRef) {
+Address CodeGenFunction::GetAddrOfBlockDecl(const VarDecl *variable) {
assert(BlockInfo && "evaluating block ref without block information?");
const CGBlockInfo::Capture &capture = BlockInfo->getCapture(variable);
@@ -1148,7 +1336,7 @@ Address CodeGenFunction::GetAddrOfBlockDecl(const VarDecl *variable,
Builder.CreateStructGEP(LoadBlockStruct(), capture.getIndex(),
capture.getOffset(), "block.capture.addr");
- if (isByRef) {
+ if (variable->isEscapingByref()) {
// addr should be a void** right now. Load, then cast the result
// to byref*.
@@ -1162,6 +1350,10 @@ Address CodeGenFunction::GetAddrOfBlockDecl(const VarDecl *variable,
variable->getName());
}
+ assert((!variable->isNonEscapingByref() ||
+ capture.fieldType()->isReferenceType()) &&
+ "the capture field of a non-escaping variable should have a "
+ "reference type");
if (capture.fieldType()->isReferenceType())
addr = EmitLoadOfReference(MakeAddrLValue(addr, capture.fieldType()));
@@ -1213,9 +1405,13 @@ static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM,
auto fields = builder.beginStruct();
bool IsOpenCL = CGM.getLangOpts().OpenCL;
+ bool IsWindows = CGM.getTarget().getTriple().isOSWindows();
if (!IsOpenCL) {
// isa
- fields.add(CGM.getNSConcreteGlobalBlock());
+ if (IsWindows)
+ fields.addNullPointer(CGM.Int8PtrPtrTy);
+ else
+ fields.add(CGM.getNSConcreteGlobalBlock());
// __flags
BlockFlags flags = BLOCK_IS_GLOBAL | BLOCK_HAS_SIGNATURE;
@@ -1226,14 +1422,14 @@ static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM,
// Reserved
fields.addInt(CGM.IntTy, 0);
-
- // Function
- fields.add(blockFn);
} else {
fields.addInt(CGM.IntTy, blockInfo.BlockSize.getQuantity());
fields.addInt(CGM.IntTy, blockInfo.BlockAlign.getQuantity());
}
+ // Function
+ fields.add(blockFn);
+
if (!IsOpenCL) {
// Descriptor
fields.add(buildBlockDescriptor(CGM, blockInfo));
@@ -1250,7 +1446,27 @@ static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM,
llvm::Constant *literal = fields.finishAndCreateGlobal(
"__block_literal_global", blockInfo.BlockAlign,
- /*constant*/ true, llvm::GlobalVariable::InternalLinkage, AddrSpace);
+ /*constant*/ !IsWindows, llvm::GlobalVariable::InternalLinkage, AddrSpace);
+
+ // Windows does not allow globals to be initialised to point to globals in
+ // different DLLs. Any such variables must run code to initialise them.
+ if (IsWindows) {
+ auto *Init = llvm::Function::Create(llvm::FunctionType::get(CGM.VoidTy,
+ {}), llvm::GlobalValue::InternalLinkage, ".block_isa_init",
+ &CGM.getModule());
+ llvm::IRBuilder<> b(llvm::BasicBlock::Create(CGM.getLLVMContext(), "entry",
+ Init));
+ b.CreateAlignedStore(CGM.getNSConcreteGlobalBlock(),
+ b.CreateStructGEP(literal, 0), CGM.getPointerAlign().getQuantity());
+ b.CreateRetVoid();
+ // We can't use the normal LLVM global initialisation array, because we
+ // need to specify that this runs early in library initialisation.
+ auto *InitVar = new llvm::GlobalVariable(CGM.getModule(), Init->getType(),
+ /*isConstant*/true, llvm::GlobalValue::InternalLinkage,
+ Init, ".block_isa_init_ptr");
+ InitVar->setSection(".CRT$XCLa");
+ CGM.addUsedGlobal(InitVar);
+ }
// Return a constant of the appropriately-casted type.
llvm::Type *RequiredType =
@@ -1284,7 +1500,7 @@ void CodeGenFunction::setBlockContextParameter(const ImplicitParamDecl *D,
}
}
- SourceLocation StartLoc = BlockInfo->getBlockExpr()->getBody()->getLocStart();
+ SourceLocation StartLoc = BlockInfo->getBlockExpr()->getBody()->getBeginLoc();
ApplyDebugLocation Scope(*this, StartLoc);
// Instead of messing around with LocalDeclMap, just set the value
@@ -1314,7 +1530,7 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD,
CurGD = GD;
- CurEHLocation = blockInfo.getBlockExpr()->getLocEnd();
+ CurEHLocation = blockInfo.getBlockExpr()->getEndLoc();
BlockInfo = &blockInfo;
@@ -1379,7 +1595,7 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD,
// Begin generating the function.
StartFunction(blockDecl, fnType->getReturnType(), fn, fnInfo, args,
blockDecl->getLocation(),
- blockInfo.getBlockExpr()->getBody()->getLocStart());
+ blockInfo.getBlockExpr()->getBody()->getBeginLoc());
// Okay. Undo some of what StartFunction did.
@@ -1480,35 +1696,6 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD,
return fn;
}
-namespace {
-
-/// Represents a type of copy/destroy operation that should be performed for an
-/// entity that's captured by a block.
-enum class BlockCaptureEntityKind {
- CXXRecord, // Copy or destroy
- ARCWeak,
- ARCStrong,
- NonTrivialCStruct,
- BlockObject, // Assign or release
- None
-};
-
-/// Represents a captured entity that requires extra operations in order for
-/// this entity to be copied or destroyed correctly.
-struct BlockCaptureManagedEntity {
- BlockCaptureEntityKind Kind;
- BlockFieldFlags Flags;
- const BlockDecl::Capture &CI;
- const CGBlockInfo::Capture &Capture;
-
- BlockCaptureManagedEntity(BlockCaptureEntityKind Type, BlockFieldFlags Flags,
- const BlockDecl::Capture &CI,
- const CGBlockInfo::Capture &Capture)
- : Kind(Type), Flags(Flags), CI(CI), Capture(Capture) {}
-};
-
-} // end anonymous namespace
-
static std::pair<BlockCaptureEntityKind, BlockFieldFlags>
computeCopyInfoForBlockCapture(const BlockDecl::Capture &CI, QualType T,
const LangOptions &LangOpts) {
@@ -1518,7 +1705,7 @@ computeCopyInfoForBlockCapture(const BlockDecl::Capture &CI, QualType T,
return std::make_pair(BlockCaptureEntityKind::CXXRecord, BlockFieldFlags());
}
BlockFieldFlags Flags;
- if (CI.isByRef()) {
+ if (CI.isEscapingByref()) {
Flags = BLOCK_FIELD_IS_BYREF;
if (T.isObjCGCWeak())
Flags |= BLOCK_FIELD_IS_WEAK;
@@ -1566,23 +1753,32 @@ computeCopyInfoForBlockCapture(const BlockDecl::Capture &CI, QualType T,
llvm_unreachable("after exhaustive PrimitiveCopyKind switch");
}
+static std::pair<BlockCaptureEntityKind, BlockFieldFlags>
+computeDestroyInfoForBlockCapture(const BlockDecl::Capture &CI, QualType T,
+ const LangOptions &LangOpts);
+
/// Find the set of block captures that need to be explicitly copied or destroy.
static void findBlockCapturedManagedEntities(
const CGBlockInfo &BlockInfo, const LangOptions &LangOpts,
- SmallVectorImpl<BlockCaptureManagedEntity> &ManagedCaptures,
- llvm::function_ref<std::pair<BlockCaptureEntityKind, BlockFieldFlags>(
- const BlockDecl::Capture &, QualType, const LangOptions &)>
- Predicate) {
+ SmallVectorImpl<BlockCaptureManagedEntity> &ManagedCaptures) {
for (const auto &CI : BlockInfo.getBlockDecl()->captures()) {
const VarDecl *Variable = CI.getVariable();
const CGBlockInfo::Capture &Capture = BlockInfo.getCapture(Variable);
if (Capture.isConstant())
continue;
- auto Info = Predicate(CI, Variable->getType(), LangOpts);
- if (Info.first != BlockCaptureEntityKind::None)
- ManagedCaptures.emplace_back(Info.first, Info.second, CI, Capture);
+ QualType VT = Capture.fieldType();
+ auto CopyInfo = computeCopyInfoForBlockCapture(CI, VT, LangOpts);
+ auto DisposeInfo = computeDestroyInfoForBlockCapture(CI, VT, LangOpts);
+ if (CopyInfo.first != BlockCaptureEntityKind::None ||
+ DisposeInfo.first != BlockCaptureEntityKind::None)
+ ManagedCaptures.emplace_back(CopyInfo.first, DisposeInfo.first,
+ CopyInfo.second, DisposeInfo.second, CI,
+ Capture);
}
+
+ // Sort the captures by offset.
+ llvm::sort(ManagedCaptures);
}
namespace {
@@ -1590,10 +1786,12 @@ namespace {
struct CallBlockRelease final : EHScopeStack::Cleanup {
Address Addr;
BlockFieldFlags FieldFlags;
- bool LoadBlockVarAddr;
+ bool LoadBlockVarAddr, CanThrow;
- CallBlockRelease(Address Addr, BlockFieldFlags Flags, bool LoadValue)
- : Addr(Addr), FieldFlags(Flags), LoadBlockVarAddr(LoadValue) {}
+ CallBlockRelease(Address Addr, BlockFieldFlags Flags, bool LoadValue,
+ bool CT)
+ : Addr(Addr), FieldFlags(Flags), LoadBlockVarAddr(LoadValue),
+ CanThrow(CT) {}
void Emit(CodeGenFunction &CGF, Flags flags) override {
llvm::Value *BlockVarAddr;
@@ -1604,15 +1802,145 @@ struct CallBlockRelease final : EHScopeStack::Cleanup {
BlockVarAddr = Addr.getPointer();
}
- CGF.BuildBlockRelease(BlockVarAddr, FieldFlags);
+ CGF.BuildBlockRelease(BlockVarAddr, FieldFlags, CanThrow);
}
};
} // end anonymous namespace
+/// Check if \p T is a C++ class that has a destructor that can throw.
+bool CodeGenFunction::cxxDestructorCanThrow(QualType T) {
+ if (const auto *RD = T->getAsCXXRecordDecl())
+ if (const CXXDestructorDecl *DD = RD->getDestructor())
+ return DD->getType()->getAs<FunctionProtoType>()->canThrow();
+ return false;
+}
+
+// Return a string that has the information about a capture.
+static std::string getBlockCaptureStr(const BlockCaptureManagedEntity &E,
+ CaptureStrKind StrKind,
+ CharUnits BlockAlignment,
+ CodeGenModule &CGM) {
+ std::string Str;
+ ASTContext &Ctx = CGM.getContext();
+ const BlockDecl::Capture &CI = *E.CI;
+ QualType CaptureTy = CI.getVariable()->getType();
+
+ BlockCaptureEntityKind Kind;
+ BlockFieldFlags Flags;
+
+ // CaptureStrKind::Merged should be passed only when the operations and the
+ // flags are the same for copy and dispose.
+ assert((StrKind != CaptureStrKind::Merged ||
+ (E.CopyKind == E.DisposeKind && E.CopyFlags == E.DisposeFlags)) &&
+ "different operations and flags");
+
+ if (StrKind == CaptureStrKind::DisposeHelper) {
+ Kind = E.DisposeKind;
+ Flags = E.DisposeFlags;
+ } else {
+ Kind = E.CopyKind;
+ Flags = E.CopyFlags;
+ }
+
+ switch (Kind) {
+ case BlockCaptureEntityKind::CXXRecord: {
+ Str += "c";
+ SmallString<256> TyStr;
+ llvm::raw_svector_ostream Out(TyStr);
+ CGM.getCXXABI().getMangleContext().mangleTypeName(CaptureTy, Out);
+ Str += llvm::to_string(TyStr.size()) + TyStr.c_str();
+ break;
+ }
+ case BlockCaptureEntityKind::ARCWeak:
+ Str += "w";
+ break;
+ case BlockCaptureEntityKind::ARCStrong:
+ Str += "s";
+ break;
+ case BlockCaptureEntityKind::BlockObject: {
+ const VarDecl *Var = CI.getVariable();
+ unsigned F = Flags.getBitMask();
+ if (F & BLOCK_FIELD_IS_BYREF) {
+ Str += "r";
+ if (F & BLOCK_FIELD_IS_WEAK)
+ Str += "w";
+ else {
+ // If CaptureStrKind::Merged is passed, check both the copy expression
+ // and the destructor.
+ if (StrKind != CaptureStrKind::DisposeHelper) {
+ if (Ctx.getBlockVarCopyInit(Var).canThrow())
+ Str += "c";
+ }
+ if (StrKind != CaptureStrKind::CopyHelper) {
+ if (CodeGenFunction::cxxDestructorCanThrow(CaptureTy))
+ Str += "d";
+ }
+ }
+ } else {
+ assert((F & BLOCK_FIELD_IS_OBJECT) && "unexpected flag value");
+ if (F == BLOCK_FIELD_IS_BLOCK)
+ Str += "b";
+ else
+ Str += "o";
+ }
+ break;
+ }
+ case BlockCaptureEntityKind::NonTrivialCStruct: {
+ bool IsVolatile = CaptureTy.isVolatileQualified();
+ CharUnits Alignment =
+ BlockAlignment.alignmentAtOffset(E.Capture->getOffset());
+
+ Str += "n";
+ std::string FuncStr;
+ if (StrKind == CaptureStrKind::DisposeHelper)
+ FuncStr = CodeGenFunction::getNonTrivialDestructorStr(
+ CaptureTy, Alignment, IsVolatile, Ctx);
+ else
+ // If CaptureStrKind::Merged is passed, use the copy constructor string.
+ // It has all the information that the destructor string has.
+ FuncStr = CodeGenFunction::getNonTrivialCopyConstructorStr(
+ CaptureTy, Alignment, IsVolatile, Ctx);
+ // The underscore is necessary here because non-trivial copy constructor
+ // and destructor strings can start with a number.
+ Str += llvm::to_string(FuncStr.size()) + "_" + FuncStr;
+ break;
+ }
+ case BlockCaptureEntityKind::None:
+ break;
+ }
+
+ return Str;
+}
+
+static std::string getCopyDestroyHelperFuncName(
+ const SmallVectorImpl<BlockCaptureManagedEntity> &Captures,
+ CharUnits BlockAlignment, CaptureStrKind StrKind, CodeGenModule &CGM) {
+ assert((StrKind == CaptureStrKind::CopyHelper ||
+ StrKind == CaptureStrKind::DisposeHelper) &&
+ "unexpected CaptureStrKind");
+ std::string Name = StrKind == CaptureStrKind::CopyHelper
+ ? "__copy_helper_block_"
+ : "__destroy_helper_block_";
+ if (CGM.getLangOpts().Exceptions)
+ Name += "e";
+ if (CGM.getCodeGenOpts().ObjCAutoRefCountExceptions)
+ Name += "a";
+ Name += llvm::to_string(BlockAlignment.getQuantity()) + "_";
+
+ for (const BlockCaptureManagedEntity &E : Captures) {
+ Name += llvm::to_string(E.Capture->getOffset().getQuantity());
+ Name += getBlockCaptureStr(E, StrKind, BlockAlignment, CGM);
+ }
+
+ return Name;
+}
+
static void pushCaptureCleanup(BlockCaptureEntityKind CaptureKind,
Address Field, QualType CaptureType,
- BlockFieldFlags Flags, bool EHOnly,
- CodeGenFunction &CGF) {
+ BlockFieldFlags Flags, bool ForCopyHelper,
+ VarDecl *Var, CodeGenFunction &CGF) {
+ bool EHOnly = ForCopyHelper;
+
switch (CaptureKind) {
case BlockCaptureEntityKind::CXXRecord:
case BlockCaptureEntityKind::ARCWeak:
@@ -1634,15 +1962,34 @@ static void pushCaptureCleanup(BlockCaptureEntityKind CaptureKind,
case BlockCaptureEntityKind::BlockObject: {
if (!EHOnly || CGF.getLangOpts().Exceptions) {
CleanupKind Kind = EHOnly ? EHCleanup : NormalAndEHCleanup;
- CGF.enterByrefCleanup(Kind, Field, Flags, /*LoadBlockVarAddr*/ true);
+ // Calls to _Block_object_dispose along the EH path in the copy helper
+ // function don't throw as newly-copied __block variables always have a
+ // reference count of 2.
+ bool CanThrow =
+ !ForCopyHelper && CGF.cxxDestructorCanThrow(CaptureType);
+ CGF.enterByrefCleanup(Kind, Field, Flags, /*LoadBlockVarAddr*/ true,
+ CanThrow);
}
break;
}
case BlockCaptureEntityKind::None:
- llvm_unreachable("unexpected BlockCaptureEntityKind");
+ break;
}
}
+static void setBlockHelperAttributesVisibility(bool CapturesNonExternalType,
+ llvm::Function *Fn,
+ const CGFunctionInfo &FI,
+ CodeGenModule &CGM) {
+ if (CapturesNonExternalType) {
+ CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);
+ } else {
+ Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
+ Fn->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+ CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn);
+ CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn);
+ }
+}
/// Generate the copy-helper function for a block closure object:
/// static void block_copy_helper(block_t *dst, block_t *src);
/// The runtime will have previously initialized 'dst' by doing a
@@ -1653,42 +2000,51 @@ static void pushCaptureCleanup(BlockCaptureEntityKind CaptureKind,
/// the contents of an individual __block variable to the heap.
llvm::Constant *
CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) {
+ SmallVector<BlockCaptureManagedEntity, 4> CopiedCaptures;
+ findBlockCapturedManagedEntities(blockInfo, getLangOpts(), CopiedCaptures);
+ std::string FuncName =
+ getCopyDestroyHelperFuncName(CopiedCaptures, blockInfo.BlockAlign,
+ CaptureStrKind::CopyHelper, CGM);
+
+ if (llvm::GlobalValue *Func = CGM.getModule().getNamedValue(FuncName))
+ return llvm::ConstantExpr::getBitCast(Func, VoidPtrTy);
+
ASTContext &C = getContext();
+ QualType ReturnTy = C.VoidTy;
+
FunctionArgList args;
- ImplicitParamDecl DstDecl(getContext(), C.VoidPtrTy,
- ImplicitParamDecl::Other);
+ ImplicitParamDecl DstDecl(C, C.VoidPtrTy, ImplicitParamDecl::Other);
args.push_back(&DstDecl);
- ImplicitParamDecl SrcDecl(getContext(), C.VoidPtrTy,
- ImplicitParamDecl::Other);
+ ImplicitParamDecl SrcDecl(C, C.VoidPtrTy, ImplicitParamDecl::Other);
args.push_back(&SrcDecl);
const CGFunctionInfo &FI =
- CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args);
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, args);
// FIXME: it would be nice if these were mergeable with things with
// identical semantics.
llvm::FunctionType *LTy = CGM.getTypes().GetFunctionType(FI);
llvm::Function *Fn =
- llvm::Function::Create(LTy, llvm::GlobalValue::InternalLinkage,
- "__copy_helper_block_", &CGM.getModule());
+ llvm::Function::Create(LTy, llvm::GlobalValue::LinkOnceODRLinkage,
+ FuncName, &CGM.getModule());
- IdentifierInfo *II
- = &CGM.getContext().Idents.get("__copy_helper_block_");
+ IdentifierInfo *II = &C.Idents.get(FuncName);
- FunctionDecl *FD = FunctionDecl::Create(C,
- C.getTranslationUnitDecl(),
- SourceLocation(),
- SourceLocation(), II, C.VoidTy,
- nullptr, SC_Static,
- false,
- false);
+ SmallVector<QualType, 2> ArgTys;
+ ArgTys.push_back(C.VoidPtrTy);
+ ArgTys.push_back(C.VoidPtrTy);
+ QualType FunctionTy = C.getFunctionType(ReturnTy, ArgTys, {});
- CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);
+ FunctionDecl *FD = FunctionDecl::Create(
+ C, C.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II,
+ FunctionTy, nullptr, SC_Static, false, false);
- StartFunction(FD, C.VoidTy, Fn, FI, args);
- ApplyDebugLocation NL{*this, blockInfo.getBlockExpr()->getLocStart()};
+ setBlockHelperAttributesVisibility(blockInfo.CapturesNonExternalType, Fn, FI,
+ CGM);
+ StartFunction(FD, ReturnTy, Fn, FI, args);
+ ApplyDebugLocation NL{*this, blockInfo.getBlockExpr()->getBeginLoc()};
llvm::Type *structPtrTy = blockInfo.StructureType->getPointerTo();
Address src = GetAddrOfLocalVar(&SrcDecl);
@@ -1699,88 +2055,81 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) {
dst = Address(Builder.CreateLoad(dst), blockInfo.BlockAlign);
dst = Builder.CreateBitCast(dst, structPtrTy, "block.dest");
- SmallVector<BlockCaptureManagedEntity, 4> CopiedCaptures;
- findBlockCapturedManagedEntities(blockInfo, getLangOpts(), CopiedCaptures,
- computeCopyInfoForBlockCapture);
-
for (const auto &CopiedCapture : CopiedCaptures) {
- const BlockDecl::Capture &CI = CopiedCapture.CI;
- const CGBlockInfo::Capture &capture = CopiedCapture.Capture;
+ const BlockDecl::Capture &CI = *CopiedCapture.CI;
+ const CGBlockInfo::Capture &capture = *CopiedCapture.Capture;
QualType captureType = CI.getVariable()->getType();
- BlockFieldFlags flags = CopiedCapture.Flags;
+ BlockFieldFlags flags = CopiedCapture.CopyFlags;
unsigned index = capture.getIndex();
Address srcField = Builder.CreateStructGEP(src, index, capture.getOffset());
Address dstField = Builder.CreateStructGEP(dst, index, capture.getOffset());
- // If there's an explicit copy expression, we do that.
- if (CI.getCopyExpr()) {
- assert(CopiedCapture.Kind == BlockCaptureEntityKind::CXXRecord);
+ switch (CopiedCapture.CopyKind) {
+ case BlockCaptureEntityKind::CXXRecord:
+ // If there's an explicit copy expression, we do that.
+ assert(CI.getCopyExpr() && "copy expression for variable is missing");
EmitSynthesizedCXXCopyCtor(dstField, srcField, CI.getCopyExpr());
- } else if (CopiedCapture.Kind == BlockCaptureEntityKind::ARCWeak) {
+ break;
+ case BlockCaptureEntityKind::ARCWeak:
EmitARCCopyWeak(dstField, srcField);
- // If this is a C struct that requires non-trivial copy construction, emit a
- // call to its copy constructor.
- } else if (CopiedCapture.Kind ==
- BlockCaptureEntityKind::NonTrivialCStruct) {
+ break;
+ case BlockCaptureEntityKind::NonTrivialCStruct: {
+ // If this is a C struct that requires non-trivial copy construction,
+ // emit a call to its copy constructor.
QualType varType = CI.getVariable()->getType();
callCStructCopyConstructor(MakeAddrLValue(dstField, varType),
MakeAddrLValue(srcField, varType));
- } else {
+ break;
+ }
+ case BlockCaptureEntityKind::ARCStrong: {
llvm::Value *srcValue = Builder.CreateLoad(srcField, "blockcopy.src");
- if (CopiedCapture.Kind == BlockCaptureEntityKind::ARCStrong) {
- // At -O0, store null into the destination field (so that the
- // storeStrong doesn't over-release) and then call storeStrong.
- // This is a workaround to not having an initStrong call.
- if (CGM.getCodeGenOpts().OptimizationLevel == 0) {
- auto *ty = cast<llvm::PointerType>(srcValue->getType());
- llvm::Value *null = llvm::ConstantPointerNull::get(ty);
- Builder.CreateStore(null, dstField);
- EmitARCStoreStrongCall(dstField, srcValue, true);
-
- // With optimization enabled, take advantage of the fact that
- // the blocks runtime guarantees a memcpy of the block data, and
- // just emit a retain of the src field.
- } else {
- EmitARCRetainNonBlock(srcValue);
-
- // Unless EH cleanup is required, we don't need this anymore, so kill
- // it. It's not quite worth the annoyance to avoid creating it in the
- // first place.
- if (!needsEHCleanup(captureType.isDestructedType()))
- cast<llvm::Instruction>(dstField.getPointer())->eraseFromParent();
- }
+ // At -O0, store null into the destination field (so that the
+ // storeStrong doesn't over-release) and then call storeStrong.
+ // This is a workaround to not having an initStrong call.
+ if (CGM.getCodeGenOpts().OptimizationLevel == 0) {
+ auto *ty = cast<llvm::PointerType>(srcValue->getType());
+ llvm::Value *null = llvm::ConstantPointerNull::get(ty);
+ Builder.CreateStore(null, dstField);
+ EmitARCStoreStrongCall(dstField, srcValue, true);
+
+ // With optimization enabled, take advantage of the fact that
+ // the blocks runtime guarantees a memcpy of the block data, and
+ // just emit a retain of the src field.
} else {
- assert(CopiedCapture.Kind == BlockCaptureEntityKind::BlockObject);
- srcValue = Builder.CreateBitCast(srcValue, VoidPtrTy);
- llvm::Value *dstAddr =
- Builder.CreateBitCast(dstField.getPointer(), VoidPtrTy);
- llvm::Value *args[] = {
- dstAddr, srcValue, llvm::ConstantInt::get(Int32Ty, flags.getBitMask())
- };
-
- const VarDecl *variable = CI.getVariable();
- bool copyCanThrow = false;
- if (CI.isByRef() && variable->getType()->getAsCXXRecordDecl()) {
- const Expr *copyExpr =
- CGM.getContext().getBlockVarCopyInits(variable);
- if (copyExpr) {
- copyCanThrow = true; // FIXME: reuse the noexcept logic
- }
- }
+ EmitARCRetainNonBlock(srcValue);
- if (copyCanThrow) {
- EmitRuntimeCallOrInvoke(CGM.getBlockObjectAssign(), args);
- } else {
- EmitNounwindRuntimeCall(CGM.getBlockObjectAssign(), args);
- }
+ // Unless EH cleanup is required, we don't need this anymore, so kill
+ // it. It's not quite worth the annoyance to avoid creating it in the
+ // first place.
+ if (!needsEHCleanup(captureType.isDestructedType()))
+ cast<llvm::Instruction>(dstField.getPointer())->eraseFromParent();
}
+ break;
+ }
+ case BlockCaptureEntityKind::BlockObject: {
+ llvm::Value *srcValue = Builder.CreateLoad(srcField, "blockcopy.src");
+ srcValue = Builder.CreateBitCast(srcValue, VoidPtrTy);
+ llvm::Value *dstAddr =
+ Builder.CreateBitCast(dstField.getPointer(), VoidPtrTy);
+ llvm::Value *args[] = {
+ dstAddr, srcValue, llvm::ConstantInt::get(Int32Ty, flags.getBitMask())
+ };
+
+ if (CI.isByRef() && C.getBlockVarCopyInit(CI.getVariable()).canThrow())
+ EmitRuntimeCallOrInvoke(CGM.getBlockObjectAssign(), args);
+ else
+ EmitNounwindRuntimeCall(CGM.getBlockObjectAssign(), args);
+ break;
+ }
+ case BlockCaptureEntityKind::None:
+ continue;
}
// Ensure that we destroy the copied object if an exception is thrown later
// in the helper function.
- pushCaptureCleanup(CopiedCapture.Kind, dstField, captureType, flags, /*EHOnly*/ true,
- *this);
+ pushCaptureCleanup(CopiedCapture.CopyKind, dstField, captureType, flags,
+ /*ForCopyHelper*/ true, CI.getVariable(), *this);
}
FinishFunction();
@@ -1800,7 +2149,7 @@ getBlockFieldFlagsForObjCObjectPointer(const BlockDecl::Capture &CI,
static std::pair<BlockCaptureEntityKind, BlockFieldFlags>
computeDestroyInfoForBlockCapture(const BlockDecl::Capture &CI, QualType T,
const LangOptions &LangOpts) {
- if (CI.isByRef()) {
+ if (CI.isEscapingByref()) {
BlockFieldFlags Flags = BLOCK_FIELD_IS_BYREF;
if (T.isObjCGCWeak())
Flags |= BLOCK_FIELD_IS_WEAK;
@@ -1844,37 +2193,50 @@ computeDestroyInfoForBlockCapture(const BlockDecl::Capture &CI, QualType T,
/// variable.
llvm::Constant *
CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) {
+ SmallVector<BlockCaptureManagedEntity, 4> DestroyedCaptures;
+ findBlockCapturedManagedEntities(blockInfo, getLangOpts(), DestroyedCaptures);
+ std::string FuncName =
+ getCopyDestroyHelperFuncName(DestroyedCaptures, blockInfo.BlockAlign,
+ CaptureStrKind::DisposeHelper, CGM);
+
+ if (llvm::GlobalValue *Func = CGM.getModule().getNamedValue(FuncName))
+ return llvm::ConstantExpr::getBitCast(Func, VoidPtrTy);
+
ASTContext &C = getContext();
+ QualType ReturnTy = C.VoidTy;
+
FunctionArgList args;
- ImplicitParamDecl SrcDecl(getContext(), C.VoidPtrTy,
- ImplicitParamDecl::Other);
+ ImplicitParamDecl SrcDecl(C, C.VoidPtrTy, ImplicitParamDecl::Other);
args.push_back(&SrcDecl);
const CGFunctionInfo &FI =
- CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args);
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, args);
// FIXME: We'd like to put these into a mergable by content, with
// internal linkage.
llvm::FunctionType *LTy = CGM.getTypes().GetFunctionType(FI);
llvm::Function *Fn =
- llvm::Function::Create(LTy, llvm::GlobalValue::InternalLinkage,
- "__destroy_helper_block_", &CGM.getModule());
+ llvm::Function::Create(LTy, llvm::GlobalValue::LinkOnceODRLinkage,
+ FuncName, &CGM.getModule());
- IdentifierInfo *II
- = &CGM.getContext().Idents.get("__destroy_helper_block_");
+ IdentifierInfo *II = &C.Idents.get(FuncName);
+
+ SmallVector<QualType, 1> ArgTys;
+ ArgTys.push_back(C.VoidPtrTy);
+ QualType FunctionTy = C.getFunctionType(ReturnTy, ArgTys, {});
- FunctionDecl *FD = FunctionDecl::Create(C, C.getTranslationUnitDecl(),
- SourceLocation(),
- SourceLocation(), II, C.VoidTy,
- nullptr, SC_Static,
- false, false);
+ FunctionDecl *FD = FunctionDecl::Create(
+ C, C.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II,
+ FunctionTy, nullptr, SC_Static, false, false);
- CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);
+ setBlockHelperAttributesVisibility(blockInfo.CapturesNonExternalType, Fn, FI,
+ CGM);
+ StartFunction(FD, ReturnTy, Fn, FI, args);
+ markAsIgnoreThreadCheckingAtRuntime(Fn);
- StartFunction(FD, C.VoidTy, Fn, FI, args);
- ApplyDebugLocation NL{*this, blockInfo.getBlockExpr()->getLocStart()};
+ ApplyDebugLocation NL{*this, blockInfo.getBlockExpr()->getBeginLoc()};
llvm::Type *structPtrTy = blockInfo.StructureType->getPointerTo();
@@ -1884,20 +2246,17 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) {
CodeGenFunction::RunCleanupsScope cleanups(*this);
- SmallVector<BlockCaptureManagedEntity, 4> DestroyedCaptures;
- findBlockCapturedManagedEntities(blockInfo, getLangOpts(), DestroyedCaptures,
- computeDestroyInfoForBlockCapture);
-
for (const auto &DestroyedCapture : DestroyedCaptures) {
- const BlockDecl::Capture &CI = DestroyedCapture.CI;
- const CGBlockInfo::Capture &capture = DestroyedCapture.Capture;
- BlockFieldFlags flags = DestroyedCapture.Flags;
+ const BlockDecl::Capture &CI = *DestroyedCapture.CI;
+ const CGBlockInfo::Capture &capture = *DestroyedCapture.Capture;
+ BlockFieldFlags flags = DestroyedCapture.DisposeFlags;
Address srcField =
Builder.CreateStructGEP(src, capture.getIndex(), capture.getOffset());
- pushCaptureCleanup(DestroyedCapture.Kind, srcField,
- CI.getVariable()->getType(), flags, /*EHOnly*/ false, *this);
+ pushCaptureCleanup(DestroyedCapture.DisposeKind, srcField,
+ CI.getVariable()->getType(), flags,
+ /*ForCopyHelper*/ false, CI.getVariable(), *this);
}
cleanups.ForceCleanup();
@@ -1937,7 +2296,7 @@ public:
field = CGF.Builder.CreateBitCast(field, CGF.Int8PtrTy->getPointerTo(0));
llvm::Value *value = CGF.Builder.CreateLoad(field);
- CGF.BuildBlockRelease(value, Flags | BLOCK_BYREF_CALLER);
+ CGF.BuildBlockRelease(value, Flags | BLOCK_BYREF_CALLER, false);
}
void profileImpl(llvm::FoldingSetNodeID &id) const override {
@@ -2093,19 +2452,17 @@ generateByrefCopyHelper(CodeGenFunction &CGF, const BlockByrefInfo &byrefInfo,
BlockByrefHelpers &generator) {
ASTContext &Context = CGF.getContext();
- QualType R = Context.VoidTy;
+ QualType ReturnTy = Context.VoidTy;
FunctionArgList args;
- ImplicitParamDecl Dst(CGF.getContext(), Context.VoidPtrTy,
- ImplicitParamDecl::Other);
+ ImplicitParamDecl Dst(Context, Context.VoidPtrTy, ImplicitParamDecl::Other);
args.push_back(&Dst);
- ImplicitParamDecl Src(CGF.getContext(), Context.VoidPtrTy,
- ImplicitParamDecl::Other);
+ ImplicitParamDecl Src(Context, Context.VoidPtrTy, ImplicitParamDecl::Other);
args.push_back(&Src);
const CGFunctionInfo &FI =
- CGF.CGM.getTypes().arrangeBuiltinFunctionDeclaration(R, args);
+ CGF.CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, args);
llvm::FunctionType *LTy = CGF.CGM.getTypes().GetFunctionType(FI);
@@ -2118,16 +2475,18 @@ generateByrefCopyHelper(CodeGenFunction &CGF, const BlockByrefInfo &byrefInfo,
IdentifierInfo *II
= &Context.Idents.get("__Block_byref_object_copy_");
- FunctionDecl *FD = FunctionDecl::Create(Context,
- Context.getTranslationUnitDecl(),
- SourceLocation(),
- SourceLocation(), II, R, nullptr,
- SC_Static,
- false, false);
+ SmallVector<QualType, 2> ArgTys;
+ ArgTys.push_back(Context.VoidPtrTy);
+ ArgTys.push_back(Context.VoidPtrTy);
+ QualType FunctionTy = Context.getFunctionType(ReturnTy, ArgTys, {});
+
+ FunctionDecl *FD = FunctionDecl::Create(
+ Context, Context.getTranslationUnitDecl(), SourceLocation(),
+ SourceLocation(), II, FunctionTy, nullptr, SC_Static, false, false);
CGF.CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);
- CGF.StartFunction(FD, R, Fn, FI, args);
+ CGF.StartFunction(FD, ReturnTy, Fn, FI, args);
if (generator.needsCopy()) {
llvm::Type *byrefPtrType = byrefInfo.Type->getPointerTo(0);
@@ -2192,12 +2551,13 @@ generateByrefDisposeHelper(CodeGenFunction &CGF,
IdentifierInfo *II
= &Context.Idents.get("__Block_byref_object_dispose_");
- FunctionDecl *FD = FunctionDecl::Create(Context,
- Context.getTranslationUnitDecl(),
- SourceLocation(),
- SourceLocation(), II, R, nullptr,
- SC_Static,
- false, false);
+ SmallVector<QualType, 1> ArgTys;
+ ArgTys.push_back(Context.VoidPtrTy);
+ QualType FunctionTy = Context.getFunctionType(R, ArgTys, {});
+
+ FunctionDecl *FD = FunctionDecl::Create(
+ Context, Context.getTranslationUnitDecl(), SourceLocation(),
+ SourceLocation(), II, FunctionTy, nullptr, SC_Static, false, false);
CGF.CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);
@@ -2254,6 +2614,9 @@ BlockByrefHelpers *
CodeGenFunction::buildByrefHelpers(llvm::StructType &byrefType,
const AutoVarEmission &emission) {
const VarDecl &var = *emission.Variable;
+ assert(var.isEscapingByref() &&
+ "only escaping __block variables need byref helpers");
+
QualType type = var.getType();
auto &byrefInfo = getBlockByrefInfo(&var);
@@ -2264,7 +2627,8 @@ CodeGenFunction::buildByrefHelpers(llvm::StructType &byrefType,
byrefInfo.ByrefAlignment.alignmentAtOffset(byrefInfo.FieldOffset);
if (const CXXRecordDecl *record = type->getAsCXXRecordDecl()) {
- const Expr *copyExpr = CGM.getContext().getBlockVarCopyInits(&var);
+ const Expr *copyExpr =
+ CGM.getContext().getBlockVarCopyInit(&var).getCopyExpr();
if (!copyExpr && record->hasTrivialDestructor()) return nullptr;
return ::buildByrefHelpers(
@@ -2567,19 +2931,25 @@ void CodeGenFunction::emitByrefStructureInit(const AutoVarEmission &emission) {
}
}
-void CodeGenFunction::BuildBlockRelease(llvm::Value *V, BlockFieldFlags flags) {
+void CodeGenFunction::BuildBlockRelease(llvm::Value *V, BlockFieldFlags flags,
+ bool CanThrow) {
llvm::Value *F = CGM.getBlockObjectDispose();
llvm::Value *args[] = {
Builder.CreateBitCast(V, Int8PtrTy),
llvm::ConstantInt::get(Int32Ty, flags.getBitMask())
};
- EmitNounwindRuntimeCall(F, args); // FIXME: throwing destructors?
+
+ if (CanThrow)
+ EmitRuntimeCallOrInvoke(F, args);
+ else
+ EmitNounwindRuntimeCall(F, args);
}
void CodeGenFunction::enterByrefCleanup(CleanupKind Kind, Address Addr,
BlockFieldFlags Flags,
- bool LoadBlockVarAddr) {
- EHStack.pushCleanup<CallBlockRelease>(Kind, Addr, Flags, LoadBlockVarAddr);
+ bool LoadBlockVarAddr, bool CanThrow) {
+ EHStack.pushCleanup<CallBlockRelease>(Kind, Addr, Flags, LoadBlockVarAddr,
+ CanThrow);
}
/// Adjust the declaration of something from the blocks API.
diff --git a/lib/CodeGen/CGBlocks.h b/lib/CodeGen/CGBlocks.h
index 5abf82b3f6e1..3f9fc16d9b10 100644
--- a/lib/CodeGen/CGBlocks.h
+++ b/lib/CodeGen/CGBlocks.h
@@ -60,7 +60,7 @@ enum BlockLiteralFlags {
BLOCK_IS_GLOBAL = (1 << 28),
BLOCK_USE_STRET = (1 << 29),
BLOCK_HAS_SIGNATURE = (1 << 30),
- BLOCK_HAS_EXTENDED_LAYOUT = (1 << 31)
+ BLOCK_HAS_EXTENDED_LAYOUT = (1u << 31)
};
class BlockFlags {
uint32_t flags;
@@ -132,6 +132,9 @@ public:
friend bool operator&(BlockFieldFlags l, BlockFieldFlags r) {
return (l.flags & r.flags);
}
+ bool operator==(BlockFieldFlags Other) const {
+ return flags == Other.flags;
+ }
};
inline BlockFieldFlags operator|(BlockFieldFlag_t l, BlockFieldFlag_t r) {
return BlockFieldFlags(l) | BlockFieldFlags(r);
@@ -231,6 +234,11 @@ public:
/// and their layout meta-data has been generated.
bool HasCapturedVariableLayout : 1;
+ /// Indicates whether an object of a non-external C++ class is captured. This
+ /// bit is used to determine the linkage of the block copy/destroy helper
+ /// functions.
+ bool CapturesNonExternalType : 1;
+
/// The mapping of allocated indexes within the block.
llvm::DenseMap<const VarDecl*, Capture> Captures;
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index e99121c46d9b..a718f2f19aa6 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -21,10 +21,11 @@
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
-#include "clang/Analysis/Analyses/OSLog.h"
+#include "clang/AST/OSLog.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/CodeGen/CGFunctionInfo.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
@@ -93,11 +94,11 @@ static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
return V;
}
-/// Utility to insert an atomic instruction based on Instrinsic::ID
+/// Utility to insert an atomic instruction based on Intrinsic::ID
/// and the expression node.
-static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
- llvm::AtomicRMWInst::BinOp Kind,
- const CallExpr *E) {
+static Value *MakeBinaryAtomicValue(
+ CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E,
+ AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
QualType T = E->getType();
assert(E->getArg(0)->getType()->isPointerType());
assert(CGF.getContext().hasSameUnqualifiedType(T,
@@ -119,7 +120,7 @@ static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
Args[1] = EmitToInt(CGF, Args[1], T, IntType);
llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
- Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
+ Kind, Args[0], Args[1], Ordering);
return EmitFromInt(CGF, Result, T, ValueType);
}
@@ -151,7 +152,7 @@ static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
}
-/// Utility to insert an atomic instruction based Instrinsic::ID and
+/// Utility to insert an atomic instruction based Intrinsic::ID and
/// the expression node, where the return value is the result of the
/// operation.
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
@@ -200,6 +201,9 @@ static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
/// cmpxchg result or the old value.
///
/// @returns result of cmpxchg, according to ReturnBool
+///
+/// Note: In order to lower Microsoft's _InterlockedCompareExchange* intrinsics
+/// invoke the function EmitAtomicCmpXchgForMSIntrin.
static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
bool ReturnBool) {
QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
@@ -230,6 +234,72 @@ static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
ValueType);
}
+/// This function should be invoked to emit atomic cmpxchg for Microsoft's
+/// _InterlockedCompareExchange* intrinsics which have the following signature:
+/// T _InterlockedCompareExchange(T volatile *Destination,
+/// T Exchange,
+/// T Comparand);
+///
+/// Whereas the llvm 'cmpxchg' instruction has the following syntax:
+/// cmpxchg *Destination, Comparand, Exchange.
+/// So we need to swap Comparand and Exchange when invoking
+/// CreateAtomicCmpXchg. That is the reason we could not use the above utility
+/// function MakeAtomicCmpXchgValue since it expects the arguments to be
+/// already swapped.
+
+static
+Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E,
+ AtomicOrdering SuccessOrdering = AtomicOrdering::SequentiallyConsistent) {
+ assert(E->getArg(0)->getType()->isPointerType());
+ assert(CGF.getContext().hasSameUnqualifiedType(
+ E->getType(), E->getArg(0)->getType()->getPointeeType()));
+ assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
+ E->getArg(1)->getType()));
+ assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
+ E->getArg(2)->getType()));
+
+ auto *Destination = CGF.EmitScalarExpr(E->getArg(0));
+ auto *Comparand = CGF.EmitScalarExpr(E->getArg(2));
+ auto *Exchange = CGF.EmitScalarExpr(E->getArg(1));
+
+ // For Release ordering, the failure ordering should be Monotonic.
+ auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release ?
+ AtomicOrdering::Monotonic :
+ SuccessOrdering;
+
+ auto *Result = CGF.Builder.CreateAtomicCmpXchg(
+ Destination, Comparand, Exchange,
+ SuccessOrdering, FailureOrdering);
+ Result->setVolatile(true);
+ return CGF.Builder.CreateExtractValue(Result, 0);
+}
+
+static Value *EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E,
+ AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
+ assert(E->getArg(0)->getType()->isPointerType());
+
+ auto *IntTy = CGF.ConvertType(E->getType());
+ auto *Result = CGF.Builder.CreateAtomicRMW(
+ AtomicRMWInst::Add,
+ CGF.EmitScalarExpr(E->getArg(0)),
+ ConstantInt::get(IntTy, 1),
+ Ordering);
+ return CGF.Builder.CreateAdd(Result, ConstantInt::get(IntTy, 1));
+}
+
+static Value *EmitAtomicDecrementValue(CodeGenFunction &CGF, const CallExpr *E,
+ AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
+ assert(E->getArg(0)->getType()->isPointerType());
+
+ auto *IntTy = CGF.ConvertType(E->getType());
+ auto *Result = CGF.Builder.CreateAtomicRMW(
+ AtomicRMWInst::Sub,
+ CGF.EmitScalarExpr(E->getArg(0)),
+ ConstantInt::get(IntTy, 1),
+ Ordering);
+ return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1));
+}
+
// Emit a simple mangled intrinsic that has 1 argument and a return type
// matching the argument type.
static Value *emitUnaryBuiltin(CodeGenFunction &CGF,
@@ -316,7 +386,7 @@ static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
const CallExpr *E, llvm::Constant *calleeValue) {
- CGCallee callee = CGCallee::forDirect(calleeValue, FD);
+ CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD));
return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
}
@@ -461,7 +531,7 @@ CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
assert(DIter != LocalDeclMap.end());
return EmitLoadOfScalar(DIter->second, /*volatile=*/false,
- getContext().getSizeType(), E->getLocStart());
+ getContext().getSizeType(), E->getBeginLoc());
}
}
@@ -485,7 +555,7 @@ CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
}
namespace {
-/// A struct to generically desribe a bit test intrinsic.
+/// A struct to generically describe a bit test intrinsic.
struct BitTest {
enum ActionKind : uint8_t { TestOnly, Complement, Reset, Set };
enum InterlockingKind : uint8_t {
@@ -711,8 +781,11 @@ static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind,
} else {
Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex";
Arg1Ty = CGF.Int8PtrTy;
- Arg1 = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::frameaddress),
- llvm::ConstantInt::get(CGF.Int32Ty, 0));
+ if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) {
+ Arg1 = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::sponentry));
+ } else
+ Arg1 = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::frameaddress),
+ llvm::ConstantInt::get(CGF.Int32Ty, 0));
}
// Mark the call site and declaration with ReturnsTwice.
@@ -745,6 +818,30 @@ enum class CodeGenFunction::MSVCIntrin {
_InterlockedIncrement,
_InterlockedOr,
_InterlockedXor,
+ _InterlockedExchangeAdd_acq,
+ _InterlockedExchangeAdd_rel,
+ _InterlockedExchangeAdd_nf,
+ _InterlockedExchange_acq,
+ _InterlockedExchange_rel,
+ _InterlockedExchange_nf,
+ _InterlockedCompareExchange_acq,
+ _InterlockedCompareExchange_rel,
+ _InterlockedCompareExchange_nf,
+ _InterlockedOr_acq,
+ _InterlockedOr_rel,
+ _InterlockedOr_nf,
+ _InterlockedXor_acq,
+ _InterlockedXor_rel,
+ _InterlockedXor_nf,
+ _InterlockedAnd_acq,
+ _InterlockedAnd_rel,
+ _InterlockedAnd_nf,
+ _InterlockedIncrement_acq,
+ _InterlockedIncrement_rel,
+ _InterlockedIncrement_nf,
+ _InterlockedDecrement_acq,
+ _InterlockedDecrement_rel,
+ _InterlockedDecrement_nf,
__fastfail,
};
@@ -811,25 +908,74 @@ Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
case MSVCIntrin::_InterlockedXor:
return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
-
- case MSVCIntrin::_InterlockedDecrement: {
- llvm::Type *IntTy = ConvertType(E->getType());
- AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
- AtomicRMWInst::Sub,
- EmitScalarExpr(E->getArg(0)),
- ConstantInt::get(IntTy, 1),
- llvm::AtomicOrdering::SequentiallyConsistent);
- return Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1));
- }
- case MSVCIntrin::_InterlockedIncrement: {
- llvm::Type *IntTy = ConvertType(E->getType());
- AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
- AtomicRMWInst::Add,
- EmitScalarExpr(E->getArg(0)),
- ConstantInt::get(IntTy, 1),
- llvm::AtomicOrdering::SequentiallyConsistent);
- return Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1));
- }
+ case MSVCIntrin::_InterlockedExchangeAdd_acq:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
+ AtomicOrdering::Acquire);
+ case MSVCIntrin::_InterlockedExchangeAdd_rel:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
+ AtomicOrdering::Release);
+ case MSVCIntrin::_InterlockedExchangeAdd_nf:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
+ AtomicOrdering::Monotonic);
+ case MSVCIntrin::_InterlockedExchange_acq:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
+ AtomicOrdering::Acquire);
+ case MSVCIntrin::_InterlockedExchange_rel:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
+ AtomicOrdering::Release);
+ case MSVCIntrin::_InterlockedExchange_nf:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
+ AtomicOrdering::Monotonic);
+ case MSVCIntrin::_InterlockedCompareExchange_acq:
+ return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Acquire);
+ case MSVCIntrin::_InterlockedCompareExchange_rel:
+ return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Release);
+ case MSVCIntrin::_InterlockedCompareExchange_nf:
+ return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Monotonic);
+ case MSVCIntrin::_InterlockedOr_acq:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
+ AtomicOrdering::Acquire);
+ case MSVCIntrin::_InterlockedOr_rel:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
+ AtomicOrdering::Release);
+ case MSVCIntrin::_InterlockedOr_nf:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
+ AtomicOrdering::Monotonic);
+ case MSVCIntrin::_InterlockedXor_acq:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
+ AtomicOrdering::Acquire);
+ case MSVCIntrin::_InterlockedXor_rel:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
+ AtomicOrdering::Release);
+ case MSVCIntrin::_InterlockedXor_nf:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
+ AtomicOrdering::Monotonic);
+ case MSVCIntrin::_InterlockedAnd_acq:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
+ AtomicOrdering::Acquire);
+ case MSVCIntrin::_InterlockedAnd_rel:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
+ AtomicOrdering::Release);
+ case MSVCIntrin::_InterlockedAnd_nf:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
+ AtomicOrdering::Monotonic);
+ case MSVCIntrin::_InterlockedIncrement_acq:
+ return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Acquire);
+ case MSVCIntrin::_InterlockedIncrement_rel:
+ return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Release);
+ case MSVCIntrin::_InterlockedIncrement_nf:
+ return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Monotonic);
+ case MSVCIntrin::_InterlockedDecrement_acq:
+ return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Acquire);
+ case MSVCIntrin::_InterlockedDecrement_rel:
+ return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Release);
+ case MSVCIntrin::_InterlockedDecrement_nf:
+ return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Monotonic);
+
+ case MSVCIntrin::_InterlockedDecrement:
+ return EmitAtomicDecrementValue(*this, E);
+ case MSVCIntrin::_InterlockedIncrement:
+ return EmitAtomicIncrementValue(*this, E);
case MSVCIntrin::__fastfail: {
// Request immediate process termination from the kernel. The instruction
@@ -923,35 +1069,42 @@ llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
if (llvm::Function *F = CGM.getModule().getFunction(Name))
return F;
+ llvm::SmallVector<QualType, 4> ArgTys;
llvm::SmallVector<ImplicitParamDecl, 4> Params;
Params.emplace_back(Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"),
Ctx.VoidPtrTy, ImplicitParamDecl::Other);
+ ArgTys.emplace_back(Ctx.VoidPtrTy);
for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
char Size = Layout.Items[I].getSizeByte();
if (!Size)
continue;
+ QualType ArgTy = getOSLogArgType(Ctx, Size);
Params.emplace_back(
Ctx, nullptr, SourceLocation(),
- &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)),
- getOSLogArgType(Ctx, Size), ImplicitParamDecl::Other);
+ &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), ArgTy,
+ ImplicitParamDecl::Other);
+ ArgTys.emplace_back(ArgTy);
}
FunctionArgList Args;
for (auto &P : Params)
Args.push_back(&P);
+ QualType ReturnTy = Ctx.VoidTy;
+ QualType FuncionTy = Ctx.getFunctionType(ReturnTy, ArgTys, {});
+
// The helper function has linkonce_odr linkage to enable the linker to merge
// identical functions. To ensure the merging always happens, 'noinline' is
// attached to the function when compiling with -Oz.
const CGFunctionInfo &FI =
- CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, Args);
llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
llvm::Function *Fn = llvm::Function::Create(
FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule());
Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
- CGM.SetLLVMFunctionAttributes(nullptr, FI, Fn);
+ CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn);
CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn);
// Attach 'noinline' at -Oz.
@@ -962,9 +1115,9 @@ llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
IdentifierInfo *II = &Ctx.Idents.get(Name);
FunctionDecl *FD = FunctionDecl::Create(
Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II,
- Ctx.VoidTy, nullptr, SC_PrivateExtern, false, false);
+ FuncionTy, nullptr, SC_PrivateExtern, false, false);
- StartFunction(FD, Ctx.VoidTy, Fn, FI, Args);
+ StartFunction(FD, ReturnTy, Fn, FI, Args);
// Create a scope with an artificial location for the body of this function.
auto AL = ApplyDebugLocation::CreateArtificial(*this);
@@ -1024,7 +1177,12 @@ RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) {
llvm::Value *ArgVal;
- if (const Expr *TheExpr = Item.getExpr()) {
+ if (Item.getKind() == analyze_os_log::OSLogBufferItem::MaskKind) {
+ uint64_t Val = 0;
+ for (unsigned I = 0, E = Item.getMaskType().size(); I < E; ++I)
+ Val |= ((uint64_t)Item.getMaskType()[I]) << I * 8;
+ ArgVal = llvm::Constant::getIntegerValue(Int64Ty, llvm::APInt(64, Val));
+ } else if (const Expr *TheExpr = Item.getExpr()) {
ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false);
// Check if this is a retainable type.
@@ -1077,7 +1235,7 @@ static bool isSpecialMixedSignMultiply(unsigned BuiltinID,
WidthAndSignedness Op2Info,
WidthAndSignedness ResultInfo) {
return BuiltinID == Builtin::BI__builtin_mul_overflow &&
- Op1Info.Width == Op2Info.Width && Op1Info.Width >= ResultInfo.Width &&
+ std::max(Op1Info.Width, Op2Info.Width) >= ResultInfo.Width &&
Op1Info.Signed != Op2Info.Signed;
}
@@ -1098,11 +1256,20 @@ EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1;
llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp);
llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp);
+ unsigned SignedOpWidth = Op1Info.Signed ? Op1Info.Width : Op2Info.Width;
+ unsigned UnsignedOpWidth = Op1Info.Signed ? Op2Info.Width : Op1Info.Width;
+
+ // One of the operands may be smaller than the other. If so, [s|z]ext it.
+ if (SignedOpWidth < UnsignedOpWidth)
+ Signed = CGF.Builder.CreateSExt(Signed, Unsigned->getType(), "op.sext");
+ if (UnsignedOpWidth < SignedOpWidth)
+ Unsigned = CGF.Builder.CreateZExt(Unsigned, Signed->getType(), "op.zext");
llvm::Type *OpTy = Signed->getType();
llvm::Value *Zero = llvm::Constant::getNullValue(OpTy);
Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
llvm::Type *ResTy = ResultPtr.getElementType();
+ unsigned OpWidth = std::max(Op1Info.Width, Op2Info.Width);
// Take the absolute value of the signed operand.
llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero);
@@ -1120,8 +1287,8 @@ EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
if (ResultInfo.Signed) {
// Signed overflow occurs if the result is greater than INT_MAX or lesser
// than INT_MIN, i.e when |Result| > (INT_MAX + IsNegative).
- auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width)
- .zextOrSelf(Op1Info.Width);
+ auto IntMax =
+ llvm::APInt::getSignedMaxValue(ResultInfo.Width).zextOrSelf(OpWidth);
llvm::Value *MaxResult =
CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax),
CGF.Builder.CreateZExt(IsNegative, OpTy));
@@ -1139,9 +1306,9 @@ EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
llvm::Value *Underflow = CGF.Builder.CreateAnd(
IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult));
Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow);
- if (ResultInfo.Width < Op1Info.Width) {
+ if (ResultInfo.Width < OpWidth) {
auto IntMax =
- llvm::APInt::getMaxValue(ResultInfo.Width).zext(Op1Info.Width);
+ llvm::APInt::getMaxValue(ResultInfo.Width).zext(OpWidth);
llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT(
UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax));
Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow);
@@ -1252,9 +1419,61 @@ static llvm::Value *dumpRecord(CodeGenFunction &CGF, QualType RType,
return Res;
}
-RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
- unsigned BuiltinID, const CallExpr *E,
+static bool // Recursive worker behind TypeRequiresBuiltinLaunder.
+TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty,
+ llvm::SmallPtrSetImpl<const Decl *> &Seen) {
+ if (const auto *Arr = Ctx.getAsArrayType(Ty))
+ Ty = Ctx.getBaseElementType(Arr);
+ // Arrays are judged by their base element type; non-records never qualify.
+ const auto *Record = Ty->getAsCXXRecordDecl();
+ if (!Record)
+ return false;
+
+ // We've already checked this type, or are in the process of checking it.
+ if (!Seen.insert(Record).second)
+ return false;
+
+ assert(Record->hasDefinition() &&
+ "Incomplete types should already be diagnosed");
+ // Base case: the record itself is a dynamic class.
+ if (Record->isDynamicClass())
+ return true;
+ // Otherwise report true if any field's type (checked recursively) qualifies.
+ for (FieldDecl *F : Record->fields()) {
+ if (TypeRequiresBuiltinLaunderImp(Ctx, F->getType(), Seen))
+ return true;
+ }
+ return false;
+}
+
+/// Determine if the specified type requires laundering by checking if it is a
+/// dynamic class type or contains a subobject which is a dynamic class type.
+static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty) {
+ if (!CGM.getCodeGenOpts().StrictVTablePointers) // Option off: never launder.
+ return false;
+ llvm::SmallPtrSet<const Decl *, 16> Seen; // Records visited by the walk.
+ return TypeRequiresBuiltinLaunderImp(CGM.getContext(), Ty, Seen);
+}
+
+RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) {
+ llvm::Value *Src = EmitScalarExpr(E->getArg(0));
+ llvm::Value *ShiftAmt = EmitScalarExpr(E->getArg(1));
+ // Emit call E as a rotate of arg 0 by arg 1: fshr if IsRotateRight, else fshl.
+ // The builtin's shift arg may have a different type than the source arg and
+ // result, but the LLVM intrinsic uses the same type for all values.
+ llvm::Type *Ty = Src->getType();
+ ShiftAmt = Builder.CreateIntCast(ShiftAmt, Ty, /*isSigned*/false);
+
+ // Rotate is a special case of LLVM funnel shift - 1st 2 args are the same.
+ unsigned IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
+ Value *F = CGM.getIntrinsic(IID, Ty);
+ return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt }));
+}
+
+RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
+ const CallExpr *E,
ReturnValueSlot ReturnValue) {
+ const FunctionDecl *FD = GD.getDecl()->getAsFunction();
// See if we can constant fold this builtin. If so, don't emit it at all.
Expr::EvalResult Result;
if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
@@ -1537,6 +1756,26 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
return RValue::get(ComplexVal.second);
}
+ case Builtin::BI__builtin_clrsb:
+ case Builtin::BI__builtin_clrsbl:
+ case Builtin::BI__builtin_clrsbll: {
+ // clrsb(x) -> clz(x < 0 ? ~x : x) - 1
+ Value *ArgValue = EmitScalarExpr(E->getArg(0));
+
+ llvm::Type *ArgType = ArgValue->getType();
+ Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
+
+ llvm::Type *ResultType = ConvertType(E->getType());
+ Value *Zero = llvm::Constant::getNullValue(ArgType);
+ Value *IsNeg = Builder.CreateICmpSLT(ArgValue, Zero, "isneg");
+ Value *Inverse = Builder.CreateNot(ArgValue, "not");
+ Value *Tmp = Builder.CreateSelect(IsNeg, Inverse, ArgValue);
+ Value *Ctlz = Builder.CreateCall(F, {Tmp, Builder.getFalse()});
+ Value *Result = Builder.CreateSub(Ctlz, llvm::ConstantInt::get(ArgType, 1));
+ Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
+ "cast");
+ return RValue::get(Result);
+ }
case Builtin::BI__builtin_ctzs:
case Builtin::BI__builtin_ctz:
case Builtin::BI__builtin_ctzl:
@@ -1609,6 +1848,21 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
"cast");
return RValue::get(Result);
}
+ case Builtin::BI__lzcnt16:
+ case Builtin::BI__lzcnt:
+ case Builtin::BI__lzcnt64: {
+ Value *ArgValue = EmitScalarExpr(E->getArg(0));
+
+ llvm::Type *ArgType = ArgValue->getType();
+ Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
+
+ llvm::Type *ResultType = ConvertType(E->getType());
+ Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getFalse()});
+ if (Result->getType() != ResultType)
+ Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
+ "cast");
+ return RValue::get(Result);
+ }
case Builtin::BI__popcnt16:
case Builtin::BI__popcnt:
case Builtin::BI__popcnt64:
@@ -1627,46 +1881,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
"cast");
return RValue::get(Result);
}
- case Builtin::BI_rotr8:
- case Builtin::BI_rotr16:
- case Builtin::BI_rotr:
- case Builtin::BI_lrotr:
- case Builtin::BI_rotr64: {
- Value *Val = EmitScalarExpr(E->getArg(0));
- Value *Shift = EmitScalarExpr(E->getArg(1));
-
- llvm::Type *ArgType = Val->getType();
- Shift = Builder.CreateIntCast(Shift, ArgType, false);
- unsigned ArgWidth = ArgType->getIntegerBitWidth();
- Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
-
- Value *RightShiftAmt = Builder.CreateAnd(Shift, Mask);
- Value *RightShifted = Builder.CreateLShr(Val, RightShiftAmt);
- Value *LeftShiftAmt = Builder.CreateAnd(Builder.CreateNeg(Shift), Mask);
- Value *LeftShifted = Builder.CreateShl(Val, LeftShiftAmt);
- Value *Result = Builder.CreateOr(LeftShifted, RightShifted);
- return RValue::get(Result);
- }
- case Builtin::BI_rotl8:
- case Builtin::BI_rotl16:
- case Builtin::BI_rotl:
- case Builtin::BI_lrotl:
- case Builtin::BI_rotl64: {
- Value *Val = EmitScalarExpr(E->getArg(0));
- Value *Shift = EmitScalarExpr(E->getArg(1));
-
- llvm::Type *ArgType = Val->getType();
- Shift = Builder.CreateIntCast(Shift, ArgType, false);
- unsigned ArgWidth = ArgType->getIntegerBitWidth();
- Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
-
- Value *LeftShiftAmt = Builder.CreateAnd(Shift, Mask);
- Value *LeftShifted = Builder.CreateShl(Val, LeftShiftAmt);
- Value *RightShiftAmt = Builder.CreateAnd(Builder.CreateNeg(Shift), Mask);
- Value *RightShifted = Builder.CreateLShr(Val, RightShiftAmt);
- Value *Result = Builder.CreateOr(LeftShifted, RightShifted);
- return RValue::get(Result);
- }
case Builtin::BI__builtin_unpredictable: {
// Always return the argument of __builtin_unpredictable. LLVM does not
// handle this builtin. Metadata for this builtin should be added directly
@@ -1690,15 +1904,17 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
return RValue::get(Result);
}
case Builtin::BI__builtin_assume_aligned: {
- Value *PtrValue = EmitScalarExpr(E->getArg(0));
+ const Expr *Ptr = E->getArg(0);
+ Value *PtrValue = EmitScalarExpr(Ptr);
Value *OffsetValue =
(E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
- unsigned Alignment = (unsigned) AlignmentCI->getZExtValue();
+ unsigned Alignment = (unsigned)AlignmentCI->getZExtValue();
- EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue);
+ EmitAlignmentAssumption(PtrValue, Ptr, /*The expr loc is sufficient.*/ SourceLocation(),
+ Alignment, OffsetValue);
return RValue::get(PtrValue);
}
case Builtin::BI__assume:
@@ -1721,6 +1937,48 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI__builtin_bitreverse64: {
return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
}
+ case Builtin::BI__builtin_rotateleft8:
+ case Builtin::BI__builtin_rotateleft16:
+ case Builtin::BI__builtin_rotateleft32:
+ case Builtin::BI__builtin_rotateleft64:
+ case Builtin::BI_rotl8: // Microsoft variants of rotate left
+ case Builtin::BI_rotl16:
+ case Builtin::BI_rotl:
+ case Builtin::BI_lrotl:
+ case Builtin::BI_rotl64:
+ return emitRotate(E, false);
+
+ case Builtin::BI__builtin_rotateright8:
+ case Builtin::BI__builtin_rotateright16:
+ case Builtin::BI__builtin_rotateright32:
+ case Builtin::BI__builtin_rotateright64:
+ case Builtin::BI_rotr8: // Microsoft variants of rotate right
+ case Builtin::BI_rotr16:
+ case Builtin::BI_rotr:
+ case Builtin::BI_lrotr:
+ case Builtin::BI_rotr64:
+ return emitRotate(E, true);
+
+ case Builtin::BI__builtin_constant_p: {
+ llvm::Type *ResultType = ConvertType(E->getType());
+ if (CGM.getCodeGenOpts().OptimizationLevel == 0)
+ // At -O0, we don't perform inlining, so we don't need to delay the
+ // processing.
+ return RValue::get(ConstantInt::get(ResultType, 0));
+
+ const Expr *Arg = E->getArg(0);
+ QualType ArgType = Arg->getType();
+ if (!hasScalarEvaluationKind(ArgType) || ArgType->isFunctionType())
+ // We can only reason about scalar types.
+ return RValue::get(ConstantInt::get(ResultType, 0));
+
+ Value *ArgValue = EmitScalarExpr(Arg);
+ Value *F = CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType));
+ Value *Result = Builder.CreateCall(F, ArgValue);
+ if (Result->getType() != ResultType)
+ Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/false);
+ return RValue::get(Result);
+ }
case Builtin::BI__builtin_object_size: {
unsigned Type =
E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
@@ -1985,10 +2243,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI__builtin___memcpy_chk: {
// fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
- llvm::APSInt Size, DstSize;
- if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
- !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
+ Expr::EvalResult SizeResult, DstSizeResult;
+ if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
+ !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
break;
+ llvm::APSInt Size = SizeResult.Val.getInt();
+ llvm::APSInt DstSize = DstSizeResult.Val.getInt();
if (Size.ugt(DstSize))
break;
Address Dest = EmitPointerWithAlignment(E->getArg(0));
@@ -2009,10 +2269,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI__builtin___memmove_chk: {
// fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
- llvm::APSInt Size, DstSize;
- if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
- !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
+ Expr::EvalResult SizeResult, DstSizeResult;
+ if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
+ !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
break;
+ llvm::APSInt Size = SizeResult.Val.getInt();
+ llvm::APSInt DstSize = DstSizeResult.Val.getInt();
if (Size.ugt(DstSize))
break;
Address Dest = EmitPointerWithAlignment(E->getArg(0));
@@ -2047,10 +2309,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
}
case Builtin::BI__builtin___memset_chk: {
// fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
- llvm::APSInt Size, DstSize;
- if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
- !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
+ Expr::EvalResult SizeResult, DstSizeResult;
+ if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
+ !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
break;
+ llvm::APSInt Size = SizeResult.Val.getInt();
+ llvm::APSInt DstSize = DstSizeResult.Val.getInt();
if (Size.ugt(DstSize))
break;
Address Dest = EmitPointerWithAlignment(E->getArg(0));
@@ -2258,6 +2522,15 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
return RValue::get(nullptr);
}
+ case Builtin::BI__builtin_launder: {
+ const Expr *Arg = E->getArg(0);
+ QualType ArgTy = Arg->getType()->getPointeeType();
+ Value *Ptr = EmitScalarExpr(Arg);
+ if (TypeRequiresBuiltinLaunder(CGM, ArgTy))
+ Ptr = Builder.CreateLaunderInvariantGroup(Ptr);
+
+ return RValue::get(Ptr);
+ }
case Builtin::BI__sync_fetch_and_add:
case Builtin::BI__sync_fetch_and_sub:
case Builtin::BI__sync_fetch_and_or:
@@ -2952,7 +3225,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI_InterlockedExchangePointer:
return RValue::get(
EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
- case Builtin::BI_InterlockedCompareExchangePointer: {
+ case Builtin::BI_InterlockedCompareExchangePointer:
+ case Builtin::BI_InterlockedCompareExchangePointer_nf: {
llvm::Type *RTy;
llvm::IntegerType *IntType =
IntegerType::get(getLLVMContext(),
@@ -2969,10 +3243,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
llvm::Value *Comparand =
Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
- auto Result =
- Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
- AtomicOrdering::SequentiallyConsistent,
- AtomicOrdering::SequentiallyConsistent);
+ auto Ordering =
+ BuiltinID == Builtin::BI_InterlockedCompareExchangePointer_nf ?
+ AtomicOrdering::Monotonic : AtomicOrdering::SequentiallyConsistent;
+
+ auto Result = Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
+ Ordering, Ordering);
Result->setVolatile(true);
return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
@@ -2982,16 +3258,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI_InterlockedCompareExchange8:
case Builtin::BI_InterlockedCompareExchange16:
case Builtin::BI_InterlockedCompareExchange:
- case Builtin::BI_InterlockedCompareExchange64: {
- AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg(
- EmitScalarExpr(E->getArg(0)),
- EmitScalarExpr(E->getArg(2)),
- EmitScalarExpr(E->getArg(1)),
- AtomicOrdering::SequentiallyConsistent,
- AtomicOrdering::SequentiallyConsistent);
- CXI->setVolatile(true);
- return RValue::get(Builder.CreateExtractValue(CXI, 0));
- }
+ case Builtin::BI_InterlockedCompareExchange64:
+ return RValue::get(EmitAtomicCmpXchgForMSIntrin(*this, E));
case Builtin::BI_InterlockedIncrement16:
case Builtin::BI_InterlockedIncrement:
return RValue::get(
@@ -3337,24 +3605,31 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
// Create a temporary array to hold the sizes of local pointer arguments
// for the block. \p First is the position of the first size argument.
- auto CreateArrayForSizeVar = [=](unsigned First) {
- auto *AT = llvm::ArrayType::get(SizeTy, NumArgs - First);
- auto *Arr = Builder.CreateAlloca(AT);
- llvm::Value *Ptr;
+ auto CreateArrayForSizeVar = [=](unsigned First)
+ -> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> {
+ llvm::APInt ArraySize(32, NumArgs - First);
+ QualType SizeArrayTy = getContext().getConstantArrayType(
+ getContext().getSizeType(), ArraySize, ArrayType::Normal,
+ /*IndexTypeQuals=*/0);
+ auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
+ llvm::Value *TmpPtr = Tmp.getPointer();
+ llvm::Value *TmpSize = EmitLifetimeStart(
+ CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr);
+ llvm::Value *ElemPtr;
// Each of the following arguments specifies the size of the corresponding
// argument passed to the enqueued block.
auto *Zero = llvm::ConstantInt::get(IntTy, 0);
for (unsigned I = First; I < NumArgs; ++I) {
auto *Index = llvm::ConstantInt::get(IntTy, I - First);
- auto *GEP = Builder.CreateGEP(Arr, {Zero, Index});
+ auto *GEP = Builder.CreateGEP(TmpPtr, {Zero, Index});
if (I == First)
- Ptr = GEP;
+ ElemPtr = GEP;
auto *V =
Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);
Builder.CreateAlignedStore(
V, GEP, CGM.getDataLayout().getPrefTypeAlignment(SizeTy));
}
- return Ptr;
+ return std::tie(ElemPtr, TmpSize, TmpPtr);
};
// Could have events and/or varargs.
@@ -3366,24 +3641,27 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
llvm::Value *Kernel =
Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
- auto *PtrToSizeArray = CreateArrayForSizeVar(4);
+ llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
+ std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(4);
// Create a vector of the arguments, as well as a constant value to
// express to the runtime the number of variadic arguments.
std::vector<llvm::Value *> Args = {
Queue, Flags, Range,
Kernel, Block, ConstantInt::get(IntTy, NumArgs - 4),
- PtrToSizeArray};
+ ElemPtr};
std::vector<llvm::Type *> ArgTys = {
- QueueTy, IntTy, RangeTy,
- GenericVoidPtrTy, GenericVoidPtrTy, IntTy,
- PtrToSizeArray->getType()};
+ QueueTy, IntTy, RangeTy, GenericVoidPtrTy,
+ GenericVoidPtrTy, IntTy, ElemPtr->getType()};
llvm::FunctionType *FTy = llvm::FunctionType::get(
Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
- return RValue::get(
- Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
- llvm::ArrayRef<llvm::Value *>(Args)));
+ auto Call =
+ RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
+ llvm::ArrayRef<llvm::Value *>(Args)));
+ if (TmpSize)
+ EmitLifetimeEnd(TmpSize, TmpPtr);
+ return Call;
}
// Any calls now have event arguments passed.
if (NumArgs >= 7) {
@@ -3400,7 +3678,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5));
// Convert to generic address space.
EventList = Builder.CreatePointerCast(EventList, EventPtrTy);
- ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy);
+ ClkEvent = ClkEvent->getType()->isIntegerTy()
+ ? Builder.CreateBitOrPointerCast(ClkEvent, EventPtrTy)
+ : Builder.CreatePointerCast(ClkEvent, EventPtrTy);
auto Info =
CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6));
llvm::Value *Kernel =
@@ -3430,15 +3710,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
ArgTys.push_back(Int32Ty);
Name = "__enqueue_kernel_events_varargs";
- auto *PtrToSizeArray = CreateArrayForSizeVar(7);
- Args.push_back(PtrToSizeArray);
- ArgTys.push_back(PtrToSizeArray->getType());
+ llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
+ std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(7);
+ Args.push_back(ElemPtr);
+ ArgTys.push_back(ElemPtr->getType());
llvm::FunctionType *FTy = llvm::FunctionType::get(
Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
- return RValue::get(
- Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
- llvm::ArrayRef<llvm::Value *>(Args)));
+ auto Call =
+ RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
+ llvm::ArrayRef<llvm::Value *>(Args)));
+ if (TmpSize)
+ EmitLifetimeEnd(TmpSize, TmpPtr);
+ return Call;
}
LLVM_FALLTHROUGH;
}
@@ -3530,13 +3814,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI__builtin_os_log_format:
return emitBuiltinOSLogFormat(*E);
- case Builtin::BI__builtin_os_log_format_buffer_size: {
- analyze_os_log::OSLogBufferLayout Layout;
- analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout);
- return RValue::get(ConstantInt::get(ConvertType(E->getType()),
- Layout.size().getQuantity()));
- }
-
case Builtin::BI__xray_customevent: {
if (!ShouldXRayInstrumentFunction())
return RValue::getIgnored();
@@ -3703,6 +3980,16 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
// we need to do a bit cast.
llvm::Type *PTy = FTy->getParamType(i);
if (PTy != ArgValue->getType()) {
+ // XXX - vector of pointers?
+ if (auto *PtrTy = dyn_cast<llvm::PointerType>(PTy)) {
+ if (PtrTy->getAddressSpace() !=
+ ArgValue->getType()->getPointerAddressSpace()) {
+ ArgValue = Builder.CreateAddrSpaceCast(
+ ArgValue,
+ ArgValue->getType()->getPointerTo(PtrTy->getAddressSpace()));
+ }
+ }
+
assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
"Must be able to losslessly bit cast to param");
ArgValue = Builder.CreateBitCast(ArgValue, PTy);
@@ -3719,6 +4006,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
RetTy = ConvertType(BuiltinRetType);
if (RetTy != V->getType()) {
+ // XXX - vector of pointers?
+ if (auto *PtrTy = dyn_cast<llvm::PointerType>(RetTy)) {
+ if (PtrTy->getAddressSpace() != V->getType()->getPointerAddressSpace()) {
+ V = Builder.CreateAddrSpaceCast(
+ V, V->getType()->getPointerTo(PtrTy->getAddressSpace()));
+ }
+ }
+
assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
"Must be able to losslessly bit cast result type");
V = Builder.CreateBitCast(V, RetTy);
@@ -4286,6 +4581,14 @@ static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
NEONMAP0(vextq_v),
NEONMAP0(vfma_v),
NEONMAP0(vfmaq_v),
+ NEONMAP1(vfmlal_high_v, aarch64_neon_fmlal2, 0),
+ NEONMAP1(vfmlal_low_v, aarch64_neon_fmlal, 0),
+ NEONMAP1(vfmlalq_high_v, aarch64_neon_fmlal2, 0),
+ NEONMAP1(vfmlalq_low_v, aarch64_neon_fmlal, 0),
+ NEONMAP1(vfmlsl_high_v, aarch64_neon_fmlsl2, 0),
+ NEONMAP1(vfmlsl_low_v, aarch64_neon_fmlsl, 0),
+ NEONMAP1(vfmlslq_high_v, aarch64_neon_fmlsl2, 0),
+ NEONMAP1(vfmlslq_low_v, aarch64_neon_fmlsl, 0),
NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
@@ -5259,6 +5562,34 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
}
+ case NEON::BI__builtin_neon_vfmlal_low_v:
+ case NEON::BI__builtin_neon_vfmlalq_low_v: {
+ llvm::Type *InputTy =
+ llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
+ llvm::Type *Tys[2] = { Ty, InputTy };
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
+ }
+ case NEON::BI__builtin_neon_vfmlsl_low_v:
+ case NEON::BI__builtin_neon_vfmlslq_low_v: {
+ llvm::Type *InputTy =
+ llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
+ llvm::Type *Tys[2] = { Ty, InputTy };
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
+ }
+ case NEON::BI__builtin_neon_vfmlal_high_v:
+ case NEON::BI__builtin_neon_vfmlalq_high_v: {
+ llvm::Type *InputTy =
+ llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
+ llvm::Type *Tys[2] = { Ty, InputTy };
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
+ }
+ case NEON::BI__builtin_neon_vfmlsl_high_v:
+ case NEON::BI__builtin_neon_vfmlslq_high_v: {
+ llvm::Type *InputTy =
+ llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
+ llvm::Type *Tys[2] = { Ty, InputTy };
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
+ }
}
assert(Int && "Expected valid intrinsic number");
@@ -5506,10 +5837,11 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
llvm::FunctionType *FTy =
llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
- APSInt Value;
- if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext()))
+ Expr::EvalResult Result;
+ if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
llvm_unreachable("Sema will ensure that the parameter is constant");
+ llvm::APSInt Value = Result.Val.getInt();
uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
llvm::InlineAsm *Emit =
@@ -5991,6 +6323,120 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
case ARM::BI_InterlockedIncrement64:
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
+ case ARM::BI_InterlockedExchangeAdd8_acq:
+ case ARM::BI_InterlockedExchangeAdd16_acq:
+ case ARM::BI_InterlockedExchangeAdd_acq:
+ case ARM::BI_InterlockedExchangeAdd64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_acq, E);
+ case ARM::BI_InterlockedExchangeAdd8_rel:
+ case ARM::BI_InterlockedExchangeAdd16_rel:
+ case ARM::BI_InterlockedExchangeAdd_rel:
+ case ARM::BI_InterlockedExchangeAdd64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_rel, E);
+ case ARM::BI_InterlockedExchangeAdd8_nf:
+ case ARM::BI_InterlockedExchangeAdd16_nf:
+ case ARM::BI_InterlockedExchangeAdd_nf:
+ case ARM::BI_InterlockedExchangeAdd64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_nf, E);
+ case ARM::BI_InterlockedExchange8_acq:
+ case ARM::BI_InterlockedExchange16_acq:
+ case ARM::BI_InterlockedExchange_acq:
+ case ARM::BI_InterlockedExchange64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_acq, E);
+ case ARM::BI_InterlockedExchange8_rel:
+ case ARM::BI_InterlockedExchange16_rel:
+ case ARM::BI_InterlockedExchange_rel:
+ case ARM::BI_InterlockedExchange64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_rel, E);
+ case ARM::BI_InterlockedExchange8_nf:
+ case ARM::BI_InterlockedExchange16_nf:
+ case ARM::BI_InterlockedExchange_nf:
+ case ARM::BI_InterlockedExchange64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_nf, E);
+ case ARM::BI_InterlockedCompareExchange8_acq:
+ case ARM::BI_InterlockedCompareExchange16_acq:
+ case ARM::BI_InterlockedCompareExchange_acq:
+ case ARM::BI_InterlockedCompareExchange64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_acq, E);
+ case ARM::BI_InterlockedCompareExchange8_rel:
+ case ARM::BI_InterlockedCompareExchange16_rel:
+ case ARM::BI_InterlockedCompareExchange_rel:
+ case ARM::BI_InterlockedCompareExchange64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_rel, E);
+ case ARM::BI_InterlockedCompareExchange8_nf:
+ case ARM::BI_InterlockedCompareExchange16_nf:
+ case ARM::BI_InterlockedCompareExchange_nf:
+ case ARM::BI_InterlockedCompareExchange64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_nf, E);
+ case ARM::BI_InterlockedOr8_acq:
+ case ARM::BI_InterlockedOr16_acq:
+ case ARM::BI_InterlockedOr_acq:
+ case ARM::BI_InterlockedOr64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_acq, E);
+ case ARM::BI_InterlockedOr8_rel:
+ case ARM::BI_InterlockedOr16_rel:
+ case ARM::BI_InterlockedOr_rel:
+ case ARM::BI_InterlockedOr64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_rel, E);
+ case ARM::BI_InterlockedOr8_nf:
+ case ARM::BI_InterlockedOr16_nf:
+ case ARM::BI_InterlockedOr_nf:
+ case ARM::BI_InterlockedOr64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_nf, E);
+ case ARM::BI_InterlockedXor8_acq:
+ case ARM::BI_InterlockedXor16_acq:
+ case ARM::BI_InterlockedXor_acq:
+ case ARM::BI_InterlockedXor64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_acq, E);
+ case ARM::BI_InterlockedXor8_rel:
+ case ARM::BI_InterlockedXor16_rel:
+ case ARM::BI_InterlockedXor_rel:
+ case ARM::BI_InterlockedXor64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_rel, E);
+ case ARM::BI_InterlockedXor8_nf:
+ case ARM::BI_InterlockedXor16_nf:
+ case ARM::BI_InterlockedXor_nf:
+ case ARM::BI_InterlockedXor64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_nf, E);
+ case ARM::BI_InterlockedAnd8_acq:
+ case ARM::BI_InterlockedAnd16_acq:
+ case ARM::BI_InterlockedAnd_acq:
+ case ARM::BI_InterlockedAnd64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_acq, E);
+ case ARM::BI_InterlockedAnd8_rel:
+ case ARM::BI_InterlockedAnd16_rel:
+ case ARM::BI_InterlockedAnd_rel:
+ case ARM::BI_InterlockedAnd64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_rel, E);
+ case ARM::BI_InterlockedAnd8_nf:
+ case ARM::BI_InterlockedAnd16_nf:
+ case ARM::BI_InterlockedAnd_nf:
+ case ARM::BI_InterlockedAnd64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_nf, E);
+ case ARM::BI_InterlockedIncrement16_acq:
+ case ARM::BI_InterlockedIncrement_acq:
+ case ARM::BI_InterlockedIncrement64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_acq, E);
+ case ARM::BI_InterlockedIncrement16_rel:
+ case ARM::BI_InterlockedIncrement_rel:
+ case ARM::BI_InterlockedIncrement64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_rel, E);
+ case ARM::BI_InterlockedIncrement16_nf:
+ case ARM::BI_InterlockedIncrement_nf:
+ case ARM::BI_InterlockedIncrement64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_nf, E);
+ case ARM::BI_InterlockedDecrement16_acq:
+ case ARM::BI_InterlockedDecrement_acq:
+ case ARM::BI_InterlockedDecrement64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_acq, E);
+ case ARM::BI_InterlockedDecrement16_rel:
+ case ARM::BI_InterlockedDecrement_rel:
+ case ARM::BI_InterlockedDecrement64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_rel, E);
+ case ARM::BI_InterlockedDecrement16_nf:
+ case ARM::BI_InterlockedDecrement_nf:
+ case ARM::BI_InterlockedDecrement64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_nf, E);
}
// Get the last argument, which specifies the vector type.
@@ -6497,11 +6943,33 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
}
+ if (BuiltinID == AArch64::BI__getReg) {
+ Expr::EvalResult Result;
+ if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
+ llvm_unreachable("Sema will ensure that the parameter is constant");
+
+ llvm::APSInt Value = Result.Val.getInt();
+ LLVMContext &Context = CGM.getLLVMContext();
+ std::string Reg = Value == 31 ? "sp" : "x" + Value.toString(10);
+
+ llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
+ llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
+ llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
+
+ llvm::Value *F =
+ CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
+ return Builder.CreateCall(F, Metadata);
+ }
+
if (BuiltinID == AArch64::BI__builtin_arm_clrex) {
Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
return Builder.CreateCall(F);
}
+ if (BuiltinID == AArch64::BI_ReadWriteBarrier)
+ return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
+ llvm::SyncScope::SingleThread);
+
// CRC32
Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
switch (BuiltinID) {
@@ -6564,6 +7032,48 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
}
+ if (BuiltinID == AArch64::BI_ReadStatusReg ||
+ BuiltinID == AArch64::BI_WriteStatusReg) {
+ LLVMContext &Context = CGM.getLLVMContext();
+
+ unsigned SysReg =
+ E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
+
+ std::string SysRegStr;
+ llvm::raw_string_ostream(SysRegStr) <<
+ ((1 << 1) | ((SysReg >> 14) & 1)) << ":" <<
+ ((SysReg >> 11) & 7) << ":" <<
+ ((SysReg >> 7) & 15) << ":" <<
+ ((SysReg >> 3) & 15) << ":" <<
+ ( SysReg & 7);
+
+ llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
+ llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
+ llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
+
+ llvm::Type *RegisterType = Int64Ty;
+ llvm::Type *ValueType = Int32Ty;
+ llvm::Type *Types[] = { RegisterType };
+
+ if (BuiltinID == AArch64::BI_ReadStatusReg) {
+ llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
+ llvm::Value *Call = Builder.CreateCall(F, Metadata);
+
+ return Builder.CreateTrunc(Call, ValueType);
+ }
+
+ llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
+ llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
+ ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
+
+ return Builder.CreateCall(F, { Metadata, ArgValue });
+ }
+
+ if (BuiltinID == AArch64::BI_AddressOfReturnAddress) {
+ llvm::Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress);
+ return Builder.CreateCall(F);
+ }
+
// Find out if any arguments are required to be integer constant
// expressions.
unsigned ICEArguments = 0;
@@ -6659,7 +7169,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vcvth_f16_u32:
case NEON::BI__builtin_neon_vcvth_f16_u64:
usgn = true;
- // FALL THROUGH
+ LLVM_FALLTHROUGH;
case NEON::BI__builtin_neon_vcvth_f16_s16:
case NEON::BI__builtin_neon_vcvth_f16_s32:
case NEON::BI__builtin_neon_vcvth_f16_s64: {
@@ -6679,7 +7189,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
case NEON::BI__builtin_neon_vcvth_u16_f16:
usgn = true;
- // FALL THROUGH
+ LLVM_FALLTHROUGH;
case NEON::BI__builtin_neon_vcvth_s16_f16: {
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
@@ -6689,7 +7199,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
case NEON::BI__builtin_neon_vcvth_u32_f16:
usgn = true;
- // FALL THROUGH
+ LLVM_FALLTHROUGH;
case NEON::BI__builtin_neon_vcvth_s32_f16: {
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
@@ -6699,7 +7209,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
case NEON::BI__builtin_neon_vcvth_u64_f16:
usgn = true;
- // FALL THROUGH
+ LLVM_FALLTHROUGH;
case NEON::BI__builtin_neon_vcvth_s64_f16: {
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
@@ -8414,6 +8924,129 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
case AArch64::BI_InterlockedIncrement64:
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
+ case AArch64::BI_InterlockedExchangeAdd8_acq:
+ case AArch64::BI_InterlockedExchangeAdd16_acq:
+ case AArch64::BI_InterlockedExchangeAdd_acq:
+ case AArch64::BI_InterlockedExchangeAdd64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_acq, E);
+ case AArch64::BI_InterlockedExchangeAdd8_rel:
+ case AArch64::BI_InterlockedExchangeAdd16_rel:
+ case AArch64::BI_InterlockedExchangeAdd_rel:
+ case AArch64::BI_InterlockedExchangeAdd64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_rel, E);
+ case AArch64::BI_InterlockedExchangeAdd8_nf:
+ case AArch64::BI_InterlockedExchangeAdd16_nf:
+ case AArch64::BI_InterlockedExchangeAdd_nf:
+ case AArch64::BI_InterlockedExchangeAdd64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_nf, E);
+ case AArch64::BI_InterlockedExchange8_acq:
+ case AArch64::BI_InterlockedExchange16_acq:
+ case AArch64::BI_InterlockedExchange_acq:
+ case AArch64::BI_InterlockedExchange64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_acq, E);
+ case AArch64::BI_InterlockedExchange8_rel:
+ case AArch64::BI_InterlockedExchange16_rel:
+ case AArch64::BI_InterlockedExchange_rel:
+ case AArch64::BI_InterlockedExchange64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_rel, E);
+ case AArch64::BI_InterlockedExchange8_nf:
+ case AArch64::BI_InterlockedExchange16_nf:
+ case AArch64::BI_InterlockedExchange_nf:
+ case AArch64::BI_InterlockedExchange64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_nf, E);
+ case AArch64::BI_InterlockedCompareExchange8_acq:
+ case AArch64::BI_InterlockedCompareExchange16_acq:
+ case AArch64::BI_InterlockedCompareExchange_acq:
+ case AArch64::BI_InterlockedCompareExchange64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_acq, E);
+ case AArch64::BI_InterlockedCompareExchange8_rel:
+ case AArch64::BI_InterlockedCompareExchange16_rel:
+ case AArch64::BI_InterlockedCompareExchange_rel:
+ case AArch64::BI_InterlockedCompareExchange64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_rel, E);
+ case AArch64::BI_InterlockedCompareExchange8_nf:
+ case AArch64::BI_InterlockedCompareExchange16_nf:
+ case AArch64::BI_InterlockedCompareExchange_nf:
+ case AArch64::BI_InterlockedCompareExchange64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_nf, E);
+ case AArch64::BI_InterlockedOr8_acq:
+ case AArch64::BI_InterlockedOr16_acq:
+ case AArch64::BI_InterlockedOr_acq:
+ case AArch64::BI_InterlockedOr64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_acq, E);
+ case AArch64::BI_InterlockedOr8_rel:
+ case AArch64::BI_InterlockedOr16_rel:
+ case AArch64::BI_InterlockedOr_rel:
+ case AArch64::BI_InterlockedOr64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_rel, E);
+ case AArch64::BI_InterlockedOr8_nf:
+ case AArch64::BI_InterlockedOr16_nf:
+ case AArch64::BI_InterlockedOr_nf:
+ case AArch64::BI_InterlockedOr64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_nf, E);
+ case AArch64::BI_InterlockedXor8_acq:
+ case AArch64::BI_InterlockedXor16_acq:
+ case AArch64::BI_InterlockedXor_acq:
+ case AArch64::BI_InterlockedXor64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_acq, E);
+ case AArch64::BI_InterlockedXor8_rel:
+ case AArch64::BI_InterlockedXor16_rel:
+ case AArch64::BI_InterlockedXor_rel:
+ case AArch64::BI_InterlockedXor64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_rel, E);
+ case AArch64::BI_InterlockedXor8_nf:
+ case AArch64::BI_InterlockedXor16_nf:
+ case AArch64::BI_InterlockedXor_nf:
+ case AArch64::BI_InterlockedXor64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_nf, E);
+ case AArch64::BI_InterlockedAnd8_acq:
+ case AArch64::BI_InterlockedAnd16_acq:
+ case AArch64::BI_InterlockedAnd_acq:
+ case AArch64::BI_InterlockedAnd64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_acq, E);
+ case AArch64::BI_InterlockedAnd8_rel:
+ case AArch64::BI_InterlockedAnd16_rel:
+ case AArch64::BI_InterlockedAnd_rel:
+ case AArch64::BI_InterlockedAnd64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_rel, E);
+ case AArch64::BI_InterlockedAnd8_nf:
+ case AArch64::BI_InterlockedAnd16_nf:
+ case AArch64::BI_InterlockedAnd_nf:
+ case AArch64::BI_InterlockedAnd64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_nf, E);
+ case AArch64::BI_InterlockedIncrement16_acq:
+ case AArch64::BI_InterlockedIncrement_acq:
+ case AArch64::BI_InterlockedIncrement64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_acq, E);
+ case AArch64::BI_InterlockedIncrement16_rel:
+ case AArch64::BI_InterlockedIncrement_rel:
+ case AArch64::BI_InterlockedIncrement64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_rel, E);
+ case AArch64::BI_InterlockedIncrement16_nf:
+ case AArch64::BI_InterlockedIncrement_nf:
+ case AArch64::BI_InterlockedIncrement64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_nf, E);
+ case AArch64::BI_InterlockedDecrement16_acq:
+ case AArch64::BI_InterlockedDecrement_acq:
+ case AArch64::BI_InterlockedDecrement64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_acq, E);
+ case AArch64::BI_InterlockedDecrement16_rel:
+ case AArch64::BI_InterlockedDecrement_rel:
+ case AArch64::BI_InterlockedDecrement64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_rel, E);
+ case AArch64::BI_InterlockedDecrement16_nf:
+ case AArch64::BI_InterlockedDecrement_nf:
+ case AArch64::BI_InterlockedDecrement64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_nf, E);
+
+ case AArch64::BI_InterlockedAdd: {
+ Value *Arg0 = EmitScalarExpr(E->getArg(0));
+ Value *Arg1 = EmitScalarExpr(E->getArg(1));
+ AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
+ AtomicRMWInst::Add, Arg0, Arg1,
+ llvm::AtomicOrdering::SequentiallyConsistent);
+ return Builder.CreateAdd(RMWI, Arg1);
+ }
}
}
@@ -8524,8 +9157,9 @@ static Value *EmitX86CompressStore(CodeGenFunction &CGF,
}
static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
- unsigned NumElts, ArrayRef<Value *> Ops,
+ ArrayRef<Value *> Ops,
bool InvertLHS = false) {
+ unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);
@@ -8533,7 +9167,25 @@ static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
LHS = CGF.Builder.CreateNot(LHS);
return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),
- CGF.Builder.getIntNTy(std::max(NumElts, 8U)));
+ Ops[0]->getType());
+}
+
// Emits a call to the generic llvm.fshl / llvm.fshr funnel-shift intrinsic,
// overloaded on the type of Op0. IsRight selects fshr; otherwise fshl is used.
// Callers pass the same value as Op0 and Op1 to express a rotate.
+static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1,
+ Value *Amt, bool IsRight) {
+ llvm::Type *Ty = Op0->getType();
+
+ // The amount may be a scalar immediate, in which case splat it into a vector.
+ // Funnel shift amounts are treated as modulo and types are all power-of-2, so
+ // only the lowest log2 bits matter and the int cast below loses nothing.
+ if (Amt->getType() != Ty) {
+ unsigned NumElts = Ty->getVectorNumElements();
+ Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
+ Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt);
+ }
+
+ unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl;
+ Value *F = CGF.CGM.getIntrinsic(IID, Ty);
+ return CGF.Builder.CreateCall(F, {Op0, Op1, Amt});
}
static Value *EmitX86Select(CodeGenFunction &CGF,
@@ -8855,6 +9507,17 @@ static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
}
+// Emit a saturating add or subtract of Ops[0] and Ops[1], signed or unsigned.
// IsSigned/IsAddition pick one of sadd_sat, ssub_sat, uadd_sat, usub_sat; the
// intrinsic is overloaded on the type of Ops[0]. Only the first two operands
// are used.
+static Value *EmitX86AddSubSatExpr(CodeGenFunction &CGF,
+ ArrayRef<Value *> Ops, bool IsSigned,
+ bool IsAddition) {
+ Intrinsic::ID IID =
+ IsSigned ? (IsAddition ? Intrinsic::sadd_sat : Intrinsic::ssub_sat)
+ : (IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat);
+ llvm::Function *F = CGF.CGM.getIntrinsic(IID, Ops[0]->getType());
+ return CGF.Builder.CreateCall(F, {Ops[0], Ops[1]});
+}
+
Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
@@ -8876,6 +9539,7 @@ Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
// Grab the global __cpu_model.
llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
+ cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
// Calculate the index needed to access the correct field based on the
// range. Also adjust the expected value.
@@ -8911,17 +9575,17 @@ Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
return EmitX86CpuSupports(FeatureStr);
}
// Builds a bitmask with one bit set for every feature name in FeatureStrs.
// This change widens the mask (and the shifted constant) from 32 to 64 bits so
// that feature indices of 32 and above can be represented.
-uint32_t
+uint64_t
CodeGenFunction::GetX86CpuSupportsMask(ArrayRef<StringRef> FeatureStrs) {
// Processor features and mapping to processor feature value.
- uint32_t FeaturesMask = 0;
+ uint64_t FeaturesMask = 0;
for (const StringRef &FeatureStr : FeatureStrs) {
// Map the feature name to its bit index using the X86_FEATURE_COMPAT table.
// NOTE(review): no .Default case is visible here; presumably feature names
// are validated before this point - confirm against the callers.
unsigned Feature =
StringSwitch<unsigned>(FeatureStr)
#define X86_FEATURE_COMPAT(VAL, ENUM, STR) .Case(STR, VAL)
#include "llvm/Support/X86TargetParser.def"
;
- FeaturesMask |= (1U << Feature);
+ FeaturesMask |= (1ULL << Feature);
}
return FeaturesMask;
}
@@ -8930,37 +9594,66 @@ Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
return EmitX86CpuSupports(GetX86CpuSupportsMask(FeatureStrs));
}
// Emits IR that tests whether the features in FeaturesMask are present.
// The 64-bit mask is split into two 32-bit halves: Features1 is checked
// against __cpu_model.__cpu_features[0] and Features2 against the separate
// global __cpu_features2. A half that is zero emits no load at all.
// Each emitted check is (Features & Mask) == Mask, i.e. all requested bits
// must be set, whereas the removed code tested (Features & Mask) != 0.
// The per-half results are ANDed together, starting from a constant true.
-llvm::Value *CodeGenFunction::EmitX86CpuSupports(uint32_t FeaturesMask) {
- // Matching the struct layout from the compiler-rt/libgcc structure that is
- // filled in:
- // unsigned int __cpu_vendor;
- // unsigned int __cpu_type;
- // unsigned int __cpu_subtype;
- // unsigned int __cpu_features[1];
- llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
- llvm::ArrayType::get(Int32Ty, 1));
+llvm::Value *CodeGenFunction::EmitX86CpuSupports(uint64_t FeaturesMask) {
+ uint32_t Features1 = Lo_32(FeaturesMask);
+ uint32_t Features2 = Hi_32(FeaturesMask);
- // Grab the global __cpu_model.
- llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
+ Value *Result = Builder.getTrue();
+
+ if (Features1 != 0) {
+ // Matching the struct layout from the compiler-rt/libgcc structure that is
+ // filled in:
+ // unsigned int __cpu_vendor;
+ // unsigned int __cpu_type;
+ // unsigned int __cpu_subtype;
+ // unsigned int __cpu_features[1];
+ llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
+ llvm::ArrayType::get(Int32Ty, 1));
+
+ // Grab the global __cpu_model.
+ llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
+ cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
+
+ // Grab the first (0th) element from the field __cpu_features off of the
+ // global in the struct STy.
+ Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(3),
+ Builder.getInt32(0)};
+ Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
+ Value *Features =
+ Builder.CreateAlignedLoad(CpuFeatures, CharUnits::fromQuantity(4));
+
+ // Require every bit requested in the low half to be set in the loaded word.
+ Value *Mask = Builder.getInt32(Features1);
+ Value *Bitset = Builder.CreateAnd(Features, Mask);
+ Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
+ Result = Builder.CreateAnd(Result, Cmp);
+ }
- // Grab the first (0th) element from the field __cpu_features off of the
- // global in the struct STy.
- Value *Idxs[] = {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 3),
- ConstantInt::get(Int32Ty, 0)};
- Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
- Value *Features =
- Builder.CreateAlignedLoad(CpuFeatures, CharUnits::fromQuantity(4));
-
- // Check the value of the bit corresponding to the feature requested.
- Value *Bitset = Builder.CreateAnd(
- Features, llvm::ConstantInt::get(Int32Ty, FeaturesMask));
- return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0));
+ if (Features2 != 0) {
+ llvm::Constant *CpuFeatures2 = CGM.CreateRuntimeVariable(Int32Ty,
+ "__cpu_features2");
+ cast<llvm::GlobalValue>(CpuFeatures2)->setDSOLocal(true);
+
+ Value *Features =
+ Builder.CreateAlignedLoad(CpuFeatures2, CharUnits::fromQuantity(4));
+
+ // Require every bit requested in the high half to be set in the loaded word.
+ Value *Mask = Builder.getInt32(Features2);
+ Value *Bitset = Builder.CreateAnd(Features, Mask);
+ Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
+ Result = Builder.CreateAnd(Result, Cmp);
+ }
+
+ return Result;
}
// Emits a call to the runtime function __cpu_indicator_init, declared here as
// taking no arguments and returning void.
Value *CodeGenFunction::EmitX86CpuInit() {
llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy,
/*Variadic*/ false);
llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init");
// Mark the declaration dso_local and reset it to the default DLL storage
// class before calling it.
+ cast<llvm::GlobalValue>(Func)->setDSOLocal(true);
+ cast<llvm::GlobalValue>(Func)->setDLLStorageClass(
+ llvm::GlobalValue::DefaultStorageClass);
return Builder.CreateCall(Func);
}
@@ -9051,6 +9744,24 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__rdtsc: {
return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
}
+ case X86::BI__builtin_ia32_rdtscp: {
+ Value *Call = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtscp));
+ Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
+ Ops[0]);
+ return Builder.CreateExtractValue(Call, 0);
+ }
+ case X86::BI__builtin_ia32_lzcnt_u16:
+ case X86::BI__builtin_ia32_lzcnt_u32:
+ case X86::BI__builtin_ia32_lzcnt_u64: {
+ Value *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
+ return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
+ }
+ case X86::BI__builtin_ia32_tzcnt_u16:
+ case X86::BI__builtin_ia32_tzcnt_u32:
+ case X86::BI__builtin_ia32_tzcnt_u64: {
+ Value *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
+ return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
+ }
case X86::BI__builtin_ia32_undef128:
case X86::BI__builtin_ia32_undef256:
case X86::BI__builtin_ia32_undef512:
@@ -9822,6 +10533,50 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
"psrldq");
return Builder.CreateBitCast(SV, ResultType, "cast");
}
+ case X86::BI__builtin_ia32_kshiftliqi:
+ case X86::BI__builtin_ia32_kshiftlihi:
+ case X86::BI__builtin_ia32_kshiftlisi:
+ case X86::BI__builtin_ia32_kshiftlidi: {
+ unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
+ unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
+
+ if (ShiftVal >= NumElts)
+ return llvm::Constant::getNullValue(Ops[0]->getType());
+
+ Value *In = getMaskVecValue(*this, Ops[0], NumElts);
+
+ uint32_t Indices[64];
+ for (unsigned i = 0; i != NumElts; ++i)
+ Indices[i] = NumElts + i - ShiftVal;
+
+ Value *Zero = llvm::Constant::getNullValue(In->getType());
+ Value *SV = Builder.CreateShuffleVector(Zero, In,
+ makeArrayRef(Indices, NumElts),
+ "kshiftl");
+ return Builder.CreateBitCast(SV, Ops[0]->getType());
+ }
+ case X86::BI__builtin_ia32_kshiftriqi:
+ case X86::BI__builtin_ia32_kshiftrihi:
+ case X86::BI__builtin_ia32_kshiftrisi:
+ case X86::BI__builtin_ia32_kshiftridi: {
+ unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
+ unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
+
+ if (ShiftVal >= NumElts)
+ return llvm::Constant::getNullValue(Ops[0]->getType());
+
+ Value *In = getMaskVecValue(*this, Ops[0], NumElts);
+
+ uint32_t Indices[64];
+ for (unsigned i = 0; i != NumElts; ++i)
+ Indices[i] = i + ShiftVal;
+
+ Value *Zero = llvm::Constant::getNullValue(In->getType());
+ Value *SV = Builder.CreateShuffleVector(In, Zero,
+ makeArrayRef(Indices, NumElts),
+ "kshiftr");
+ return Builder.CreateBitCast(SV, Ops[0]->getType());
+ }
case X86::BI__builtin_ia32_movnti:
case X86::BI__builtin_ia32_movnti64:
case X86::BI__builtin_ia32_movntsd:
@@ -9847,7 +10602,41 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
SI->setAlignment(1);
return SI;
}
-
+ // Rotate is a special case of funnel shift - 1st 2 args are the same.
+ case X86::BI__builtin_ia32_vprotb:
+ case X86::BI__builtin_ia32_vprotw:
+ case X86::BI__builtin_ia32_vprotd:
+ case X86::BI__builtin_ia32_vprotq:
+ case X86::BI__builtin_ia32_vprotbi:
+ case X86::BI__builtin_ia32_vprotwi:
+ case X86::BI__builtin_ia32_vprotdi:
+ case X86::BI__builtin_ia32_vprotqi:
+ case X86::BI__builtin_ia32_prold128:
+ case X86::BI__builtin_ia32_prold256:
+ case X86::BI__builtin_ia32_prold512:
+ case X86::BI__builtin_ia32_prolq128:
+ case X86::BI__builtin_ia32_prolq256:
+ case X86::BI__builtin_ia32_prolq512:
+ case X86::BI__builtin_ia32_prolvd128:
+ case X86::BI__builtin_ia32_prolvd256:
+ case X86::BI__builtin_ia32_prolvd512:
+ case X86::BI__builtin_ia32_prolvq128:
+ case X86::BI__builtin_ia32_prolvq256:
+ case X86::BI__builtin_ia32_prolvq512:
+ return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false);
+ case X86::BI__builtin_ia32_prord128:
+ case X86::BI__builtin_ia32_prord256:
+ case X86::BI__builtin_ia32_prord512:
+ case X86::BI__builtin_ia32_prorq128:
+ case X86::BI__builtin_ia32_prorq256:
+ case X86::BI__builtin_ia32_prorq512:
+ case X86::BI__builtin_ia32_prorvd128:
+ case X86::BI__builtin_ia32_prorvd256:
+ case X86::BI__builtin_ia32_prorvd512:
+ case X86::BI__builtin_ia32_prorvq128:
+ case X86::BI__builtin_ia32_prorvq256:
+ case X86::BI__builtin_ia32_prorvq512:
+ return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true);
case X86::BI__builtin_ia32_selectb_128:
case X86::BI__builtin_ia32_selectb_256:
case X86::BI__builtin_ia32_selectb_512:
@@ -9905,38 +10694,147 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
return EmitX86MaskedCompare(*this, CC, false, Ops);
}
+ case X86::BI__builtin_ia32_kortestcqi:
case X86::BI__builtin_ia32_kortestchi:
- case X86::BI__builtin_ia32_kortestzhi: {
- Value *Or = EmitX86MaskLogic(*this, Instruction::Or, 16, Ops);
- Value *C;
- if (BuiltinID == X86::BI__builtin_ia32_kortestchi)
- C = llvm::Constant::getAllOnesValue(Builder.getInt16Ty());
- else
- C = llvm::Constant::getNullValue(Builder.getInt16Ty());
+ case X86::BI__builtin_ia32_kortestcsi:
+ case X86::BI__builtin_ia32_kortestcdi: {
+ Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
+ Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType());
Value *Cmp = Builder.CreateICmpEQ(Or, C);
return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
}
+ case X86::BI__builtin_ia32_kortestzqi:
+ case X86::BI__builtin_ia32_kortestzhi:
+ case X86::BI__builtin_ia32_kortestzsi:
+ case X86::BI__builtin_ia32_kortestzdi: {
+ Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
+ Value *C = llvm::Constant::getNullValue(Ops[0]->getType());
+ Value *Cmp = Builder.CreateICmpEQ(Or, C);
+ return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
+ }
+
+ case X86::BI__builtin_ia32_ktestcqi:
+ case X86::BI__builtin_ia32_ktestzqi:
+ case X86::BI__builtin_ia32_ktestchi:
+ case X86::BI__builtin_ia32_ktestzhi:
+ case X86::BI__builtin_ia32_ktestcsi:
+ case X86::BI__builtin_ia32_ktestzsi:
+ case X86::BI__builtin_ia32_ktestcdi:
+ case X86::BI__builtin_ia32_ktestzdi: {
+ Intrinsic::ID IID;
+ switch (BuiltinID) {
+ default: llvm_unreachable("Unsupported intrinsic!");
+ case X86::BI__builtin_ia32_ktestcqi:
+ IID = Intrinsic::x86_avx512_ktestc_b;
+ break;
+ case X86::BI__builtin_ia32_ktestzqi:
+ IID = Intrinsic::x86_avx512_ktestz_b;
+ break;
+ case X86::BI__builtin_ia32_ktestchi:
+ IID = Intrinsic::x86_avx512_ktestc_w;
+ break;
+ case X86::BI__builtin_ia32_ktestzhi:
+ IID = Intrinsic::x86_avx512_ktestz_w;
+ break;
+ case X86::BI__builtin_ia32_ktestcsi:
+ IID = Intrinsic::x86_avx512_ktestc_d;
+ break;
+ case X86::BI__builtin_ia32_ktestzsi:
+ IID = Intrinsic::x86_avx512_ktestz_d;
+ break;
+ case X86::BI__builtin_ia32_ktestcdi:
+ IID = Intrinsic::x86_avx512_ktestc_q;
+ break;
+ case X86::BI__builtin_ia32_ktestzdi:
+ IID = Intrinsic::x86_avx512_ktestz_q;
+ break;
+ }
+
+ unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
+ Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
+ Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
+ Function *Intr = CGM.getIntrinsic(IID);
+ return Builder.CreateCall(Intr, {LHS, RHS});
+ }
+ case X86::BI__builtin_ia32_kaddqi:
+ case X86::BI__builtin_ia32_kaddhi:
+ case X86::BI__builtin_ia32_kaddsi:
+ case X86::BI__builtin_ia32_kadddi: {
+ Intrinsic::ID IID;
+ switch (BuiltinID) {
+ default: llvm_unreachable("Unsupported intrinsic!");
+ case X86::BI__builtin_ia32_kaddqi:
+ IID = Intrinsic::x86_avx512_kadd_b;
+ break;
+ case X86::BI__builtin_ia32_kaddhi:
+ IID = Intrinsic::x86_avx512_kadd_w;
+ break;
+ case X86::BI__builtin_ia32_kaddsi:
+ IID = Intrinsic::x86_avx512_kadd_d;
+ break;
+ case X86::BI__builtin_ia32_kadddi:
+ IID = Intrinsic::x86_avx512_kadd_q;
+ break;
+ }
+
+ unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
+ Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
+ Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
+ Function *Intr = CGM.getIntrinsic(IID);
+ Value *Res = Builder.CreateCall(Intr, {LHS, RHS});
+ return Builder.CreateBitCast(Res, Ops[0]->getType());
+ }
+ case X86::BI__builtin_ia32_kandqi:
case X86::BI__builtin_ia32_kandhi:
- return EmitX86MaskLogic(*this, Instruction::And, 16, Ops);
+ case X86::BI__builtin_ia32_kandsi:
+ case X86::BI__builtin_ia32_kanddi:
+ return EmitX86MaskLogic(*this, Instruction::And, Ops);
+ case X86::BI__builtin_ia32_kandnqi:
case X86::BI__builtin_ia32_kandnhi:
- return EmitX86MaskLogic(*this, Instruction::And, 16, Ops, true);
+ case X86::BI__builtin_ia32_kandnsi:
+ case X86::BI__builtin_ia32_kandndi:
+ return EmitX86MaskLogic(*this, Instruction::And, Ops, true);
+ case X86::BI__builtin_ia32_korqi:
case X86::BI__builtin_ia32_korhi:
- return EmitX86MaskLogic(*this, Instruction::Or, 16, Ops);
+ case X86::BI__builtin_ia32_korsi:
+ case X86::BI__builtin_ia32_kordi:
+ return EmitX86MaskLogic(*this, Instruction::Or, Ops);
+ case X86::BI__builtin_ia32_kxnorqi:
case X86::BI__builtin_ia32_kxnorhi:
- return EmitX86MaskLogic(*this, Instruction::Xor, 16, Ops, true);
+ case X86::BI__builtin_ia32_kxnorsi:
+ case X86::BI__builtin_ia32_kxnordi:
+ return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true);
+ case X86::BI__builtin_ia32_kxorqi:
case X86::BI__builtin_ia32_kxorhi:
- return EmitX86MaskLogic(*this, Instruction::Xor, 16, Ops);
- case X86::BI__builtin_ia32_knothi: {
- Ops[0] = getMaskVecValue(*this, Ops[0], 16);
- return Builder.CreateBitCast(Builder.CreateNot(Ops[0]),
- Builder.getInt16Ty());
+ case X86::BI__builtin_ia32_kxorsi:
+ case X86::BI__builtin_ia32_kxordi:
+ return EmitX86MaskLogic(*this, Instruction::Xor, Ops);
+ case X86::BI__builtin_ia32_knotqi:
+ case X86::BI__builtin_ia32_knothi:
+ case X86::BI__builtin_ia32_knotsi:
+ case X86::BI__builtin_ia32_knotdi: {
+ unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
+ Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
+ return Builder.CreateBitCast(Builder.CreateNot(Res),
+ Ops[0]->getType());
+ }
+ case X86::BI__builtin_ia32_kmovb:
+ case X86::BI__builtin_ia32_kmovw:
+ case X86::BI__builtin_ia32_kmovd:
+ case X86::BI__builtin_ia32_kmovq: {
+ // Bitcast to vXi1 type and then back to integer. This gets the mask
+ // register type into the IR, but might be optimized out depending on
+ // what's around it.
+ unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
+ Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
+ return Builder.CreateBitCast(Res, Ops[0]->getType());
}
case X86::BI__builtin_ia32_kunpckdi:
case X86::BI__builtin_ia32_kunpcksi:
case X86::BI__builtin_ia32_kunpckhi: {
- unsigned NumElts = Ops[0]->getType()->getScalarSizeInBits();
+ unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
uint32_t Indices[64];
@@ -10103,6 +11001,52 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_pternlogq256_maskz:
return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops);
+ case X86::BI__builtin_ia32_vpshldd128:
+ case X86::BI__builtin_ia32_vpshldd256:
+ case X86::BI__builtin_ia32_vpshldd512:
+ case X86::BI__builtin_ia32_vpshldq128:
+ case X86::BI__builtin_ia32_vpshldq256:
+ case X86::BI__builtin_ia32_vpshldq512:
+ case X86::BI__builtin_ia32_vpshldw128:
+ case X86::BI__builtin_ia32_vpshldw256:
+ case X86::BI__builtin_ia32_vpshldw512:
+ return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
+
+ case X86::BI__builtin_ia32_vpshrdd128:
+ case X86::BI__builtin_ia32_vpshrdd256:
+ case X86::BI__builtin_ia32_vpshrdd512:
+ case X86::BI__builtin_ia32_vpshrdq128:
+ case X86::BI__builtin_ia32_vpshrdq256:
+ case X86::BI__builtin_ia32_vpshrdq512:
+ case X86::BI__builtin_ia32_vpshrdw128:
+ case X86::BI__builtin_ia32_vpshrdw256:
+ case X86::BI__builtin_ia32_vpshrdw512:
+ // Ops 0 and 1 are swapped.
+ return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
+
+ case X86::BI__builtin_ia32_vpshldvd128:
+ case X86::BI__builtin_ia32_vpshldvd256:
+ case X86::BI__builtin_ia32_vpshldvd512:
+ case X86::BI__builtin_ia32_vpshldvq128:
+ case X86::BI__builtin_ia32_vpshldvq256:
+ case X86::BI__builtin_ia32_vpshldvq512:
+ case X86::BI__builtin_ia32_vpshldvw128:
+ case X86::BI__builtin_ia32_vpshldvw256:
+ case X86::BI__builtin_ia32_vpshldvw512:
+ return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
+
+ case X86::BI__builtin_ia32_vpshrdvd128:
+ case X86::BI__builtin_ia32_vpshrdvd256:
+ case X86::BI__builtin_ia32_vpshrdvd512:
+ case X86::BI__builtin_ia32_vpshrdvq128:
+ case X86::BI__builtin_ia32_vpshrdvq256:
+ case X86::BI__builtin_ia32_vpshrdvq512:
+ case X86::BI__builtin_ia32_vpshrdvw128:
+ case X86::BI__builtin_ia32_vpshrdvw256:
+ case X86::BI__builtin_ia32_vpshrdvw512:
+ // Ops 0 and 1 are swapped.
+ return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
+
// 3DNow!
case X86::BI__builtin_ia32_pswapdsf:
case X86::BI__builtin_ia32_pswapdsi: {
@@ -10145,6 +11089,33 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
Ops[0]);
return Builder.CreateExtractValue(Call, 1);
}
+ case X86::BI__builtin_ia32_addcarryx_u32:
+ case X86::BI__builtin_ia32_addcarryx_u64:
+ case X86::BI__builtin_ia32_subborrow_u32:
+ case X86::BI__builtin_ia32_subborrow_u64: {
+ Intrinsic::ID IID;
+ switch (BuiltinID) {
+ default: llvm_unreachable("Unsupported intrinsic!");
+ case X86::BI__builtin_ia32_addcarryx_u32:
+ IID = Intrinsic::x86_addcarry_32;
+ break;
+ case X86::BI__builtin_ia32_addcarryx_u64:
+ IID = Intrinsic::x86_addcarry_64;
+ break;
+ case X86::BI__builtin_ia32_subborrow_u32:
+ IID = Intrinsic::x86_subborrow_32;
+ break;
+ case X86::BI__builtin_ia32_subborrow_u64:
+ IID = Intrinsic::x86_subborrow_64;
+ break;
+ }
+
+ Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
+ { Ops[0], Ops[1], Ops[2] });
+ Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
+ Ops[3]);
+ return Builder.CreateExtractValue(Call, 0);
+ }
case X86::BI__builtin_ia32_fpclassps128_mask:
case X86::BI__builtin_ia32_fpclassps256_mask:
@@ -10183,6 +11154,51 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn);
}
+ case X86::BI__builtin_ia32_vpmultishiftqb128:
+ case X86::BI__builtin_ia32_vpmultishiftqb256:
+ case X86::BI__builtin_ia32_vpmultishiftqb512: {
+ Intrinsic::ID ID;
+ switch (BuiltinID) {
+ default: llvm_unreachable("Unsupported intrinsic!");
+ case X86::BI__builtin_ia32_vpmultishiftqb128:
+ ID = Intrinsic::x86_avx512_pmultishift_qb_128;
+ break;
+ case X86::BI__builtin_ia32_vpmultishiftqb256:
+ ID = Intrinsic::x86_avx512_pmultishift_qb_256;
+ break;
+ case X86::BI__builtin_ia32_vpmultishiftqb512:
+ ID = Intrinsic::x86_avx512_pmultishift_qb_512;
+ break;
+ }
+
+ return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
+ }
+
+ case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
+ case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
+ case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
+ unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
+ Value *MaskIn = Ops[2];
+ Ops.erase(&Ops[2]);
+
+ Intrinsic::ID ID;
+ switch (BuiltinID) {
+ default: llvm_unreachable("Unsupported intrinsic!");
+ case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
+ ID = Intrinsic::x86_avx512_vpshufbitqmb_128;
+ break;
+ case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
+ ID = Intrinsic::x86_avx512_vpshufbitqmb_256;
+ break;
+ case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
+ ID = Intrinsic::x86_avx512_vpshufbitqmb_512;
+ break;
+ }
+
+ Value *Shufbit = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
+ return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn);
+ }
+
// packed comparison intrinsics
case X86::BI__builtin_ia32_cmpeqps:
case X86::BI__builtin_ia32_cmpeqpd:
@@ -10361,6 +11377,27 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
llvm::SyncScope::System);
}
+ case X86::BI__shiftleft128:
+ case X86::BI__shiftright128: {
+ // FIXME: Once fshl/fshr no longer add an unneeded and and cmov, do this:
+ // llvm::Function *F = CGM.getIntrinsic(
+ // BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
+ // Int64Ty);
+ // Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
+ // return Builder.CreateCall(F, Ops);
+ llvm::Type *Int128Ty = Builder.getInt128Ty();
+ Value *Val = Builder.CreateOr(
+ Builder.CreateShl(Builder.CreateZExt(Ops[1], Int128Ty), 64),
+ Builder.CreateZExt(Ops[0], Int128Ty));
+ Value *Amt = Builder.CreateAnd(Builder.CreateZExt(Ops[2], Int128Ty),
+ llvm::ConstantInt::get(Int128Ty, 0x3f));
+ Value *Res;
+ if (BuiltinID == X86::BI__shiftleft128)
+ Res = Builder.CreateLShr(Builder.CreateShl(Val, Amt), 64);
+ else
+ Res = Builder.CreateLShr(Val, Amt);
+ return Builder.CreateTrunc(Res, Int64Ty);
+ }
case X86::BI_ReadWriteBarrier:
case X86::BI_ReadBarrier:
case X86::BI_WriteBarrier: {
@@ -10401,14 +11438,11 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
llvm::Type *Int128PtrTy = Int128Ty->getPointerTo();
Value *Destination =
- Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PtrTy);
- Value *ExchangeHigh128 =
- Builder.CreateZExt(EmitScalarExpr(E->getArg(1)), Int128Ty);
- Value *ExchangeLow128 =
- Builder.CreateZExt(EmitScalarExpr(E->getArg(2)), Int128Ty);
- Address ComparandResult(
- Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int128PtrTy),
- getContext().toCharUnitsFromBits(128));
+ Builder.CreateBitCast(Ops[0], Int128PtrTy);
+ Value *ExchangeHigh128 = Builder.CreateZExt(Ops[1], Int128Ty);
+ Value *ExchangeLow128 = Builder.CreateZExt(Ops[2], Int128Ty);
+ Address ComparandResult(Builder.CreateBitCast(Ops[3], Int128PtrTy),
+ getContext().toCharUnitsFromBits(128));
Value *Exchange = Builder.CreateOr(
Builder.CreateShl(ExchangeHigh128, 64, "", false, false),
@@ -10459,8 +11493,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__readfsdword:
case X86::BI__readfsqword: {
llvm::Type *IntTy = ConvertType(E->getType());
- Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
- llvm::PointerType::get(IntTy, 257));
+ Value *Ptr =
+ Builder.CreateIntToPtr(Ops[0], llvm::PointerType::get(IntTy, 257));
LoadInst *Load = Builder.CreateAlignedLoad(
IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
Load->setVolatile(true);
@@ -10471,17 +11505,44 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__readgsdword:
case X86::BI__readgsqword: {
llvm::Type *IntTy = ConvertType(E->getType());
- Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
- llvm::PointerType::get(IntTy, 256));
+ Value *Ptr =
+ Builder.CreateIntToPtr(Ops[0], llvm::PointerType::get(IntTy, 256));
LoadInst *Load = Builder.CreateAlignedLoad(
IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
Load->setVolatile(true);
return Load;
}
+ case X86::BI__builtin_ia32_paddsb512:
+ case X86::BI__builtin_ia32_paddsw512:
+ case X86::BI__builtin_ia32_paddsb256:
+ case X86::BI__builtin_ia32_paddsw256:
+ case X86::BI__builtin_ia32_paddsb128:
+ case X86::BI__builtin_ia32_paddsw128:
+ return EmitX86AddSubSatExpr(*this, Ops, true, true);
+ case X86::BI__builtin_ia32_paddusb512:
+ case X86::BI__builtin_ia32_paddusw512:
+ case X86::BI__builtin_ia32_paddusb256:
+ case X86::BI__builtin_ia32_paddusw256:
+ case X86::BI__builtin_ia32_paddusb128:
+ case X86::BI__builtin_ia32_paddusw128:
+ return EmitX86AddSubSatExpr(*this, Ops, false, true);
+ case X86::BI__builtin_ia32_psubsb512:
+ case X86::BI__builtin_ia32_psubsw512:
+ case X86::BI__builtin_ia32_psubsb256:
+ case X86::BI__builtin_ia32_psubsw256:
+ case X86::BI__builtin_ia32_psubsb128:
+ case X86::BI__builtin_ia32_psubsw128:
+ return EmitX86AddSubSatExpr(*this, Ops, true, false);
+ case X86::BI__builtin_ia32_psubusb512:
+ case X86::BI__builtin_ia32_psubusw512:
+ case X86::BI__builtin_ia32_psubusb256:
+ case X86::BI__builtin_ia32_psubusw256:
+ case X86::BI__builtin_ia32_psubusb128:
+ case X86::BI__builtin_ia32_psubusw128:
+ return EmitX86AddSubSatExpr(*this, Ops, false, false);
}
}
-
Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
SmallVector<Value*, 4> Ops;
@@ -10901,6 +11962,28 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
auto RetTy = ConvertType(BIRetType);
return Builder.CreateBitCast(ShuffleCall, RetTy);
}
+
+ case PPC::BI__builtin_pack_vector_int128: {
+ bool isLittleEndian = getTarget().isLittleEndian();
+ Value *UndefValue =
+ llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), 2));
+ Value *Res = Builder.CreateInsertElement(
+ UndefValue, Ops[0], (uint64_t)(isLittleEndian ? 1 : 0));
+ Res = Builder.CreateInsertElement(Res, Ops[1],
+ (uint64_t)(isLittleEndian ? 0 : 1));
+ return Builder.CreateBitCast(Res, ConvertType(E->getType()));
+ }
+
+ case PPC::BI__builtin_unpack_vector_int128: {
+ ConstantInt *Index = cast<ConstantInt>(Ops[1]);
+ Value *Unpacked = Builder.CreateBitCast(
+ Ops[0], llvm::VectorType::get(ConvertType(E->getType()), 2));
+
+ if (getTarget().isLittleEndian())
+ Index = ConstantInt::get(Index->getType(), 1 - Index->getZExtValue());
+
+ return Builder.CreateExtractElement(Unpacked, Index);
+ }
}
}
@@ -10948,12 +12031,16 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
- case AMDGPU::BI__builtin_amdgcn_mov_dpp: {
- llvm::SmallVector<llvm::Value *, 5> Args;
- for (unsigned I = 0; I != 5; ++I)
+ case AMDGPU::BI__builtin_amdgcn_mov_dpp:
+ case AMDGPU::BI__builtin_amdgcn_update_dpp: {
+ llvm::SmallVector<llvm::Value *, 6> Args;
+ for (unsigned I = 0; I != E->getNumArgs(); ++I)
Args.push_back(EmitScalarExpr(E->getArg(I)));
- Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_mov_dpp,
- Args[0]->getType());
+ assert(Args.size() == 5 || Args.size() == 6);
+ if (Args.size() == 5)
+ Args.insert(Args.begin(), llvm::UndefValue::get(Args[0]->getType()));
+ Value *F =
+ CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
return Builder.CreateCall(F, Args);
}
case AMDGPU::BI__builtin_amdgcn_div_fixup:
@@ -11039,50 +12126,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
CI->setConvergent();
return CI;
}
- case AMDGPU::BI__builtin_amdgcn_ds_faddf:
- case AMDGPU::BI__builtin_amdgcn_ds_fminf:
- case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: {
- llvm::SmallVector<llvm::Value *, 5> Args;
- for (unsigned I = 0; I != 5; ++I)
- Args.push_back(EmitScalarExpr(E->getArg(I)));
- const llvm::Type *PtrTy = Args[0]->getType();
- // check pointer parameter
- if (!PtrTy->isPointerTy() ||
- E->getArg(0)
- ->getType()
- ->getPointeeType()
- .getQualifiers()
- .getAddressSpace() != LangAS::opencl_local ||
- !PtrTy->getPointerElementType()->isFloatTy()) {
- CGM.Error(E->getArg(0)->getLocStart(),
- "parameter should have type \"local float*\"");
- return nullptr;
- }
- // check float parameter
- if (!Args[1]->getType()->isFloatTy()) {
- CGM.Error(E->getArg(1)->getLocStart(),
- "parameter should have type \"float\"");
- return nullptr;
- }
-
- Intrinsic::ID ID;
- switch (BuiltinID) {
- case AMDGPU::BI__builtin_amdgcn_ds_faddf:
- ID = Intrinsic::amdgcn_ds_fadd;
- break;
- case AMDGPU::BI__builtin_amdgcn_ds_fminf:
- ID = Intrinsic::amdgcn_ds_fmin;
- break;
- case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
- ID = Intrinsic::amdgcn_ds_fmax;
- break;
- default:
- llvm_unreachable("Unknown BuiltinID");
- }
- Value *F = CGM.getIntrinsic(ID);
- return Builder.CreateCall(F, Args);
- }
-
// amdgcn workitem
case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
@@ -11363,7 +12406,7 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
return Builder.CreateCall(F, {X, Y, M4Value});
}
- // Vector intrisincs that output the post-instruction CC value.
+ // Vector intrinsics that output the post-instruction CC value.
#define INTRINSIC_WITH_CC(NAME) \
case SystemZ::BI__builtin_##NAME: \
@@ -11823,7 +12866,7 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
bool isColMajor = isColMajorArg.getSExtValue();
unsigned IID;
unsigned NumResults = 8;
- // PTX Instructions (and LLVM instrinsics) are defined for slice _d_, yet
+ // PTX Instructions (and LLVM intrinsics) are defined for slice _d_, yet
// for some reason nvcc builtins use _c_.
switch (BuiltinID) {
case NVPTX::BI__hmma_m16n16k16_st_c_f16:
@@ -12046,31 +13089,6 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType);
return Builder.CreateCall(Callee, Args);
}
- case WebAssembly::BI__builtin_wasm_mem_size: {
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *I = EmitScalarExpr(E->getArg(0));
- Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_mem_size, ResultType);
- return Builder.CreateCall(Callee, I);
- }
- case WebAssembly::BI__builtin_wasm_mem_grow: {
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *Args[] = {
- EmitScalarExpr(E->getArg(0)),
- EmitScalarExpr(E->getArg(1))
- };
- Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_mem_grow, ResultType);
- return Builder.CreateCall(Callee, Args);
- }
- case WebAssembly::BI__builtin_wasm_current_memory: {
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType);
- return Builder.CreateCall(Callee);
- }
- case WebAssembly::BI__builtin_wasm_grow_memory: {
- Value *X = EmitScalarExpr(E->getArg(0));
- Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_grow_memory, X->getType());
- return Builder.CreateCall(Callee, X);
- }
case WebAssembly::BI__builtin_wasm_throw: {
Value *Tag = EmitScalarExpr(E->getArg(0));
Value *Obj = EmitScalarExpr(E->getArg(1));
@@ -12081,6 +13099,211 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow);
return Builder.CreateCall(Callee);
}
+ case WebAssembly::BI__builtin_wasm_atomic_wait_i32: {
+ Value *Addr = EmitScalarExpr(E->getArg(0));
+ Value *Expected = EmitScalarExpr(E->getArg(1));
+ Value *Timeout = EmitScalarExpr(E->getArg(2));
+ Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_wait_i32);
+ return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
+ }
+ case WebAssembly::BI__builtin_wasm_atomic_wait_i64: {
+ Value *Addr = EmitScalarExpr(E->getArg(0));
+ Value *Expected = EmitScalarExpr(E->getArg(1));
+ Value *Timeout = EmitScalarExpr(E->getArg(2));
+ Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_wait_i64);
+ return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
+ }
+ case WebAssembly::BI__builtin_wasm_atomic_notify: {
+ Value *Addr = EmitScalarExpr(E->getArg(0));
+ Value *Count = EmitScalarExpr(E->getArg(1));
+ Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_notify);
+ return Builder.CreateCall(Callee, {Addr, Count});
+ }
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32:
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64:
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4:
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64x2_f64x2: {
+ Value *Src = EmitScalarExpr(E->getArg(0));
+ llvm::Type *ResT = ConvertType(E->getType());
+ Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_signed,
+ {ResT, Src->getType()});
+ return Builder.CreateCall(Callee, {Src});
+ }
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32:
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64:
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32:
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64:
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4:
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64x2_f64x2: {
+ Value *Src = EmitScalarExpr(E->getArg(0));
+ llvm::Type *ResT = ConvertType(E->getType());
+ Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_unsigned,
+ {ResT, Src->getType()});
+ return Builder.CreateCall(Callee, {Src});
+ }
+ case WebAssembly::BI__builtin_wasm_min_f32:
+ case WebAssembly::BI__builtin_wasm_min_f64:
+ case WebAssembly::BI__builtin_wasm_min_f32x4:
+ case WebAssembly::BI__builtin_wasm_min_f64x2: {
+ Value *LHS = EmitScalarExpr(E->getArg(0));
+ Value *RHS = EmitScalarExpr(E->getArg(1));
+ Value *Callee = CGM.getIntrinsic(Intrinsic::minimum,
+ ConvertType(E->getType()));
+ return Builder.CreateCall(Callee, {LHS, RHS});
+ }
+ case WebAssembly::BI__builtin_wasm_max_f32:
+ case WebAssembly::BI__builtin_wasm_max_f64:
+ case WebAssembly::BI__builtin_wasm_max_f32x4:
+ case WebAssembly::BI__builtin_wasm_max_f64x2: {
+ Value *LHS = EmitScalarExpr(E->getArg(0));
+ Value *RHS = EmitScalarExpr(E->getArg(1));
+ Value *Callee = CGM.getIntrinsic(Intrinsic::maximum,
+ ConvertType(E->getType()));
+ return Builder.CreateCall(Callee, {LHS, RHS});
+ }
+ case WebAssembly::BI__builtin_wasm_extract_lane_s_i8x16:
+ case WebAssembly::BI__builtin_wasm_extract_lane_u_i8x16:
+ case WebAssembly::BI__builtin_wasm_extract_lane_s_i16x8:
+ case WebAssembly::BI__builtin_wasm_extract_lane_u_i16x8:
+ case WebAssembly::BI__builtin_wasm_extract_lane_i32x4:
+ case WebAssembly::BI__builtin_wasm_extract_lane_i64x2:
+ case WebAssembly::BI__builtin_wasm_extract_lane_f32x4:
+ case WebAssembly::BI__builtin_wasm_extract_lane_f64x2: {
+ llvm::APSInt LaneConst;
+ if (!E->getArg(1)->isIntegerConstantExpr(LaneConst, getContext()))
+ llvm_unreachable("Constant arg isn't actually constant?");
+ Value *Vec = EmitScalarExpr(E->getArg(0));
+ Value *Lane = llvm::ConstantInt::get(getLLVMContext(), LaneConst);
+ Value *Extract = Builder.CreateExtractElement(Vec, Lane);
+ switch (BuiltinID) {
+ case WebAssembly::BI__builtin_wasm_extract_lane_s_i8x16:
+ case WebAssembly::BI__builtin_wasm_extract_lane_s_i16x8:
+ return Builder.CreateSExt(Extract, ConvertType(E->getType()));
+ case WebAssembly::BI__builtin_wasm_extract_lane_u_i8x16:
+ case WebAssembly::BI__builtin_wasm_extract_lane_u_i16x8:
+ return Builder.CreateZExt(Extract, ConvertType(E->getType()));
+ case WebAssembly::BI__builtin_wasm_extract_lane_i32x4:
+ case WebAssembly::BI__builtin_wasm_extract_lane_i64x2:
+ case WebAssembly::BI__builtin_wasm_extract_lane_f32x4:
+ case WebAssembly::BI__builtin_wasm_extract_lane_f64x2:
+ return Extract;
+ default:
+ llvm_unreachable("unexpected builtin ID");
+ }
+ }
+ case WebAssembly::BI__builtin_wasm_replace_lane_i8x16:
+ case WebAssembly::BI__builtin_wasm_replace_lane_i16x8:
+ case WebAssembly::BI__builtin_wasm_replace_lane_i32x4:
+ case WebAssembly::BI__builtin_wasm_replace_lane_i64x2:
+ case WebAssembly::BI__builtin_wasm_replace_lane_f32x4:
+ case WebAssembly::BI__builtin_wasm_replace_lane_f64x2: {
+ llvm::APSInt LaneConst;
+ if (!E->getArg(1)->isIntegerConstantExpr(LaneConst, getContext()))
+ llvm_unreachable("Constant arg isn't actually constant?");
+ Value *Vec = EmitScalarExpr(E->getArg(0));
+ Value *Lane = llvm::ConstantInt::get(getLLVMContext(), LaneConst);
+ Value *Val = EmitScalarExpr(E->getArg(2));
+ switch (BuiltinID) {
+ case WebAssembly::BI__builtin_wasm_replace_lane_i8x16:
+ case WebAssembly::BI__builtin_wasm_replace_lane_i16x8: {
+ llvm::Type *ElemType = ConvertType(E->getType())->getVectorElementType();
+ Value *Trunc = Builder.CreateTrunc(Val, ElemType);
+ return Builder.CreateInsertElement(Vec, Trunc, Lane);
+ }
+ case WebAssembly::BI__builtin_wasm_replace_lane_i32x4:
+ case WebAssembly::BI__builtin_wasm_replace_lane_i64x2:
+ case WebAssembly::BI__builtin_wasm_replace_lane_f32x4:
+ case WebAssembly::BI__builtin_wasm_replace_lane_f64x2:
+ return Builder.CreateInsertElement(Vec, Val, Lane);
+ default:
+ llvm_unreachable("unexpected builtin ID");
+ }
+ }
+ case WebAssembly::BI__builtin_wasm_add_saturate_s_i8x16:
+ case WebAssembly::BI__builtin_wasm_add_saturate_u_i8x16:
+ case WebAssembly::BI__builtin_wasm_add_saturate_s_i16x8:
+ case WebAssembly::BI__builtin_wasm_add_saturate_u_i16x8:
+ case WebAssembly::BI__builtin_wasm_sub_saturate_s_i8x16:
+ case WebAssembly::BI__builtin_wasm_sub_saturate_u_i8x16:
+ case WebAssembly::BI__builtin_wasm_sub_saturate_s_i16x8:
+ case WebAssembly::BI__builtin_wasm_sub_saturate_u_i16x8: {
+ unsigned IntNo;
+ switch (BuiltinID) {
+ case WebAssembly::BI__builtin_wasm_add_saturate_s_i8x16:
+ case WebAssembly::BI__builtin_wasm_add_saturate_s_i16x8:
+ IntNo = Intrinsic::sadd_sat;
+ break;
+ case WebAssembly::BI__builtin_wasm_add_saturate_u_i8x16:
+ case WebAssembly::BI__builtin_wasm_add_saturate_u_i16x8:
+ IntNo = Intrinsic::uadd_sat;
+ break;
+ case WebAssembly::BI__builtin_wasm_sub_saturate_s_i8x16:
+ case WebAssembly::BI__builtin_wasm_sub_saturate_s_i16x8:
+ IntNo = Intrinsic::wasm_sub_saturate_signed;
+ break;
+ case WebAssembly::BI__builtin_wasm_sub_saturate_u_i8x16:
+ case WebAssembly::BI__builtin_wasm_sub_saturate_u_i16x8:
+ IntNo = Intrinsic::wasm_sub_saturate_unsigned;
+ break;
+ default:
+ llvm_unreachable("unexpected builtin ID");
+ }
+ Value *LHS = EmitScalarExpr(E->getArg(0));
+ Value *RHS = EmitScalarExpr(E->getArg(1));
+ Value *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
+ return Builder.CreateCall(Callee, {LHS, RHS});
+ }
+ case WebAssembly::BI__builtin_wasm_bitselect: {
+ Value *V1 = EmitScalarExpr(E->getArg(0));
+ Value *V2 = EmitScalarExpr(E->getArg(1));
+ Value *C = EmitScalarExpr(E->getArg(2));
+ Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_bitselect,
+ ConvertType(E->getType()));
+ return Builder.CreateCall(Callee, {V1, V2, C});
+ }
+ case WebAssembly::BI__builtin_wasm_any_true_i8x16:
+ case WebAssembly::BI__builtin_wasm_any_true_i16x8:
+ case WebAssembly::BI__builtin_wasm_any_true_i32x4:
+ case WebAssembly::BI__builtin_wasm_any_true_i64x2:
+ case WebAssembly::BI__builtin_wasm_all_true_i8x16:
+ case WebAssembly::BI__builtin_wasm_all_true_i16x8:
+ case WebAssembly::BI__builtin_wasm_all_true_i32x4:
+ case WebAssembly::BI__builtin_wasm_all_true_i64x2: {
+ unsigned IntNo;
+ switch (BuiltinID) {
+ case WebAssembly::BI__builtin_wasm_any_true_i8x16:
+ case WebAssembly::BI__builtin_wasm_any_true_i16x8:
+ case WebAssembly::BI__builtin_wasm_any_true_i32x4:
+ case WebAssembly::BI__builtin_wasm_any_true_i64x2:
+ IntNo = Intrinsic::wasm_anytrue;
+ break;
+ case WebAssembly::BI__builtin_wasm_all_true_i8x16:
+ case WebAssembly::BI__builtin_wasm_all_true_i16x8:
+ case WebAssembly::BI__builtin_wasm_all_true_i32x4:
+ case WebAssembly::BI__builtin_wasm_all_true_i64x2:
+ IntNo = Intrinsic::wasm_alltrue;
+ break;
+ default:
+ llvm_unreachable("unexpected builtin ID");
+ }
+ Value *Vec = EmitScalarExpr(E->getArg(0));
+ Value *Callee = CGM.getIntrinsic(IntNo, Vec->getType());
+ return Builder.CreateCall(Callee, {Vec});
+ }
+ case WebAssembly::BI__builtin_wasm_abs_f32x4:
+ case WebAssembly::BI__builtin_wasm_abs_f64x2: {
+ Value *Vec = EmitScalarExpr(E->getArg(0));
+ Value *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType());
+ return Builder.CreateCall(Callee, {Vec});
+ }
+ case WebAssembly::BI__builtin_wasm_sqrt_f32x4:
+ case WebAssembly::BI__builtin_wasm_sqrt_f64x2: {
+ Value *Vec = EmitScalarExpr(E->getArg(0));
+ Value *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType());
+ return Builder.CreateCall(Callee, {Vec});
+ }
default:
return nullptr;
diff --git a/lib/CodeGen/CGCUDANV.cpp b/lib/CodeGen/CGCUDANV.cpp
index 5fcc9e011bcb..1c578bd151bd 100644
--- a/lib/CodeGen/CGCUDANV.cpp
+++ b/lib/CodeGen/CGCUDANV.cpp
@@ -137,7 +137,7 @@ CGNVCUDARuntime::addUnderscoredPrefixToName(StringRef FuncName) const {
CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM)
: CGCUDARuntime(CGM), Context(CGM.getLLVMContext()),
TheModule(CGM.getModule()),
- RelocatableDeviceCode(CGM.getLangOpts().CUDARelocatableDeviceCode) {
+ RelocatableDeviceCode(CGM.getLangOpts().GPURelocatableDeviceCode) {
CodeGen::CodeGenTypes &Types = CGM.getTypes();
ASTContext &Ctx = CGM.getContext();
@@ -353,8 +353,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
// global variable and save a reference in GpuBinaryHandle to be cleaned up
// in destructor on exit. Then associate all known kernels with the GPU binary
// handle so CUDA runtime can figure out what to call on the GPU side.
- std::unique_ptr<llvm::MemoryBuffer> CudaGpuBinary;
- if (!IsHIP) {
+ std::unique_ptr<llvm::MemoryBuffer> CudaGpuBinary = nullptr;
+ if (!CudaGpuBinaryFileName.empty()) {
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> CudaGpuBinaryOrErr =
llvm::MemoryBuffer::getFileOrSTDIN(CudaGpuBinaryFileName);
if (std::error_code EC = CudaGpuBinaryOrErr.getError()) {
@@ -388,15 +388,23 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
ModuleIDSectionName = "__hip_module_id";
ModuleIDPrefix = "__hip_";
- // For HIP, create an external symbol __hip_fatbin in section .hip_fatbin.
- // The external symbol is supposed to contain the fat binary but will be
- // populated somewhere else, e.g. by lld through link script.
- FatBinStr = new llvm::GlobalVariable(
+ if (CudaGpuBinary) {
+ // If fatbin is available from early finalization, create a string
+ // literal containing the fat binary loaded from the given file.
+ FatBinStr = makeConstantString(CudaGpuBinary->getBuffer(), "",
+ FatbinConstantName, 8);
+ } else {
+ // If fatbin is not available, create an external symbol
+ // __hip_fatbin in section .hip_fatbin. The external symbol is supposed
+ // to contain the fat binary but will be populated somewhere else,
+ // e.g. by lld through link script.
+ FatBinStr = new llvm::GlobalVariable(
CGM.getModule(), CGM.Int8Ty,
/*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, nullptr,
"__hip_fatbin", nullptr,
llvm::GlobalVariable::NotThreadLocal);
- cast<llvm::GlobalVariable>(FatBinStr)->setSection(FatbinConstantName);
+ cast<llvm::GlobalVariable>(FatBinStr)->setSection(FatbinConstantName);
+ }
FatMagic = HIPFatMagic;
} else {
@@ -447,6 +455,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
// thread safety of the loaded program. Therefore we can assume sequential
// execution of constructor functions here.
if (IsHIP) {
+ auto Linkage = CudaGpuBinary ? llvm::GlobalValue::InternalLinkage :
+ llvm::GlobalValue::LinkOnceAnyLinkage;
llvm::BasicBlock *IfBlock =
llvm::BasicBlock::Create(Context, "if", ModuleCtorFunc);
llvm::BasicBlock *ExitBlock =
@@ -455,10 +465,13 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
// of HIP ABI.
GpuBinaryHandle = new llvm::GlobalVariable(
TheModule, VoidPtrPtrTy, /*isConstant=*/false,
- llvm::GlobalValue::LinkOnceAnyLinkage,
+ Linkage,
/*Initializer=*/llvm::ConstantPointerNull::get(VoidPtrPtrTy),
"__hip_gpubin_handle");
GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getQuantity());
+ // Prevent the weak symbol in different shared libraries being merged.
+ if (Linkage != llvm::GlobalValue::InternalLinkage)
+ GpuBinaryHandle->setVisibility(llvm::GlobalValue::HiddenVisibility);
Address GpuBinaryAddr(
GpuBinaryHandle,
CharUnits::fromQuantity(GpuBinaryHandle->getAlignment()));
@@ -507,7 +520,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
// Generate a unique module ID.
SmallString<64> ModuleID;
llvm::raw_svector_ostream OS(ModuleID);
- OS << ModuleIDPrefix << llvm::format("%x", FatbinWrapper->getGUID());
+ OS << ModuleIDPrefix << llvm::format("%" PRIx64, FatbinWrapper->getGUID());
llvm::Constant *ModuleIDConstant =
makeConstantString(ModuleID.str(), "", ModuleIDSectionName, 32);
diff --git a/lib/CodeGen/CGCXX.cpp b/lib/CodeGen/CGCXX.cpp
index d5945be43458..8b0733fbec3e 100644
--- a/lib/CodeGen/CGCXX.cpp
+++ b/lib/CodeGen/CGCXX.cpp
@@ -23,7 +23,7 @@
#include "clang/AST/Mangle.h"
#include "clang/AST/RecordLayout.h"
#include "clang/AST/StmtCXX.h"
-#include "clang/Frontend/CodeGenOptions.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "llvm/ADT/StringExtras.h"
using namespace clang;
using namespace CodeGen;
@@ -276,7 +276,7 @@ static CGCallee BuildAppleKextVirtualCall(CodeGenFunction &CGF,
CGF.Builder.CreateConstInBoundsGEP1_64(VTable, VTableIndex, "vfnkxt");
llvm::Value *VFunc =
CGF.Builder.CreateAlignedLoad(VFuncPtr, CGF.PointerAlignInBytes);
- CGCallee Callee(GD.getDecl()->getCanonicalDecl(), VFunc);
+ CGCallee Callee(GD, VFunc);
return Callee;
}
diff --git a/lib/CodeGen/CGCXXABI.cpp b/lib/CodeGen/CGCXXABI.cpp
index 3b1b47cdfe07..ed168b1ce72d 100644
--- a/lib/CodeGen/CGCXXABI.cpp
+++ b/lib/CodeGen/CGCXXABI.cpp
@@ -132,7 +132,7 @@ void CGCXXABI::buildThisParam(CodeGenFunction &CGF, FunctionArgList &params) {
// generation. Maybe we can come up with a better way?
auto *ThisDecl = ImplicitParamDecl::Create(
CGM.getContext(), nullptr, MD->getLocation(),
- &CGM.getContext().Idents.get("this"), MD->getThisType(CGM.getContext()),
+ &CGM.getContext().Idents.get("this"), MD->getThisType(),
ImplicitParamDecl::CXXThis);
params.push_back(ThisDecl);
CGF.CXXABIThisDecl = ThisDecl;
diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp
index fa51dc30c58b..7d494bb1f1c7 100644
--- a/lib/CodeGen/CGCall.cpp
+++ b/lib/CodeGen/CGCall.cpp
@@ -23,11 +23,11 @@
#include "clang/AST/Decl.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclObjC.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "clang/CodeGen/SwiftCallingConv.h"
-#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -59,6 +59,7 @@ unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) {
case CC_X86Pascal: return llvm::CallingConv::C;
// TODO: Add support for __vectorcall to LLVM.
case CC_X86VectorCall: return llvm::CallingConv::X86_VectorCall;
+ case CC_AArch64VectorCall: return llvm::CallingConv::AArch64_VectorCall;
case CC_SpirFunction: return llvm::CallingConv::SPIR_FUNC;
case CC_OpenCLKernel: return CGM.getTargetCodeGenInfo().getOpenCLKernelCallingConv();
case CC_PreserveMost: return llvm::CallingConv::PreserveMost;
@@ -67,11 +68,13 @@ unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) {
}
}
-/// Derives the 'this' type for codegen purposes, i.e. ignoring method
+/// Derives the 'this' type for codegen purposes, i.e. ignoring method CVR
/// qualification.
-/// FIXME: address space qualification?
-static CanQualType GetThisType(ASTContext &Context, const CXXRecordDecl *RD) {
+static CanQualType GetThisType(ASTContext &Context, const CXXRecordDecl *RD,
+ const CXXMethodDecl *MD) {
QualType RecTy = Context.getTagDeclType(RD)->getCanonicalTypeInternal();
+ if (MD)
+ RecTy = Context.getAddrSpaceQualType(RecTy, MD->getTypeQualifiers().getAddressSpace());
return Context.getPointerType(CanQualType::CreateUnsafe(RecTy));
}
@@ -214,6 +217,9 @@ static CallingConv getCallingConventionForDecl(const Decl *D, bool IsWindows) {
if (PcsAttr *PCS = D->getAttr<PcsAttr>())
return (PCS->getPCS() == PcsAttr::AAPCS ? CC_AAPCS : CC_AAPCS_VFP);
+ if (D->hasAttr<AArch64VectorPcsAttr>())
+ return CC_AArch64VectorCall;
+
if (D->hasAttr<IntelOclBiccAttr>())
return CC_IntelOclBicc;
@@ -246,7 +252,7 @@ CodeGenTypes::arrangeCXXMethodType(const CXXRecordDecl *RD,
// Add the 'this' pointer.
if (RD)
- argTypes.push_back(GetThisType(Context, RD));
+ argTypes.push_back(GetThisType(Context, RD, MD));
else
argTypes.push_back(Context.VoidPtrTy);
@@ -302,7 +308,7 @@ CodeGenTypes::arrangeCXXStructorDeclaration(const CXXMethodDecl *MD,
SmallVector<CanQualType, 16> argTypes;
SmallVector<FunctionProtoType::ExtParameterInfo, 16> paramInfos;
- argTypes.push_back(GetThisType(Context, MD->getParent()));
+ argTypes.push_back(GetThisType(Context, MD->getParent(), MD));
bool PassParams = true;
@@ -529,7 +535,7 @@ const CGFunctionInfo &
CodeGenTypes::arrangeUnprototypedMustTailThunk(const CXXMethodDecl *MD) {
assert(MD->isVirtual() && "only methods have thunks");
CanQual<FunctionProtoType> FTP = GetFormalType(MD);
- CanQualType ArgTys[] = { GetThisType(Context, MD->getParent()) };
+ CanQualType ArgTys[] = { GetThisType(Context, MD->getParent(), MD) };
return arrangeLLVMFunctionInfo(Context.VoidTy, /*instanceMethod=*/false,
/*chainCall=*/false, ArgTys,
FTP->getExtInfo(), {}, RequiredArgs(1));
@@ -543,7 +549,7 @@ CodeGenTypes::arrangeMSCtorClosure(const CXXConstructorDecl *CD,
CanQual<FunctionProtoType> FTP = GetFormalType(CD);
SmallVector<CanQualType, 2> ArgTys;
const CXXRecordDecl *RD = CD->getParent();
- ArgTys.push_back(GetThisType(Context, RD));
+ ArgTys.push_back(GetThisType(Context, RD, CD));
if (CT == Ctor_CopyingClosure)
ArgTys.push_back(*FTP->param_type_begin());
if (RD->getNumVBases() > 0)
@@ -741,8 +747,8 @@ CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType,
FunctionType::ExtInfo info,
ArrayRef<FunctionProtoType::ExtParameterInfo> paramInfos,
RequiredArgs required) {
- assert(std::all_of(argTypes.begin(), argTypes.end(),
- [](CanQualType T) { return T.isCanonicalAsParam(); }));
+ assert(llvm::all_of(argTypes,
+ [](CanQualType T) { return T.isCanonicalAsParam(); }));
// Lookup or create unique function info.
llvm::FoldingSetNodeID ID;
@@ -1253,8 +1259,8 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty,
// Otherwise do coercion through memory. This is stupid, but simple.
Address Tmp = CreateTempAllocaForCoercion(CGF, Ty, Src.getAlignment());
- Address Casted = CGF.Builder.CreateBitCast(Tmp, CGF.AllocaInt8PtrTy);
- Address SrcCasted = CGF.Builder.CreateBitCast(Src, CGF.AllocaInt8PtrTy);
+ Address Casted = CGF.Builder.CreateElementBitCast(Tmp,CGF.Int8Ty);
+ Address SrcCasted = CGF.Builder.CreateElementBitCast(Src,CGF.Int8Ty);
CGF.Builder.CreateMemCpy(Casted, SrcCasted,
llvm::ConstantInt::get(CGF.IntPtrTy, SrcSize),
false);
@@ -1335,8 +1341,8 @@ static void CreateCoercedStore(llvm::Value *Src,
// to that information.
Address Tmp = CreateTempAllocaForCoercion(CGF, SrcTy, Dst.getAlignment());
CGF.Builder.CreateStore(Src, Tmp);
- Address Casted = CGF.Builder.CreateBitCast(Tmp, CGF.AllocaInt8PtrTy);
- Address DstCasted = CGF.Builder.CreateBitCast(Dst, CGF.AllocaInt8PtrTy);
+ Address Casted = CGF.Builder.CreateElementBitCast(Tmp,CGF.Int8Ty);
+ Address DstCasted = CGF.Builder.CreateElementBitCast(Dst,CGF.Int8Ty);
CGF.Builder.CreateMemCpy(DstCasted, Casted,
llvm::ConstantInt::get(CGF.IntPtrTy, DstSize),
false);
@@ -1709,6 +1715,8 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone,
if (CodeGenOpts.DisableRedZone)
FuncAttrs.addAttribute(llvm::Attribute::NoRedZone);
+ if (CodeGenOpts.IndirectTlsSegRefs)
+ FuncAttrs.addAttribute("indirect-tls-seg-refs");
if (CodeGenOpts.NoImplicitFloat)
FuncAttrs.addAttribute(llvm::Attribute::NoImplicitFloat);
@@ -1784,6 +1792,11 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone,
FuncAttrs.addAttribute("stackrealign");
if (CodeGenOpts.Backchain)
FuncAttrs.addAttribute("backchain");
+
+ // FIXME: The interaction of this attribute with the SLH command line flag
+ // has not been determined.
+ if (CodeGenOpts.SpeculativeLoadHardening)
+ FuncAttrs.addAttribute(llvm::Attribute::SpeculativeLoadHardening);
}
if (getLangOpts().assumeFunctionsAreConvergent()) {
@@ -1803,6 +1816,12 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone,
if (CodeGenOpts.FlushDenorm)
FuncAttrs.addAttribute("nvptx-f32ftz", "true");
}
+
+ for (StringRef Attr : CodeGenOpts.DefaultFunctionAttrs) {
+ StringRef Var, Value;
+ std::tie(Var, Value) = Attr.split('=');
+ FuncAttrs.addAttribute(Var, Value);
+ }
}
void CodeGenModule::AddDefaultFnAttrs(llvm::Function &F) {
@@ -1828,7 +1847,7 @@ void CodeGenModule::ConstructAttributeList(
AddAttributesFromFunctionProtoType(getContext(), FuncAttrs,
CalleeInfo.getCalleeFunctionProtoType());
- const Decl *TargetDecl = CalleeInfo.getCalleeDecl();
+ const Decl *TargetDecl = CalleeInfo.getCalleeDecl().getDecl();
bool HasOptnone = false;
// FIXME: handle sseregparm someday...
@@ -1845,6 +1864,8 @@ void CodeGenModule::ConstructAttributeList(
FuncAttrs.addAttribute(llvm::Attribute::NoDuplicate);
if (TargetDecl->hasAttr<ConvergentAttr>())
FuncAttrs.addAttribute(llvm::Attribute::Convergent);
+ if (TargetDecl->hasAttr<SpeculativeLoadHardeningAttr>())
+ FuncAttrs.addAttribute(llvm::Attribute::SpeculativeLoadHardening);
if (const FunctionDecl *Fn = dyn_cast<FunctionDecl>(TargetDecl)) {
AddAttributesFromFunctionProtoType(
@@ -1936,7 +1957,7 @@ void CodeGenModule::ConstructAttributeList(
FuncAttrs.addAttribute("disable-tail-calls",
llvm::toStringRef(DisableTailCalls));
- GetCPUAndFeaturesAttributes(TargetDecl, FuncAttrs);
+ GetCPUAndFeaturesAttributes(CalleeInfo.getCalleeDecl(), FuncAttrs);
}
ClangToLLVMArgMapping IRFunctionArgs(getContext(), FI);
@@ -2327,7 +2348,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
} else {
// Load scalar value from indirect argument.
llvm::Value *V =
- EmitLoadOfScalar(ParamAddr, false, Ty, Arg->getLocStart());
+ EmitLoadOfScalar(ParamAddr, false, Ty, Arg->getBeginLoc());
if (isPromoted)
V = emitArgumentDemotion(*this, Arg, V);
@@ -2389,7 +2410,10 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
if (!AVAttr)
if (const auto *TOTy = dyn_cast<TypedefType>(OTy))
AVAttr = TOTy->getDecl()->getAttr<AlignValueAttr>();
- if (AVAttr) {
+ if (AVAttr && !SanOpts.has(SanitizerKind::Alignment)) {
+ // If alignment-assumption sanitizer is enabled, we do *not* add
+ // alignment attribute here, but emit normal alignment assumption,
+ // so the UBSAN check could function.
llvm::Value *AlignmentValue =
EmitScalarExpr(AVAttr->getAlignment());
llvm::ConstantInt *AlignmentCI =
@@ -2490,7 +2514,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
// Match to what EmitParmDecl is expecting for this type.
if (CodeGenFunction::hasScalarEvaluationKind(Ty)) {
llvm::Value *V =
- EmitLoadOfScalar(Alloca, false, Ty, Arg->getLocStart());
+ EmitLoadOfScalar(Alloca, false, Ty, Arg->getBeginLoc());
if (isPromoted)
V = emitArgumentDemotion(*this, Arg, V);
ArgVals.push_back(ParamValue::forDirect(V));
@@ -3063,8 +3087,9 @@ void CodeGenFunction::EmitDelegateCallArg(CallArgList &args,
QualType type = param->getType();
- assert(!isInAllocaArgument(CGM.getCXXABI(), type) &&
- "cannot emit delegate call arguments for inalloca arguments!");
+ if (isInAllocaArgument(CGM.getCXXABI(), type)) {
+ CGM.ErrorUnsupported(param, "forwarded non-trivially copyable parameter");
+ }
// GetAddrOfLocalVar returns a pointer-to-pointer for references,
// but the argument needs to be the original pointer.
@@ -3945,15 +3970,28 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
} else if (I->hasLValue()) {
auto LV = I->getKnownLValue();
auto AS = LV.getAddressSpace();
+
if ((!ArgInfo.getIndirectByVal() &&
(LV.getAlignment() >=
- getContext().getTypeAlignInChars(I->Ty))) ||
- (ArgInfo.getIndirectByVal() &&
- ((AS != LangAS::Default && AS != LangAS::opencl_private &&
- AS != CGM.getASTAllocaAddressSpace())))) {
+ getContext().getTypeAlignInChars(I->Ty)))) {
+ NeedCopy = true;
+ }
+ if (!getLangOpts().OpenCL) {
+ if ((ArgInfo.getIndirectByVal() &&
+ (AS != LangAS::Default &&
+ AS != CGM.getASTAllocaAddressSpace()))) {
+ NeedCopy = true;
+ }
+ }
+ // For OpenCL even if RV is located in default or alloca address space
+ // we don't want to perform address space cast for it.
+ else if ((ArgInfo.getIndirectByVal() &&
+ Addr.getType()->getAddressSpace() != IRFuncTy->
+ getParamType(FirstIRArg)->getPointerAddressSpace())) {
NeedCopy = true;
}
}
+
if (NeedCopy) {
// Create an aligned temporary, and copy to it.
Address AI = CreateMemTempWithoutCast(
@@ -4235,6 +4273,13 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
}
#endif
+ // Update the largest vector width if any arguments have vector types.
+ for (unsigned i = 0; i < IRCallArgs.size(); ++i) {
+ if (auto *VT = dyn_cast<llvm::VectorType>(IRCallArgs[i]->getType()))
+ LargestVectorWidth = std::max(LargestVectorWidth,
+ VT->getPrimitiveSizeInBits());
+ }
+
// Compute the calling convention and attributes.
unsigned CallingConv;
llvm::AttributeList Attrs;
@@ -4248,8 +4293,11 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// Apply always_inline to all calls within flatten functions.
// FIXME: should this really take priority over __try, below?
if (CurCodeDecl && CurCodeDecl->hasAttr<FlattenAttr>() &&
- !(Callee.getAbstractInfo().getCalleeDecl() &&
- Callee.getAbstractInfo().getCalleeDecl()->hasAttr<NoInlineAttr>())) {
+ !(Callee.getAbstractInfo().getCalleeDecl().getDecl() &&
+ Callee.getAbstractInfo()
+ .getCalleeDecl()
+ .getDecl()
+ ->hasAttr<NoInlineAttr>())) {
Attrs =
Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex,
llvm::Attribute::AlwaysInline);
@@ -4315,6 +4363,11 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
if (!CI->getType()->isVoidTy())
CI->setName("call");
+ // Update largest vector width from the return type.
+ if (auto *VT = dyn_cast<llvm::VectorType>(CI->getType()))
+ LargestVectorWidth = std::max(LargestVectorWidth,
+ VT->getPrimitiveSizeInBits());
+
// Insert instrumentation or attach profile metadata at indirect call sites.
// For more details, see the comment before the definition of
// IPVK_IndirectCallTarget in InstrProfData.inc.
@@ -4329,7 +4382,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// Suppress tail calls if requested.
if (llvm::CallInst *Call = dyn_cast<llvm::CallInst>(CI)) {
- const Decl *TargetDecl = Callee.getAbstractInfo().getCalleeDecl();
+ const Decl *TargetDecl = Callee.getAbstractInfo().getCalleeDecl().getDecl();
if (TargetDecl && TargetDecl->hasAttr<NotTailCalledAttr>())
Call->setTailCallKind(llvm::CallInst::TCK_NoTail);
}
@@ -4476,7 +4529,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
} ();
// Emit the assume_aligned check on the return value.
- const Decl *TargetDecl = Callee.getAbstractInfo().getCalleeDecl();
+ const Decl *TargetDecl = Callee.getAbstractInfo().getCalleeDecl().getDecl();
if (Ret.isScalar() && TargetDecl) {
if (const auto *AA = TargetDecl->getAttr<AssumeAlignedAttr>()) {
llvm::Value *OffsetValue = nullptr;
@@ -4485,13 +4538,14 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
llvm::Value *Alignment = EmitScalarExpr(AA->getAlignment());
llvm::ConstantInt *AlignmentCI = cast<llvm::ConstantInt>(Alignment);
- EmitAlignmentAssumption(Ret.getScalarVal(), AlignmentCI->getZExtValue(),
- OffsetValue);
+ EmitAlignmentAssumption(Ret.getScalarVal(), RetTy, Loc, AA->getLocation(),
+ AlignmentCI->getZExtValue(), OffsetValue);
} else if (const auto *AA = TargetDecl->getAttr<AllocAlignAttr>()) {
- llvm::Value *ParamVal =
- CallArgs[AA->getParamIndex().getLLVMIndex()].getRValue(
- *this).getScalarVal();
- EmitAlignmentAssumption(Ret.getScalarVal(), ParamVal);
+ llvm::Value *AlignmentVal = CallArgs[AA->getParamIndex().getLLVMIndex()]
+ .getRValue(*this)
+ .getScalarVal();
+ EmitAlignmentAssumption(Ret.getScalarVal(), RetTy, Loc, AA->getLocation(),
+ AlignmentVal);
}
}
@@ -4502,8 +4556,8 @@ CGCallee CGCallee::prepareConcreteCallee(CodeGenFunction &CGF) const {
if (isVirtual()) {
const CallExpr *CE = getVirtualCallExpr();
return CGF.CGM.getCXXABI().getVirtualFunctionPointer(
- CGF, getVirtualMethodDecl(), getThisAddress(),
- getFunctionType(), CE ? CE->getLocStart() : SourceLocation());
+ CGF, getVirtualMethodDecl(), getThisAddress(), getFunctionType(),
+ CE ? CE->getBeginLoc() : SourceLocation());
}
return *this;
diff --git a/lib/CodeGen/CGCall.h b/lib/CodeGen/CGCall.h
index 99a36e4e12f1..c300808bea28 100644
--- a/lib/CodeGen/CGCall.h
+++ b/lib/CodeGen/CGCall.h
@@ -46,21 +46,21 @@ class CGCalleeInfo {
/// The function prototype of the callee.
const FunctionProtoType *CalleeProtoTy;
/// The function declaration of the callee.
- const Decl *CalleeDecl;
+ GlobalDecl CalleeDecl;
public:
- explicit CGCalleeInfo() : CalleeProtoTy(nullptr), CalleeDecl(nullptr) {}
- CGCalleeInfo(const FunctionProtoType *calleeProtoTy, const Decl *calleeDecl)
+ explicit CGCalleeInfo() : CalleeProtoTy(nullptr), CalleeDecl() {}
+ CGCalleeInfo(const FunctionProtoType *calleeProtoTy, GlobalDecl calleeDecl)
: CalleeProtoTy(calleeProtoTy), CalleeDecl(calleeDecl) {}
CGCalleeInfo(const FunctionProtoType *calleeProtoTy)
- : CalleeProtoTy(calleeProtoTy), CalleeDecl(nullptr) {}
- CGCalleeInfo(const Decl *calleeDecl)
+ : CalleeProtoTy(calleeProtoTy), CalleeDecl() {}
+ CGCalleeInfo(GlobalDecl calleeDecl)
: CalleeProtoTy(nullptr), CalleeDecl(calleeDecl) {}
const FunctionProtoType *getCalleeFunctionProtoType() const {
return CalleeProtoTy;
}
- const Decl *getCalleeDecl() const { return CalleeDecl; }
+ const GlobalDecl getCalleeDecl() const { return CalleeDecl; }
};
/// All available information about a concrete callee.
@@ -171,7 +171,7 @@ public:
}
CGCalleeInfo getAbstractInfo() const {
if (isVirtual())
- return VirtualInfo.MD.getDecl();
+ return VirtualInfo.MD;
assert(isOrdinary());
return AbstractInfo;
}
diff --git a/lib/CodeGen/CGClass.cpp b/lib/CodeGen/CGClass.cpp
index ec4eb000a3b9..ee150a792b76 100644
--- a/lib/CodeGen/CGClass.cpp
+++ b/lib/CodeGen/CGClass.cpp
@@ -16,14 +16,15 @@
#include "CGDebugInfo.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
+#include "TargetInfo.h"
#include "clang/AST/CXXInheritance.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/EvaluatedExprVisitor.h"
#include "clang/AST/RecordLayout.h"
#include "clang/AST/StmtCXX.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/CodeGen/CGFunctionInfo.h"
-#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Transforms/Utils/SanitizerStats.h"
@@ -829,7 +830,7 @@ void CodeGenFunction::EmitConstructorBody(FunctionArgList &Args) {
// delegation optimization.
if (CtorType == Ctor_Complete && IsConstructorDelegationValid(Ctor) &&
CGM.getTarget().getCXXABI().hasConstructorVariants()) {
- EmitDelegateCXXConstructorCall(Ctor, Ctor_Base, Args, Ctor->getLocEnd());
+ EmitDelegateCXXConstructorCall(Ctor, Ctor_Base, Args, Ctor->getEndLoc());
return;
}
@@ -2012,8 +2013,19 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D,
bool NewPointerIsChecked) {
CallArgList Args;
+ LangAS SlotAS = E->getType().getAddressSpace();
+ QualType ThisType = D->getThisType();
+ LangAS ThisAS = ThisType.getTypePtr()->getPointeeType().getAddressSpace();
+ llvm::Value *ThisPtr = This.getPointer();
+ if (SlotAS != ThisAS) {
+ unsigned TargetThisAS = getContext().getTargetAddressSpace(ThisAS);
+ llvm::Type *NewType =
+ ThisPtr->getType()->getPointerElementType()->getPointerTo(TargetThisAS);
+ ThisPtr = getTargetHooks().performAddrSpaceCast(*this, This.getPointer(),
+ ThisAS, SlotAS, NewType);
+ }
// Push the this ptr.
- Args.add(RValue::get(This.getPointer()), D->getThisType(getContext()));
+ Args.add(RValue::get(ThisPtr), D->getThisType());
// If this is a trivial constructor, emit a memcpy now before we lose
// the alignment information on the argument.
@@ -2122,7 +2134,7 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D,
CGM.getAddrOfCXXStructor(D, getFromCtorType(Type));
const CGFunctionInfo &Info = CGM.getTypes().arrangeCXXConstructorCall(
Args, D, Type, ExtraArgs.Prefix, ExtraArgs.Suffix, PassPrototypeArgs);
- CGCallee Callee = CGCallee::forDirect(CalleePtr, D);
+ CGCallee Callee = CGCallee::forDirect(CalleePtr, GlobalDecl(D, Type));
EmitCall(Info, Callee, ReturnValueSlot(), Args);
// Generate vtable assumptions if we're constructing a complete object
@@ -2147,7 +2159,7 @@ void CodeGenFunction::EmitInheritedCXXConstructorCall(
const CXXConstructorDecl *D, bool ForVirtualBase, Address This,
bool InheritedFromVBase, const CXXInheritedCtorInitExpr *E) {
CallArgList Args;
- CallArg ThisArg(RValue::get(This.getPointer()), D->getThisType(getContext()));
+ CallArg ThisArg(RValue::get(This.getPointer()), D->getThisType());
// Forward the parameters.
if (InheritedFromVBase &&
@@ -2196,6 +2208,7 @@ void CodeGenFunction::EmitInlinedInheritingCXXConstructorCall(
GlobalDecl GD(Ctor, CtorType);
InlinedInheritingConstructorScope Scope(*this, GD);
ApplyInlineDebugLocation DebugScope(*this, GD);
+ RunCleanupsScope RunCleanups(*this);
// Save the arguments to be passed to the inherited constructor.
CXXInheritedCtorInitExprArgs = Args;
@@ -2271,7 +2284,7 @@ CodeGenFunction::EmitSynthesizedCXXCopyCtorCall(const CXXConstructorDecl *D,
CallArgList Args;
// Push the this ptr.
- Args.add(RValue::get(This.getPointer()), D->getThisType(getContext()));
+ Args.add(RValue::get(This.getPointer()), D->getThisType());
// Push the src ptr.
QualType QT = *(FPT->param_type_begin());
@@ -2808,7 +2821,7 @@ void CodeGenFunction::EmitForwardingCallToLambda(
// variadic arguments.
// Now emit our call.
- auto callee = CGCallee::forDirect(calleePtr, callOperator);
+ auto callee = CGCallee::forDirect(calleePtr, GlobalDecl(callOperator));
RValue RV = EmitCall(calleeFnInfo, callee, returnSlot, callArgs);
// If necessary, copy the returned value into the slot.
@@ -2839,12 +2852,12 @@ void CodeGenFunction::EmitLambdaBlockInvokeBody() {
CallArgList CallArgs;
QualType ThisType = getContext().getPointerType(getContext().getRecordType(Lambda));
- Address ThisPtr = GetAddrOfBlockDecl(variable, false);
+ Address ThisPtr = GetAddrOfBlockDecl(variable);
CallArgs.add(RValue::get(ThisPtr.getPointer()), ThisType);
// Add the rest of the parameters.
for (auto param : BD->parameters())
- EmitDelegateCallArg(CallArgs, param, param->getLocStart());
+ EmitDelegateCallArg(CallArgs, param, param->getBeginLoc());
assert(!Lambda->isGenericLambda() &&
"generic lambda interconversion to block not implemented");
@@ -2863,7 +2876,7 @@ void CodeGenFunction::EmitLambdaDelegatingInvokeBody(const CXXMethodDecl *MD) {
// Add the rest of the parameters.
for (auto Param : MD->parameters())
- EmitDelegateCallArg(CallArgs, Param, Param->getLocStart());
+ EmitDelegateCallArg(CallArgs, Param, Param->getBeginLoc());
const CXXMethodDecl *CallOp = Lambda->getLambdaCallOperator();
// For a generic lambda, find the corresponding call operator specialization
diff --git a/lib/CodeGen/CGCleanup.cpp b/lib/CodeGen/CGCleanup.cpp
index 0a766d176200..3743d24f11fc 100644
--- a/lib/CodeGen/CGCleanup.cpp
+++ b/lib/CodeGen/CGCleanup.cpp
@@ -366,7 +366,7 @@ static llvm::SwitchInst *TransitionToCleanupSwitch(CodeGenFunction &CGF,
llvm::BasicBlock *Block) {
// If it's a branch, turn it into a switch whose default
// destination is its original target.
- llvm::TerminatorInst *Term = Block->getTerminator();
+ llvm::Instruction *Term = Block->getTerminator();
assert(Term && "can't transition block without terminator");
if (llvm::BranchInst *Br = dyn_cast<llvm::BranchInst>(Term)) {
@@ -589,7 +589,7 @@ static void ForwardPrebranchedFallthrough(llvm::BasicBlock *Exit,
llvm::BasicBlock *To) {
// Exit is the exit block of a cleanup, so it always terminates in
// an unconditional branch or a switch.
- llvm::TerminatorInst *Term = Exit->getTerminator();
+ llvm::Instruction *Term = Exit->getTerminator();
if (llvm::BranchInst *Br = dyn_cast<llvm::BranchInst>(Term)) {
assert(Br->isUnconditional() && Br->getSuccessor(0) == From);
diff --git a/lib/CodeGen/CGCoroutine.cpp b/lib/CodeGen/CGCoroutine.cpp
index 4f525c8aac85..80fa7c873631 100644
--- a/lib/CodeGen/CGCoroutine.cpp
+++ b/lib/CodeGen/CGCoroutine.cpp
@@ -93,10 +93,10 @@ static void createCoroData(CodeGenFunction &CGF,
CallExpr const *CoroIdExpr = nullptr) {
if (CurCoro.Data) {
if (CurCoro.Data->CoroIdExpr)
- CGF.CGM.Error(CoroIdExpr->getLocStart(),
+ CGF.CGM.Error(CoroIdExpr->getBeginLoc(),
"only one __builtin_coro_id can be used in a function");
else if (CoroIdExpr)
- CGF.CGM.Error(CoroIdExpr->getLocStart(),
+ CGF.CGM.Error(CoroIdExpr->getBeginLoc(),
"__builtin_coro_id shall not be used in a C++ coroutine");
else
llvm_unreachable("EmitCoroutineBodyStatement called twice?");
@@ -444,7 +444,7 @@ struct CallCoroDelete final : public EHScopeStack::Cleanup {
// We should have captured coro.free from the emission of deallocate.
auto *CoroFree = CGF.CurCoro.Data->LastCoroFree;
if (!CoroFree) {
- CGF.CGM.Error(Deallocate->getLocStart(),
+ CGF.CGM.Error(Deallocate->getBeginLoc(),
"Deallocation expressoin does not refer to coro.free");
return;
}
@@ -654,7 +654,7 @@ void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) {
EmitBlock(BodyBB);
}
- auto Loc = S.getLocStart();
+ auto Loc = S.getBeginLoc();
CXXCatchStmt Catch(Loc, /*exDecl=*/nullptr,
CurCoro.Data->ExceptionHandler);
auto *TryStmt =
@@ -707,8 +707,8 @@ RValue CodeGenFunction::EmitCoroutineIntrinsic(const CallExpr *E,
if (CurCoro.Data && CurCoro.Data->CoroBegin) {
return RValue::get(CurCoro.Data->CoroBegin);
}
- CGM.Error(E->getLocStart(), "this builtin expect that __builtin_coro_begin "
- "has been used earlier in this function");
+ CGM.Error(E->getBeginLoc(), "this builtin expect that __builtin_coro_begin "
+ "has been used earlier in this function");
auto NullPtr = llvm::ConstantPointerNull::get(Builder.getInt8PtrTy());
return RValue::get(NullPtr);
}
@@ -722,7 +722,7 @@ RValue CodeGenFunction::EmitCoroutineIntrinsic(const CallExpr *E,
Args.push_back(CurCoro.Data->CoroId);
break;
}
- CGM.Error(E->getLocStart(), "this builtin expect that __builtin_coro_id has"
+ CGM.Error(E->getBeginLoc(), "this builtin expect that __builtin_coro_id has"
" been used earlier in this function");
// Fallthrough to the next case to add TokenNone as the first argument.
LLVM_FALLTHROUGH;
diff --git a/lib/CodeGen/CGDebugInfo.cpp b/lib/CodeGen/CGDebugInfo.cpp
index 5be6fb3e4245..41f8721468a3 100644
--- a/lib/CodeGen/CGDebugInfo.cpp
+++ b/lib/CodeGen/CGDebugInfo.cpp
@@ -25,10 +25,10 @@
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/Expr.h"
#include "clang/AST/RecordLayout.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/Version.h"
-#include "clang/Frontend/CodeGenOptions.h"
#include "clang/Frontend/FrontendOptions.h"
#include "clang/Lex/HeaderSearchOptions.h"
#include "clang/Lex/ModuleMap.h"
@@ -41,6 +41,7 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MD5.h"
@@ -180,8 +181,7 @@ void CGDebugInfo::setLocation(SourceLocation Loc) {
SourceManager &SM = CGM.getContext().getSourceManager();
auto *Scope = cast<llvm::DIScope>(LexicalBlockStack.back());
PresumedLoc PCLoc = SM.getPresumedLoc(CurLoc);
-
- if (PCLoc.isInvalid() || Scope->getFilename() == PCLoc.getFilename())
+ if (PCLoc.isInvalid() || Scope->getFile() == getOrCreateFile(CurLoc))
return;
if (auto *LBF = dyn_cast<llvm::DILexicalBlockFile>(Scope)) {
@@ -220,7 +220,7 @@ llvm::DIScope *CGDebugInfo::getContextDescriptor(const Decl *Context,
if (const auto *RDecl = dyn_cast<RecordDecl>(Context))
if (!RDecl->isDependentType())
return getOrCreateType(CGM.getContext().getTypeDeclType(RDecl),
- getOrCreateMainFile());
+ TheCU->getFile());
return Default;
}
@@ -234,6 +234,9 @@ PrintingPolicy CGDebugInfo::getPrintingPolicy() const {
if (CGM.getCodeGenOpts().EmitCodeView)
PP.MSVCFormatting = true;
+ // Apply -fdebug-prefix-map.
+ PP.RemapFilePaths = true;
+ PP.remapPath = [this](StringRef Path) { return remapDIPath(Path); };
return PP;
}
@@ -401,19 +404,18 @@ Optional<StringRef> CGDebugInfo::getSource(const SourceManager &SM,
llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) {
if (!Loc.isValid())
// If Location is not valid then use main input file.
- return getOrCreateMainFile();
+ return TheCU->getFile();
SourceManager &SM = CGM.getContext().getSourceManager();
PresumedLoc PLoc = SM.getPresumedLoc(Loc);
- if (PLoc.isInvalid() || StringRef(PLoc.getFilename()).empty())
+ StringRef FileName = PLoc.getFilename();
+ if (PLoc.isInvalid() || FileName.empty())
// If the location is not valid then use main input file.
- return getOrCreateMainFile();
+ return TheCU->getFile();
// Cache the results.
- const char *fname = PLoc.getFilename();
- auto It = DIFileCache.find(fname);
-
+ auto It = DIFileCache.find(FileName.data());
if (It != DIFileCache.end()) {
// Verify that the information still exists.
if (llvm::Metadata *V = It->second)
@@ -426,22 +428,48 @@ llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) {
Optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo;
if (CSKind)
CSInfo.emplace(*CSKind, Checksum);
-
- llvm::DIFile *F = DBuilder.createFile(
- remapDIPath(PLoc.getFilename()), remapDIPath(getCurrentDirname()), CSInfo,
- getSource(SM, SM.getFileID(Loc)));
-
- DIFileCache[fname].reset(F);
+ return createFile(FileName, CSInfo, getSource(SM, SM.getFileID(Loc)));
+}
+
+llvm::DIFile *
+CGDebugInfo::createFile(StringRef FileName,
+ Optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo,
+ Optional<StringRef> Source) {
+ StringRef Dir;
+ StringRef File;
+ std::string RemappedFile = remapDIPath(FileName);
+ std::string CurDir = remapDIPath(getCurrentDirname());
+ SmallString<128> DirBuf;
+ SmallString<128> FileBuf;
+ if (llvm::sys::path::is_absolute(RemappedFile)) {
+ // Strip the common prefix (if it is more than just "/") from current
+ // directory and FileName for a more space-efficient encoding.
+ auto FileIt = llvm::sys::path::begin(RemappedFile);
+ auto FileE = llvm::sys::path::end(RemappedFile);
+ auto CurDirIt = llvm::sys::path::begin(CurDir);
+ auto CurDirE = llvm::sys::path::end(CurDir);
+ for (; CurDirIt != CurDirE && *CurDirIt == *FileIt; ++CurDirIt, ++FileIt)
+ llvm::sys::path::append(DirBuf, *CurDirIt);
+ if (std::distance(llvm::sys::path::begin(CurDir), CurDirIt) == 1) {
+ // The common prefix is only the root; stripping it would cause
+ // LLVM diagnostic locations to be more confusing.
+ Dir = {};
+ File = RemappedFile;
+ } else {
+ for (; FileIt != FileE; ++FileIt)
+ llvm::sys::path::append(FileBuf, *FileIt);
+ Dir = DirBuf;
+ File = FileBuf;
+ }
+ } else {
+ Dir = CurDir;
+ File = RemappedFile;
+ }
+ llvm::DIFile *F = DBuilder.createFile(File, Dir, CSInfo, Source);
+ DIFileCache[FileName.data()].reset(F);
return F;
}
-llvm::DIFile *CGDebugInfo::getOrCreateMainFile() {
- return DBuilder.createFile(
- remapDIPath(TheCU->getFilename()), remapDIPath(TheCU->getDirectory()),
- TheCU->getFile()->getChecksum(),
- CGM.getCodeGenOpts().EmbedSource ? TheCU->getSource() : None);
-}
-
std::string CGDebugInfo::remapDIPath(StringRef Path) const {
for (const auto &Entry : DebugPrefixMap)
if (Path.startswith(Entry.first))
@@ -527,11 +555,11 @@ void CGDebugInfo::CreateCompileUnit() {
llvm::dwarf::SourceLanguage LangTag;
const LangOptions &LO = CGM.getLangOpts();
if (LO.CPlusPlus) {
- if (LO.ObjC1)
+ if (LO.ObjC)
LangTag = llvm::dwarf::DW_LANG_ObjC_plus_plus;
else
LangTag = llvm::dwarf::DW_LANG_C_plus_plus;
- } else if (LO.ObjC1) {
+ } else if (LO.ObjC) {
LangTag = llvm::dwarf::DW_LANG_ObjC;
} else if (LO.RenderScript) {
LangTag = llvm::dwarf::DW_LANG_GOOGLE_RenderScript;
@@ -545,7 +573,7 @@ void CGDebugInfo::CreateCompileUnit() {
// Figure out which version of the ObjC runtime we have.
unsigned RuntimeVers = 0;
- if (LO.ObjC1)
+ if (LO.ObjC)
RuntimeVers = LO.ObjCRuntime.isNonFragile() ? 2 : 1;
llvm::DICompileUnit::DebugEmissionKind EmissionKind;
@@ -557,29 +585,42 @@ void CGDebugInfo::CreateCompileUnit() {
case codegenoptions::DebugLineTablesOnly:
EmissionKind = llvm::DICompileUnit::LineTablesOnly;
break;
+ case codegenoptions::DebugDirectivesOnly:
+ EmissionKind = llvm::DICompileUnit::DebugDirectivesOnly;
+ break;
case codegenoptions::LimitedDebugInfo:
case codegenoptions::FullDebugInfo:
EmissionKind = llvm::DICompileUnit::FullDebug;
break;
}
+ uint64_t DwoId = 0;
+ auto &CGOpts = CGM.getCodeGenOpts();
+ // The DIFile used by the CU is distinct from the main source
+ // file. Its directory part specifies what becomes the
+ // DW_AT_comp_dir (the compilation directory), even if the source
+ // file was specified with an absolute path.
if (CSKind)
CSInfo.emplace(*CSKind, Checksum);
+ llvm::DIFile *CUFile = DBuilder.createFile(
+ remapDIPath(MainFileName), remapDIPath(getCurrentDirname()), CSInfo,
+ getSource(SM, SM.getMainFileID()));
// Create new compile unit.
- // FIXME - Eliminate TheCU.
- auto &CGOpts = CGM.getCodeGenOpts();
TheCU = DBuilder.createCompileUnit(
- LangTag,
- DBuilder.createFile(remapDIPath(MainFileName),
- remapDIPath(getCurrentDirname()), CSInfo,
- getSource(SM, SM.getMainFileID())),
- CGOpts.EmitVersionIdentMetadata ? Producer : "",
+ LangTag, CUFile, CGOpts.EmitVersionIdentMetadata ? Producer : "",
LO.Optimize || CGOpts.PrepareForLTO || CGOpts.PrepareForThinLTO,
CGOpts.DwarfDebugFlags, RuntimeVers,
- CGOpts.EnableSplitDwarf ? "" : CGOpts.SplitDwarfFile, EmissionKind,
- 0 /* DWOid */, CGOpts.SplitDwarfInlining, CGOpts.DebugInfoForProfiling,
- CGOpts.GnuPubnames);
+ (CGOpts.getSplitDwarfMode() != CodeGenOptions::NoFission)
+ ? ""
+ : CGOpts.SplitDwarfFile,
+ EmissionKind, DwoId, CGOpts.SplitDwarfInlining,
+ CGOpts.DebugInfoForProfiling,
+ CGM.getTarget().getTriple().isNVPTX()
+ ? llvm::DICompileUnit::DebugNameTableKind::None
+ : static_cast<llvm::DICompileUnit::DebugNameTableKind>(
+ CGOpts.DebugNameTable),
+ CGOpts.DebugRangesBaseAddress);
}
llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
@@ -597,9 +638,9 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
return nullptr;
case BuiltinType::ObjCClass:
if (!ClassTy)
- ClassTy = DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type,
- "objc_class", TheCU,
- getOrCreateMainFile(), 0);
+ ClassTy =
+ DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type,
+ "objc_class", TheCU, TheCU->getFile(), 0);
return ClassTy;
case BuiltinType::ObjCId: {
// typedef struct objc_class *Class;
@@ -611,21 +652,21 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
return ObjTy;
if (!ClassTy)
- ClassTy = DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type,
- "objc_class", TheCU,
- getOrCreateMainFile(), 0);
+ ClassTy =
+ DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type,
+ "objc_class", TheCU, TheCU->getFile(), 0);
unsigned Size = CGM.getContext().getTypeSize(CGM.getContext().VoidPtrTy);
auto *ISATy = DBuilder.createPointerType(ClassTy, Size);
- ObjTy = DBuilder.createStructType(
- TheCU, "objc_object", getOrCreateMainFile(), 0, 0, 0,
- llvm::DINode::FlagZero, nullptr, llvm::DINodeArray());
+ ObjTy = DBuilder.createStructType(TheCU, "objc_object", TheCU->getFile(), 0,
+ 0, 0, llvm::DINode::FlagZero, nullptr,
+ llvm::DINodeArray());
DBuilder.replaceArrays(
ObjTy, DBuilder.getOrCreateArray(&*DBuilder.createMemberType(
- ObjTy, "isa", getOrCreateMainFile(), 0, Size, 0, 0,
+ ObjTy, "isa", TheCU->getFile(), 0, Size, 0, 0,
llvm::DINode::FlagZero, ISATy)));
return ObjTy;
}
@@ -633,7 +674,7 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
if (!SelTy)
SelTy = DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type,
"objc_selector", TheCU,
- getOrCreateMainFile(), 0);
+ TheCU->getFile(), 0);
return SelTy;
}
@@ -652,6 +693,10 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
return getOrCreateStructPtrType("opencl_queue_t", OCLQueueDITy);
case BuiltinType::OCLReserveID:
return getOrCreateStructPtrType("opencl_reserve_id_t", OCLReserveIDDITy);
+#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \
+ case BuiltinType::Id: \
+ return getOrCreateStructPtrType("opencl_" #ExtType, Id##Ty);
+#include "clang/Basic/OpenCLExtensionTypes.def"
case BuiltinType::UChar:
case BuiltinType::Char_U:
@@ -825,31 +870,45 @@ static bool hasCXXMangling(const TagDecl *TD, llvm::DICompileUnit *TheCU) {
}
}
-// Determines if the tag declaration will require a type identifier.
+// Determines if the debug info for this tag declaration needs a type
+// identifier. The purpose of the unique identifier is to deduplicate type
+// information for identical types across TUs. Because of the C++ one definition
+// rule (ODR), it is valid to assume that the type is defined the same way in
+// every TU and its debug info is equivalent.
+//
+// C does not have the ODR, and it is common for codebases to contain multiple
+// different definitions of a struct with the same name in different TUs.
+// Therefore, if the type doesn't have a C++ mangling, don't give it an
+// identifier. Type information in C is smaller and simpler than C++ type
+// information, so the increase in debug info size is negligible.
+//
+// If the type is not externally visible, it should be unique to the current TU,
+// and should not need an identifier to participate in type deduplication.
+// However, when emitting CodeView, the format internally uses these
+// unique type name identifiers for references between debug info. For example,
+// the method of a class in an anonymous namespace uses the identifier to refer
+// to its parent class. The Microsoft C++ ABI attempts to provide unique names
+// for such types, so when emitting CodeView, always use identifiers for C++
+// types. This may create problems when attempting to emit CodeView when the MS
+// C++ ABI is not in use.
static bool needsTypeIdentifier(const TagDecl *TD, CodeGenModule &CGM,
llvm::DICompileUnit *TheCU) {
// We only add a type identifier for types with C++ name mangling.
if (!hasCXXMangling(TD, TheCU))
return false;
- // CodeView types with C++ mangling need a type identifier.
- if (CGM.getCodeGenOpts().EmitCodeView)
- return true;
-
// Externally visible types with C++ mangling need a type identifier.
if (TD->isExternallyVisible())
return true;
+ // CodeView types with C++ mangling need a type identifier.
+ if (CGM.getCodeGenOpts().EmitCodeView)
+ return true;
+
return false;
}
-// When emitting CodeView debug information we need to produce a type
-// identifier for all types which have a C++ mangling. Until a GUID is added
-// to the identifier (not currently implemented) the result will not be unique
-// across compilation units.
-// When emitting DWARF debug information, we need to produce a type identifier
-// for all externally visible types with C++ name mangling. This identifier
-// should be unique across ODR-compliant compilation units.
+// Returns a unique type identifier string if one exists, or an empty string.
static SmallString<256> getTypeIdentifier(const TagType *Ty, CodeGenModule &CGM,
llvm::DICompileUnit *TheCU) {
SmallString<256> Identifier;
@@ -936,18 +995,53 @@ llvm::DIType *CGDebugInfo::getOrCreateStructPtrType(StringRef Name,
if (Cache)
return Cache;
Cache = DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type, Name,
- TheCU, getOrCreateMainFile(), 0);
+ TheCU, TheCU->getFile(), 0);
unsigned Size = CGM.getContext().getTypeSize(CGM.getContext().VoidPtrTy);
Cache = DBuilder.createPointerType(Cache, Size);
return Cache;
}
+uint64_t CGDebugInfo::collectDefaultElementTypesForBlockPointer(
+ const BlockPointerType *Ty, llvm::DIFile *Unit, llvm::DIDerivedType *DescTy,
+ unsigned LineNo, SmallVectorImpl<llvm::Metadata *> &EltTys) {
+ QualType FType;
+
+ // Advanced by calls to CreateMemberType in increments of FType, then
+ // returned as the overall size of the default elements.
+ uint64_t FieldOffset = 0;
+
+ // Blocks in OpenCL have unique constraints which make the standard fields
+ // redundant while requiring size and align fields for enqueue_kernel. See
+ // initializeForBlockHeader in CGBlocks.cpp
+ if (CGM.getLangOpts().OpenCL) {
+ FType = CGM.getContext().IntTy;
+ EltTys.push_back(CreateMemberType(Unit, FType, "__size", &FieldOffset));
+ EltTys.push_back(CreateMemberType(Unit, FType, "__align", &FieldOffset));
+ } else {
+ FType = CGM.getContext().getPointerType(CGM.getContext().VoidTy);
+ EltTys.push_back(CreateMemberType(Unit, FType, "__isa", &FieldOffset));
+ FType = CGM.getContext().IntTy;
+ EltTys.push_back(CreateMemberType(Unit, FType, "__flags", &FieldOffset));
+ EltTys.push_back(CreateMemberType(Unit, FType, "__reserved", &FieldOffset));
+ FType = CGM.getContext().getPointerType(Ty->getPointeeType());
+ EltTys.push_back(CreateMemberType(Unit, FType, "__FuncPtr", &FieldOffset));
+ FType = CGM.getContext().getPointerType(CGM.getContext().VoidTy);
+ uint64_t FieldSize = CGM.getContext().getTypeSize(Ty);
+ uint32_t FieldAlign = CGM.getContext().getTypeAlign(Ty);
+ EltTys.push_back(DBuilder.createMemberType(
+ Unit, "__descriptor", nullptr, LineNo, FieldSize, FieldAlign,
+ FieldOffset, llvm::DINode::FlagZero, DescTy));
+ FieldOffset += FieldSize;
+ }
+
+ return FieldOffset;
+}
+
llvm::DIType *CGDebugInfo::CreateType(const BlockPointerType *Ty,
llvm::DIFile *Unit) {
SmallVector<llvm::Metadata *, 8> EltTys;
QualType FType;
- uint64_t FieldSize, FieldOffset;
- uint32_t FieldAlign;
+ uint64_t FieldOffset;
llvm::DINodeArray Elements;
FieldOffset = 0;
@@ -959,10 +1053,9 @@ llvm::DIType *CGDebugInfo::CreateType(const BlockPointerType *Ty,
EltTys.clear();
llvm::DINode::DIFlags Flags = llvm::DINode::FlagAppleBlock;
- unsigned LineNo = 0;
auto *EltTy =
- DBuilder.createStructType(Unit, "__block_descriptor", nullptr, LineNo,
+ DBuilder.createStructType(Unit, "__block_descriptor", nullptr, 0,
FieldOffset, 0, Flags, nullptr, Elements);
// Bit size, align and offset of the type.
@@ -970,27 +1063,8 @@ llvm::DIType *CGDebugInfo::CreateType(const BlockPointerType *Ty,
auto *DescTy = DBuilder.createPointerType(EltTy, Size);
- FieldOffset = 0;
- if (CGM.getLangOpts().OpenCL) {
- FType = CGM.getContext().IntTy;
- EltTys.push_back(CreateMemberType(Unit, FType, "__size", &FieldOffset));
- EltTys.push_back(CreateMemberType(Unit, FType, "__align", &FieldOffset));
- } else {
- FType = CGM.getContext().getPointerType(CGM.getContext().VoidTy);
- EltTys.push_back(CreateMemberType(Unit, FType, "__isa", &FieldOffset));
- FType = CGM.getContext().IntTy;
- EltTys.push_back(CreateMemberType(Unit, FType, "__flags", &FieldOffset));
- EltTys.push_back(CreateMemberType(Unit, FType, "__reserved", &FieldOffset));
- FType = CGM.getContext().getPointerType(Ty->getPointeeType());
- EltTys.push_back(CreateMemberType(Unit, FType, "__FuncPtr", &FieldOffset));
- FType = CGM.getContext().getPointerType(CGM.getContext().VoidTy);
- FieldSize = CGM.getContext().getTypeSize(Ty);
- FieldAlign = CGM.getContext().getTypeAlign(Ty);
- EltTys.push_back(DBuilder.createMemberType(
- Unit, "__descriptor", nullptr, LineNo, FieldSize, FieldAlign, FieldOffset,
- llvm::DINode::FlagZero, DescTy));
- FieldOffset += FieldSize;
- }
+ FieldOffset = collectDefaultElementTypesForBlockPointer(Ty, Unit, DescTy,
+ 0, EltTys);
Elements = DBuilder.getOrCreateArray(EltTys);
@@ -998,7 +1072,7 @@ llvm::DIType *CGDebugInfo::CreateType(const BlockPointerType *Ty,
// DW_AT_APPLE_BLOCK attribute and are an implementation detail only
// the debugger needs to know about. To allow type uniquing, emit
// them without a name or a location.
- EltTy = DBuilder.createStructType(Unit, "", nullptr, LineNo, FieldOffset, 0,
+ EltTy = DBuilder.createStructType(Unit, "", nullptr, 0, FieldOffset, 0,
Flags, nullptr, Elements);
return DBuilder.createPointerType(EltTy, Size);
@@ -1058,6 +1132,7 @@ static unsigned getDwarfCC(CallingConv CC) {
case CC_X86_64SysV:
return llvm::dwarf::DW_CC_LLVM_X86_64SysV;
case CC_AAPCS:
+ case CC_AArch64VectorCall:
return llvm::dwarf::DW_CC_LLVM_AAPCS;
case CC_AAPCS_VFP:
return llvm::dwarf::DW_CC_LLVM_AAPCS_VFP;
@@ -1353,8 +1428,7 @@ CGDebugInfo::getOrCreateMethodType(const CXXMethodDecl *Method,
if (Method->isStatic())
return cast_or_null<llvm::DISubroutineType>(
getOrCreateType(QualType(Func, 0), Unit));
- return getOrCreateInstanceMethodType(Method->getThisType(CGM.getContext()),
- Func, Unit);
+ return getOrCreateInstanceMethodType(Method->getThisType(), Func, Unit);
}
llvm::DISubroutineType *CGDebugInfo::getOrCreateInstanceMethodType(
@@ -1450,16 +1524,16 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction(
// Collect virtual method info.
llvm::DIType *ContainingType = nullptr;
- unsigned Virtuality = 0;
unsigned VIndex = 0;
llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero;
+ llvm::DISubprogram::DISPFlags SPFlags = llvm::DISubprogram::SPFlagZero;
int ThisAdjustment = 0;
if (Method->isVirtual()) {
if (Method->isPure())
- Virtuality = llvm::dwarf::DW_VIRTUALITY_pure_virtual;
+ SPFlags |= llvm::DISubprogram::SPFlagPureVirtual;
else
- Virtuality = llvm::dwarf::DW_VIRTUALITY_virtual;
+ SPFlags |= llvm::DISubprogram::SPFlagVirtual;
if (CGM.getTarget().getCXXABI().isItaniumFamily()) {
// It doesn't make sense to give a virtual destructor a vtable index,
@@ -1511,12 +1585,13 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction(
Flags |= llvm::DINode::FlagLValueReference;
if (Method->getRefQualifier() == RQ_RValue)
Flags |= llvm::DINode::FlagRValueReference;
+ if (CGM.getLangOpts().Optimize)
+ SPFlags |= llvm::DISubprogram::SPFlagOptimized;
llvm::DINodeArray TParamsArray = CollectFunctionTemplateParams(Method, Unit);
llvm::DISubprogram *SP = DBuilder.createMethod(
RecordTy, MethodName, MethodLinkageName, MethodDefUnit, MethodLine,
- MethodTy, /*isLocalToUnit=*/false, /*isDefinition=*/false, Virtuality,
- VIndex, ThisAdjustment, ContainingType, Flags, CGM.getLangOpts().Optimize,
+ MethodTy, VIndex, ThisAdjustment, ContainingType, Flags, SPFlags,
TParamsArray.get());
SPCache[Method->getCanonicalDecl()].reset(SP);
@@ -1741,6 +1816,29 @@ CGDebugInfo::CollectFunctionTemplateParams(const FunctionDecl *FD,
return llvm::DINodeArray();
}
+llvm::DINodeArray CGDebugInfo::CollectVarTemplateParams(const VarDecl *VL,
+ llvm::DIFile *Unit) { // Returns an empty array unless VL is a VarTemplateSpecializationDecl.
+ if (auto *TS = dyn_cast<VarTemplateSpecializationDecl>(VL)) {
+ auto T = TS->getSpecializedTemplateOrPartial();
+ auto TA = TS->getTemplateArgs().asArray();
+ // TS comes from a partial specialization: hand that partial specialization's parameter list, with TS's arguments, to CollectTemplateParams.
+ if (T.is<VarTemplatePartialSpecializationDecl *>()) {
+ const TemplateParameterList *TList =
+ T.get<VarTemplatePartialSpecializationDecl *>()
+ ->getTemplateParameters();
+ return CollectTemplateParams(TList, TA, Unit);
+ }
+
+ // Otherwise T holds the VarTemplateDecl itself (not a partial specialization): use that template's own parameter list.
+ if (T.is<VarTemplateDecl *>()) {
+ const TemplateParameterList *TList = T.get<VarTemplateDecl *>()
+ ->getTemplateParameters();
+ return CollectTemplateParams(TList, TA, Unit);
+ }
+ }
+ return llvm::DINodeArray();
+}
+
llvm::DINodeArray CGDebugInfo::CollectCXXTemplateParams(
const ClassTemplateSpecializationDecl *TSpecial, llvm::DIFile *Unit) {
// Always get the full list of parameters, not just the ones from
@@ -1896,8 +1994,17 @@ static bool isDefinedInClangModule(const RecordDecl *RD) {
if (auto *CXXDecl = dyn_cast<CXXRecordDecl>(RD)) {
if (!CXXDecl->isCompleteDefinition())
return false;
+ // Check whether RD is a template.
auto TemplateKind = CXXDecl->getTemplateSpecializationKind();
if (TemplateKind != TSK_Undeclared) {
+ // Unfortunately getOwningModule() isn't accurate enough to find the
+ // owning module of a ClassTemplateSpecializationDecl that is inside a
+ // namespace spanning multiple modules.
+ bool Explicit = false;
+ if (auto *TD = dyn_cast<ClassTemplateSpecializationDecl>(CXXDecl))
+ Explicit = TD->isExplicitInstantiationOrSpecialization();
+ if (!Explicit && CXXDecl->getEnclosingNamespaceContext())
+ return false;
// This is a template, check the origin of the first member.
if (CXXDecl->field_begin() == CXXDecl->field_end())
return TemplateKind == TSK_ExplicitInstantiationDeclaration;
@@ -2445,9 +2552,9 @@ llvm::DIType *CGDebugInfo::CreateType(const ArrayType *Ty, llvm::DIFile *Unit) {
Count = CAT->getSize().getZExtValue();
else if (const auto *VAT = dyn_cast<VariableArrayType>(Ty)) {
if (Expr *Size = VAT->getSizeExpr()) {
- llvm::APSInt V;
- if (Size->EvaluateAsInt(V, CGM.getContext()))
- Count = V.getExtValue();
+ Expr::EvalResult Result;
+ if (Size->EvaluateAsInt(Result, CGM.getContext()))
+ Count = Result.Val.getInt().getExtValue();
}
}
@@ -2513,9 +2620,9 @@ llvm::DIType *CGDebugInfo::CreateType(const MemberPointerType *Ty,
const FunctionProtoType *FPT =
Ty->getPointeeType()->getAs<FunctionProtoType>();
return DBuilder.createMemberPointerType(
- getOrCreateInstanceMethodType(CGM.getContext().getPointerType(QualType(
- Ty->getClass(), FPT->getTypeQuals())),
- FPT, U),
+ getOrCreateInstanceMethodType(
+ CXXMethodDecl::getThisType(FPT, Ty->getMostRecentCXXRecordDecl()),
+ FPT, U),
ClassType, Size, /*Align=*/0, Flags);
}
@@ -2603,7 +2710,7 @@ llvm::DIType *CGDebugInfo::CreateTypeDefinition(const EnumType *Ty) {
llvm::DIType *ClassTy = getOrCreateType(ED->getIntegerType(), DefUnit);
return DBuilder.createEnumerationType(EnumContext, ED->getName(), DefUnit,
Line, Size, Align, EltArray, ClassTy,
- Identifier, ED->isFixed());
+ Identifier, ED->isScoped());
}
llvm::DIMacro *CGDebugInfo::CreateMacro(llvm::DIMacroFile *Parent,
@@ -3035,6 +3142,7 @@ void CGDebugInfo::collectFunctionDeclProps(GlobalDecl GD, llvm::DIFile *Unit,
void CGDebugInfo::collectVarDeclProps(const VarDecl *VD, llvm::DIFile *&Unit,
unsigned &LineNo, QualType &T,
StringRef &Name, StringRef &LinkageName,
+ llvm::MDTuple *&TemplateParameters,
llvm::DIScope *&VDContext) {
Unit = getOrCreateFile(VD->getLocation());
LineNo = getLineNumber(VD->getLocation());
@@ -3058,6 +3166,13 @@ void CGDebugInfo::collectVarDeclProps(const VarDecl *VD, llvm::DIFile *&Unit,
if (LinkageName == Name)
LinkageName = StringRef();
+ if (isa<VarTemplateSpecializationDecl>(VD)) {
+ llvm::DINodeArray parameterNodes = CollectVarTemplateParams(VD, &*Unit);
+ TemplateParameters = parameterNodes.get();
+ } else {
+ TemplateParameters = nullptr;
+ }
+
// Since we emit declarations (DW_AT_members) for static members, place the
// definition of those static members in the namespace they were declared in
// in the source code (the lexical decl context).
@@ -3084,6 +3199,7 @@ llvm::DISubprogram *CGDebugInfo::getFunctionFwdDeclOrStub(GlobalDecl GD,
llvm::DINodeArray TParamsArray;
StringRef Name, LinkageName;
llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero;
+ llvm::DISubprogram::DISPFlags SPFlags = llvm::DISubprogram::SPFlagZero;
SourceLocation Loc = GD.getDecl()->getLocation();
llvm::DIFile *Unit = getOrCreateFile(Loc);
llvm::DIScope *DContext = Unit;
@@ -3100,20 +3216,23 @@ llvm::DISubprogram *CGDebugInfo::getFunctionFwdDeclOrStub(GlobalDecl GD,
CallingConv CC = FD->getType()->castAs<FunctionType>()->getCallConv();
QualType FnType = CGM.getContext().getFunctionType(
FD->getReturnType(), ArgTypes, FunctionProtoType::ExtProtoInfo(CC));
+ if (!FD->isExternallyVisible())
+ SPFlags |= llvm::DISubprogram::SPFlagLocalToUnit;
+ if (CGM.getLangOpts().Optimize)
+ SPFlags |= llvm::DISubprogram::SPFlagOptimized;
+
if (Stub) {
+ Flags |= getCallSiteRelatedAttrs();
+ SPFlags |= llvm::DISubprogram::SPFlagDefinition;
return DBuilder.createFunction(
DContext, Name, LinkageName, Unit, Line,
- getOrCreateFunctionType(GD.getDecl(), FnType, Unit),
- !FD->isExternallyVisible(),
- /* isDefinition = */ true, 0, Flags, CGM.getLangOpts().Optimize,
+ getOrCreateFunctionType(GD.getDecl(), FnType, Unit), 0, Flags, SPFlags,
TParamsArray.get(), getFunctionDeclaration(FD));
}
llvm::DISubprogram *SP = DBuilder.createTempFunctionFwdDecl(
DContext, Name, LinkageName, Unit, Line,
- getOrCreateFunctionType(GD.getDecl(), FnType, Unit),
- !FD->isExternallyVisible(),
- /* isDefinition = */ false, 0, Flags, CGM.getLangOpts().Optimize,
+ getOrCreateFunctionType(GD.getDecl(), FnType, Unit), 0, Flags, SPFlags,
TParamsArray.get(), getFunctionDeclaration(FD));
const FunctionDecl *CanonDecl = FD->getCanonicalDecl();
FwdDeclReplaceMap.emplace_back(std::piecewise_construct,
@@ -3138,12 +3257,14 @@ CGDebugInfo::getGlobalVariableForwardDeclaration(const VarDecl *VD) {
llvm::DIFile *Unit = getOrCreateFile(Loc);
llvm::DIScope *DContext = Unit;
unsigned Line = getLineNumber(Loc);
+ llvm::MDTuple *TemplateParameters = nullptr;
- collectVarDeclProps(VD, Unit, Line, T, Name, LinkageName, DContext);
+ collectVarDeclProps(VD, Unit, Line, T, Name, LinkageName, TemplateParameters,
+ DContext);
auto Align = getDeclAlignIfRequired(VD, CGM.getContext());
auto *GV = DBuilder.createTempGlobalVariableFwdDecl(
DContext, Name, LinkageName, Unit, Line, getOrCreateType(T, Unit),
- !VD->isExternallyVisible(), nullptr, Align);
+ !VD->isExternallyVisible(), nullptr, TemplateParameters, Align);
FwdDeclReplaceMap.emplace_back(
std::piecewise_construct,
std::make_tuple(cast<VarDecl>(VD->getCanonicalDecl())),
@@ -3299,6 +3420,7 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc,
bool HasDecl = (D != nullptr);
llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero;
+ llvm::DISubprogram::DISPFlags SPFlags = llvm::DISubprogram::SPFlagZero;
llvm::DIFile *Unit = getOrCreateFile(Loc);
llvm::DIScope *FDContext = Unit;
llvm::DINodeArray TParamsArray;
@@ -3338,6 +3460,15 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc,
if (CurFuncIsThunk)
Flags |= llvm::DINode::FlagThunk;
+ if (Fn->hasLocalLinkage())
+ SPFlags |= llvm::DISubprogram::SPFlagLocalToUnit;
+ if (CGM.getLangOpts().Optimize)
+ SPFlags |= llvm::DISubprogram::SPFlagOptimized;
+
+ llvm::DINode::DIFlags FlagsForDef = Flags | getCallSiteRelatedAttrs();
+ llvm::DISubprogram::DISPFlags SPFlagsForDef =
+ SPFlags | llvm::DISubprogram::SPFlagDefinition;
+
unsigned LineNo = getLineNumber(Loc);
unsigned ScopeLine = getLineNumber(ScopeLoc);
@@ -3348,9 +3479,8 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc,
// are emitted as CU level entities by the backend.
llvm::DISubprogram *SP = DBuilder.createFunction(
FDContext, Name, LinkageName, Unit, LineNo,
- getOrCreateFunctionType(D, FnType, Unit), Fn->hasLocalLinkage(),
- true /*definition*/, ScopeLine, Flags, CGM.getLangOpts().Optimize,
- TParamsArray.get(), getFunctionDeclaration(D));
+ getOrCreateFunctionType(D, FnType, Unit), ScopeLine, FlagsForDef,
+ SPFlagsForDef, TParamsArray.get(), getFunctionDeclaration(D));
Fn->setSubprogram(SP);
// We might get here with a VarDecl in the case we're generating
// code for the initialization of globals. Do not record these decls
@@ -3370,8 +3500,7 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc,
cast<llvm::DICompositeType>(It->second);
llvm::DISubprogram *FD = DBuilder.createFunction(
InterfaceDecl, Name, LinkageName, Unit, LineNo,
- getOrCreateFunctionType(D, FnType, Unit), Fn->hasLocalLinkage(),
- false /*definition*/, ScopeLine, Flags, CGM.getLangOpts().Optimize,
+ getOrCreateFunctionType(D, FnType, Unit), ScopeLine, Flags, SPFlags,
TParamsArray.get());
DBuilder.finalizeSubprogram(FD);
ObjCMethodCache[ID].push_back(FD);
@@ -3420,11 +3549,13 @@ void CGDebugInfo::EmitFunctionDecl(GlobalDecl GD, SourceLocation Loc,
}
unsigned LineNo = getLineNumber(Loc);
unsigned ScopeLine = 0;
+ llvm::DISubprogram::DISPFlags SPFlags = llvm::DISubprogram::SPFlagZero;
+ if (CGM.getLangOpts().Optimize)
+ SPFlags |= llvm::DISubprogram::SPFlagOptimized;
DBuilder.retainType(DBuilder.createFunction(
FDContext, Name, LinkageName, Unit, LineNo,
- getOrCreateFunctionType(D, FnType, Unit), false /*internalLinkage*/,
- false /*definition*/, ScopeLine, Flags, CGM.getLangOpts().Optimize,
+ getOrCreateFunctionType(D, FnType, Unit), ScopeLine, Flags, SPFlags,
TParamsArray.get(), getFunctionDeclaration(D)));
}
@@ -3453,7 +3584,7 @@ void CGDebugInfo::EmitLocation(CGBuilderTy &Builder, SourceLocation Loc) {
// Update our current location
setLocation(Loc);
- if (CurLoc.isInvalid() || CurLoc.isMacroID())
+ if (CurLoc.isInvalid() || CurLoc.isMacroID() || LexicalBlockStack.empty())
return;
llvm::MDNode *Scope = LexicalBlockStack.back();
@@ -3530,9 +3661,9 @@ void CGDebugInfo::EmitFunctionEnd(CGBuilderTy &Builder, llvm::Function *Fn) {
DBuilder.finalizeSubprogram(Fn->getSubprogram());
}
-llvm::DIType *CGDebugInfo::EmitTypeForVarWithBlocksAttr(const VarDecl *VD,
- uint64_t *XOffset) {
-
+CGDebugInfo::BlockByRefType
+CGDebugInfo::EmitTypeForVarWithBlocksAttr(const VarDecl *VD,
+ uint64_t *XOffset) {
SmallVector<llvm::Metadata *, 5> EltTys;
QualType FType;
uint64_t FieldSize, FieldOffset;
@@ -3584,23 +3715,21 @@ llvm::DIType *CGDebugInfo::EmitTypeForVarWithBlocksAttr(const VarDecl *VD,
}
FType = Type;
- llvm::DIType *FieldTy = getOrCreateType(FType, Unit);
+ llvm::DIType *WrappedTy = getOrCreateType(FType, Unit);
FieldSize = CGM.getContext().getTypeSize(FType);
FieldAlign = CGM.getContext().toBits(Align);
*XOffset = FieldOffset;
- FieldTy = DBuilder.createMemberType(Unit, VD->getName(), Unit, 0, FieldSize,
- FieldAlign, FieldOffset,
- llvm::DINode::FlagZero, FieldTy);
+ llvm::DIType *FieldTy = DBuilder.createMemberType(
+ Unit, VD->getName(), Unit, 0, FieldSize, FieldAlign, FieldOffset,
+ llvm::DINode::FlagZero, WrappedTy);
EltTys.push_back(FieldTy);
FieldOffset += FieldSize;
llvm::DINodeArray Elements = DBuilder.getOrCreateArray(EltTys);
-
- llvm::DINode::DIFlags Flags = llvm::DINode::FlagBlockByrefStruct;
-
- return DBuilder.createStructType(Unit, "", Unit, 0, FieldOffset, 0, Flags,
- nullptr, Elements);
+ return {DBuilder.createStructType(Unit, "", Unit, 0, FieldOffset, 0,
+ llvm::DINode::FlagZero, nullptr, Elements),
+ WrappedTy};
}
llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const VarDecl *VD,
@@ -3621,7 +3750,7 @@ llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const VarDecl *VD,
llvm::DIType *Ty;
uint64_t XOffset = 0;
if (VD->hasAttr<BlocksAttr>())
- Ty = EmitTypeForVarWithBlocksAttr(VD, &XOffset);
+ Ty = EmitTypeForVarWithBlocksAttr(VD, &XOffset).WrappedType;
else
Ty = getOrCreateType(VD->getType(), Unit);
@@ -3759,7 +3888,7 @@ void CGDebugInfo::EmitDeclareOfBlockDeclRefVariable(
llvm::DIFile *Unit = getOrCreateFile(VD->getLocation());
llvm::DIType *Ty;
if (isByRef)
- Ty = EmitTypeForVarWithBlocksAttr(VD, &XOffset);
+ Ty = EmitTypeForVarWithBlocksAttr(VD, &XOffset).WrappedType;
else
Ty = getOrCreateType(VD->getType(), Unit);
@@ -3830,6 +3959,44 @@ bool operator<(const BlockLayoutChunk &l, const BlockLayoutChunk &r) {
}
} // namespace
+void CGDebugInfo::collectDefaultFieldsForBlockLiteralDeclare(
+ const CGBlockInfo &Block, const ASTContext &Context, SourceLocation Loc,
+ const llvm::StructLayout &BlockLayout, llvm::DIFile *Unit,
+ SmallVectorImpl<llvm::Metadata *> &Fields) {
+ // Append the debug-info entries for a block literal's default fields to Fields; each field's bit offset is read from BlockLayout by element index.
+ // Blocks in OpenCL have unique constraints which make the standard fields redundant while requiring size and align fields for enqueue_kernel.
+ // See initializeForBlockHeader in CGBlocks.cpp.
+ if (CGM.getLangOpts().OpenCL) {
+ Fields.push_back(createFieldType("__size", Context.IntTy, Loc, AS_public,
+ BlockLayout.getElementOffsetInBits(0),
+ Unit, Unit));
+ Fields.push_back(createFieldType("__align", Context.IntTy, Loc, AS_public,
+ BlockLayout.getElementOffsetInBits(1),
+ Unit, Unit));
+ } else { // Non-OpenCL: __isa, __flags, __reserved, __FuncPtr, __descriptor at layout elements 0-4.
+ Fields.push_back(createFieldType("__isa", Context.VoidPtrTy, Loc, AS_public,
+ BlockLayout.getElementOffsetInBits(0),
+ Unit, Unit));
+ Fields.push_back(createFieldType("__flags", Context.IntTy, Loc, AS_public,
+ BlockLayout.getElementOffsetInBits(1),
+ Unit, Unit));
+ Fields.push_back(
+ createFieldType("__reserved", Context.IntTy, Loc, AS_public,
+ BlockLayout.getElementOffsetInBits(2), Unit, Unit));
+ auto *FnTy = Block.getBlockExpr()->getFunctionType();
+ auto FnPtrType = CGM.getContext().getPointerType(FnTy->desugar()); // __FuncPtr is a pointer to the block expression's function type.
+ Fields.push_back(createFieldType("__FuncPtr", FnPtrType, Loc, AS_public,
+ BlockLayout.getElementOffsetInBits(3),
+ Unit, Unit));
+ Fields.push_back(createFieldType(
+ "__descriptor",
+ Context.getPointerType(Block.NeedsCopyDispose // The extended descriptor type is used when Block.NeedsCopyDispose is set.
+ ? Context.getBlockDescriptorExtendedType()
+ : Context.getBlockDescriptorType()),
+ Loc, AS_public, BlockLayout.getElementOffsetInBits(4), Unit, Unit));
+ }
+}
+
void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block,
StringRef Name,
unsigned ArgNo,
@@ -3852,35 +4019,8 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block,
CGM.getDataLayout().getStructLayout(block.StructureType);
SmallVector<llvm::Metadata *, 16> fields;
- if (CGM.getLangOpts().OpenCL) {
- fields.push_back(createFieldType("__size", C.IntTy, loc, AS_public,
- blockLayout->getElementOffsetInBits(0),
- tunit, tunit));
- fields.push_back(createFieldType("__align", C.IntTy, loc, AS_public,
- blockLayout->getElementOffsetInBits(1),
- tunit, tunit));
- } else {
- fields.push_back(createFieldType("__isa", C.VoidPtrTy, loc, AS_public,
- blockLayout->getElementOffsetInBits(0),
- tunit, tunit));
- fields.push_back(createFieldType("__flags", C.IntTy, loc, AS_public,
- blockLayout->getElementOffsetInBits(1),
- tunit, tunit));
- fields.push_back(createFieldType("__reserved", C.IntTy, loc, AS_public,
- blockLayout->getElementOffsetInBits(2),
- tunit, tunit));
- auto *FnTy = block.getBlockExpr()->getFunctionType();
- auto FnPtrType = CGM.getContext().getPointerType(FnTy->desugar());
- fields.push_back(createFieldType("__FuncPtr", FnPtrType, loc, AS_public,
- blockLayout->getElementOffsetInBits(3),
- tunit, tunit));
- fields.push_back(createFieldType(
- "__descriptor",
- C.getPointerType(block.NeedsCopyDispose
- ? C.getBlockDescriptorExtendedType()
- : C.getBlockDescriptorType()),
- loc, AS_public, blockLayout->getElementOffsetInBits(4), tunit, tunit));
- }
+ collectDefaultFieldsForBlockLiteralDeclare(block, C, loc, *blockLayout, tunit,
+ fields);
// We want to sort the captures by offset, not because DWARF
// requires this, but because we're paranoid about debuggers.
@@ -3923,7 +4063,7 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block,
QualType type;
if (auto *Method =
cast_or_null<CXXMethodDecl>(blockDecl->getNonClosureContext()))
- type = Method->getThisType(C);
+ type = Method->getThisType();
else if (auto *RDecl = dyn_cast<CXXRecordDecl>(blockDecl->getParent()))
type = QualType(RDecl->getTypeForDecl(), 0);
else
@@ -3941,10 +4081,10 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block,
if (capture->isByRef()) {
TypeInfo PtrInfo = C.getTypeInfo(C.VoidPtrTy);
auto Align = PtrInfo.AlignIsRequired ? PtrInfo.Align : 0;
-
- // FIXME: this creates a second copy of this type!
+ // FIXME: This recomputes the layout of the BlockByRefWrapper.
uint64_t xoffset;
- fieldType = EmitTypeForVarWithBlocksAttr(variable, &xoffset);
+ fieldType =
+ EmitTypeForVarWithBlocksAttr(variable, &xoffset).BlockByRefWrapper;
fieldType = DBuilder.createPointerType(fieldType, PtrInfo.Width);
fieldType = DBuilder.createMemberType(tunit, name, tunit, line,
PtrInfo.Width, Align, offsetInBits,
@@ -4045,7 +4185,9 @@ void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var,
unsigned LineNo;
StringRef DeclName, LinkageName;
QualType T;
- collectVarDeclProps(D, Unit, LineNo, T, DeclName, LinkageName, DContext);
+ llvm::MDTuple *TemplateParameters = nullptr;
+ collectVarDeclProps(D, Unit, LineNo, T, DeclName, LinkageName,
+ TemplateParameters, DContext);
// Attempt to store one global variable for the declaration - even if we
// emit a lot of fields.
@@ -4071,7 +4213,8 @@ void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var,
DContext, DeclName, LinkageName, Unit, LineNo, getOrCreateType(T, Unit),
Var->hasLocalLinkage(),
Expr.empty() ? nullptr : DBuilder.createExpression(Expr),
- getOrCreateStaticDataMemberDeclarationOrNull(D), Align);
+ getOrCreateStaticDataMemberDeclarationOrNull(D), TemplateParameters,
+ Align);
Var->addDebugInfo(GVE);
}
DeclCache[D->getCanonicalDecl()].reset(GVE);
@@ -4128,10 +4271,19 @@ void CGDebugInfo::EmitGlobalVariable(const ValueDecl *VD, const APValue &Init) {
InitExpr = DBuilder.createConstantValueExpression(
Init.getFloat().bitcastToAPInt().getZExtValue());
}
+
+ llvm::MDTuple *TemplateParameters = nullptr;
+
+ if (isa<VarTemplateSpecializationDecl>(VD))
+ if (VarD) {
+ llvm::DINodeArray parameterNodes = CollectVarTemplateParams(VarD, &*Unit);
+ TemplateParameters = parameterNodes.get();
+ }
+
GV.reset(DBuilder.createGlobalVariableExpression(
DContext, Name, StringRef(), Unit, getLineNumber(VD->getLocation()), Ty,
true, InitExpr, getOrCreateStaticDataMemberDeclarationOrNull(VarD),
- Align));
+ TemplateParameters, Align));
}
llvm::DIScope *CGDebugInfo::getCurrentContextDescriptor(const Decl *D) {
@@ -4320,7 +4472,7 @@ void CGDebugInfo::EmitExplicitCastType(QualType Ty) {
if (CGM.getCodeGenOpts().getDebugInfo() < codegenoptions::LimitedDebugInfo)
return;
- if (auto *DieTy = getOrCreateType(Ty, getOrCreateMainFile()))
+ if (auto *DieTy = getOrCreateType(Ty, TheCU->getFile()))
// Don't ignore in case of explicit cast where it is referenced indirectly.
DBuilder.retainType(DieTy);
}
@@ -4332,3 +4484,22 @@ llvm::DebugLoc CGDebugInfo::SourceLocToDebugLoc(SourceLocation Loc) {
llvm::MDNode *Scope = LexicalBlockStack.back();
return llvm::DebugLoc::get(getLineNumber(Loc), getColumnNumber(Loc), Scope);
}
+
+llvm::DINode::DIFlags CGDebugInfo::getCallSiteRelatedAttrs() const { // Yields FlagAllCallsDescribed, or FlagZero when either check below rejects.
+ // Call site-related attributes are only useful in optimized programs, and
+ // when there's a possibility of debugging backtraces.
+ if (!CGM.getLangOpts().Optimize || DebugKind == codegenoptions::NoDebugInfo ||
+ DebugKind == codegenoptions::LocTrackingOnly)
+ return llvm::DINode::FlagZero;
+
+ // Call site-related attributes are available in DWARF v5. Some debuggers,
+ // while not fully DWARF v5-compliant, may accept these attributes as if they
+ // were part of DWARF v4.
+ bool SupportsDWARFv4Ext =
+ CGM.getCodeGenOpts().DwarfVersion == 4 &&
+ CGM.getCodeGenOpts().getDebuggerTuning() == llvm::DebuggerKind::LLDB;
+ if (!SupportsDWARFv4Ext && CGM.getCodeGenOpts().DwarfVersion < 5) // Neither DWARF v5+ nor the DWARF v4 + LLDB-tuning exception.
+ return llvm::DINode::FlagZero;
+
+ return llvm::DINode::FlagAllCallsDescribed;
+}
diff --git a/lib/CodeGen/CGDebugInfo.h b/lib/CodeGen/CGDebugInfo.h
index e632806138f0..031e40b9dde9 100644
--- a/lib/CodeGen/CGDebugInfo.h
+++ b/lib/CodeGen/CGDebugInfo.h
@@ -20,8 +20,8 @@
#include "clang/AST/ExternalASTSource.h"
#include "clang/AST/Type.h"
#include "clang/AST/TypeOrdering.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/SourceLocation.h"
-#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Optional.h"
@@ -76,6 +76,9 @@ class CGDebugInfo {
llvm::DIType *OCLQueueDITy = nullptr;
llvm::DIType *OCLNDRangeDITy = nullptr;
llvm::DIType *OCLReserveIDDITy = nullptr;
+#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \
+ llvm::DIType *Id##Ty = nullptr;
+#include "clang/Basic/OpenCLExtensionTypes.def"
/// Cache of previously constructed Types.
llvm::DenseMap<const void *, llvm::TrackingMDRef> TypeCache;
@@ -248,6 +251,11 @@ class CGDebugInfo {
llvm::DINodeArray CollectFunctionTemplateParams(const FunctionDecl *FD,
llvm::DIFile *Unit);
+ /// A helper function to collect debug info for variable template
+ /// parameters.
+ llvm::DINodeArray CollectVarTemplateParams(const VarDecl *VD,
+ llvm::DIFile *Unit);
+
/// A helper function to collect debug info for template
/// parameters.
llvm::DINodeArray
@@ -311,12 +319,31 @@ class CGDebugInfo {
void AppendAddressSpaceXDeref(unsigned AddressSpace,
SmallVectorImpl<int64_t> &Expr) const;
+ /// A helper function to collect debug info for the default elements of a
+ /// block.
+ ///
+ /// \returns The next available field offset after the default elements.
+ uint64_t collectDefaultElementTypesForBlockPointer(
+ const BlockPointerType *Ty, llvm::DIFile *Unit,
+ llvm::DIDerivedType *DescTy, unsigned LineNo,
+ SmallVectorImpl<llvm::Metadata *> &EltTys);
+
+ /// A helper function to collect debug info for the default fields of a
+ /// block.
+ void collectDefaultFieldsForBlockLiteralDeclare(
+ const CGBlockInfo &Block, const ASTContext &Context, SourceLocation Loc,
+ const llvm::StructLayout &BlockLayout, llvm::DIFile *Unit,
+ SmallVectorImpl<llvm::Metadata *> &Fields);
+
public:
CGDebugInfo(CodeGenModule &CGM);
~CGDebugInfo();
void finalize();
+ /// Remap a given path with the current debug prefix map
+ std::string remapDIPath(StringRef) const;
+
/// Register VLA size expression debug node with the qualified type.
void registerVLASizeExpression(QualType Ty, llvm::Metadata *SizeExpr) {
SizeExprCache[Ty] = SizeExpr;
@@ -475,9 +502,16 @@ private:
llvm::Optional<unsigned> ArgNo,
CGBuilderTy &Builder);
+ struct BlockByRefType { // Pair of debug-info types; the return type of EmitTypeForVarWithBlocksAttr.
+ /// The wrapper struct used inside the __block_literal struct.
+ llvm::DIType *BlockByRefWrapper;
+ /// The type as it appears in the source code.
+ llvm::DIType *WrappedType;
+ };
+
/// Build up structure info for the byref. See \a BuildByRefType.
- llvm::DIType *EmitTypeForVarWithBlocksAttr(const VarDecl *VD,
- uint64_t *OffSet);
+ BlockByRefType EmitTypeForVarWithBlocksAttr(const VarDecl *VD,
+ uint64_t *OffSet);
/// Get context info for the DeclContext of \p Decl.
llvm::DIScope *getDeclContextDescriptor(const Decl *D);
@@ -497,9 +531,6 @@ private:
/// Create new compile unit.
void CreateCompileUnit();
- /// Remap a given path with the current debug prefix map
- std::string remapDIPath(StringRef) const;
-
/// Compute the file checksum debug info for input file ID.
Optional<llvm::DIFile::ChecksumKind>
computeChecksum(FileID FID, SmallString<32> &Checksum) const;
@@ -507,11 +538,15 @@ private:
/// Get the source of the given file ID.
Optional<StringRef> getSource(const SourceManager &SM, FileID FID);
- /// Get the file debug info descriptor for the input location.
+ /// Convenience function to get the file debug info descriptor for the input
+ /// location.
llvm::DIFile *getOrCreateFile(SourceLocation Loc);
- /// Get the file info for main compile unit.
- llvm::DIFile *getOrCreateMainFile();
+ /// Create a file debug info descriptor for a source file.
+ llvm::DIFile *
+ createFile(StringRef FileName,
+ Optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo,
+ Optional<StringRef> Source);
/// Get the type from the cache or create a new type if necessary.
llvm::DIType *getOrCreateType(QualType Ty, llvm::DIFile *Fg);
@@ -580,6 +615,11 @@ private:
unsigned LineNo, StringRef LinkageName,
llvm::GlobalVariable *Var, llvm::DIScope *DContext);
+
+ /// Return flags which enable debug info emission for call sites, provided
+ /// that it is supported and enabled.
+ llvm::DINode::DIFlags getCallSiteRelatedAttrs() const;
+
/// Get the printing policy for producing names for debug info.
PrintingPolicy getPrintingPolicy() const;
@@ -622,7 +662,9 @@ private:
/// Collect various properties of a VarDecl.
void collectVarDeclProps(const VarDecl *VD, llvm::DIFile *&Unit,
unsigned &LineNo, QualType &T, StringRef &Name,
- StringRef &LinkageName, llvm::DIScope *&VDContext);
+ StringRef &LinkageName,
+ llvm::MDTuple *&TemplateParameters,
+ llvm::DIScope *&VDContext);
/// Allocate a copy of \p A using the DebugInfoNames allocator
/// and return a reference to it. If multiple arguments are given the strings
@@ -702,7 +744,7 @@ public:
/// function \p InlinedFn. The current debug location becomes the inlined call
/// site of the inlined function.
ApplyInlineDebugLocation(CodeGenFunction &CGF, GlobalDecl InlinedFn);
- /// Restore everything back to the orginial state.
+ /// Restore everything back to the original state.
~ApplyInlineDebugLocation();
};
diff --git a/lib/CodeGen/CGDecl.cpp b/lib/CodeGen/CGDecl.cpp
index 57b2fbadbeec..5959d889b455 100644
--- a/lib/CodeGen/CGDecl.cpp
+++ b/lib/CodeGen/CGDecl.cpp
@@ -26,10 +26,11 @@
#include "clang/AST/Decl.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/DeclOpenMP.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/CodeGen/CGFunctionInfo.h"
-#include "clang/Frontend/CodeGenOptions.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Intrinsics.h"
@@ -104,6 +105,7 @@ void CodeGenFunction::EmitDecl(const Decl &D) {
case Decl::Import:
case Decl::OMPThreadPrivate:
case Decl::OMPCapturedExpr:
+ case Decl::OMPRequires:
case Decl::Empty:
// None of these decls require codegen support.
return;
@@ -545,7 +547,7 @@ namespace {
void Emit(CodeGenFunction &CGF, Flags flags) override {
// Compute the address of the local variable, in case it's a
// byref or something.
- DeclRefExpr DRE(const_cast<VarDecl*>(&Var), false,
+ DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(&Var), false,
Var.getType(), VK_LValue, SourceLocation());
llvm::Value *value = CGF.EmitLoadOfScalar(CGF.EmitDeclRefLValue(&DRE),
SourceLocation());
@@ -563,7 +565,7 @@ namespace {
: CleanupFn(CleanupFn), FnInfo(*Info), Var(*Var) {}
void Emit(CodeGenFunction &CGF, Flags flags) override {
- DeclRefExpr DRE(const_cast<VarDecl*>(&Var), false,
+ DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(&Var), false,
Var.getType(), VK_LValue, SourceLocation());
// Compute the address of the local variable, in case it's a byref
// or something.
@@ -752,9 +754,9 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D,
// If we're emitting a value with lifetime, we have to do the
// initialization *before* we leave the cleanup scopes.
- if (const ExprWithCleanups *ewc = dyn_cast<ExprWithCleanups>(init)) {
- enterFullExpression(ewc);
- init = ewc->getSubExpr();
+ if (const FullExpr *fe = dyn_cast<FullExpr>(init)) {
+ enterFullExpression(fe);
+ init = fe->getSubExpr();
}
CodeGenFunction::RunCleanupsScope Scope(*this);
@@ -795,15 +797,21 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D,
case Qualifiers::OCL_None:
llvm_unreachable("present but none");
+ case Qualifiers::OCL_Strong: {
+ if (!D || !isa<VarDecl>(D) || !cast<VarDecl>(D)->isARCPseudoStrong()) {
+ value = EmitARCRetainScalarExpr(init);
+ break;
+ }
+ // If D is pseudo-strong, treat it like __unsafe_unretained here. This means
+ // that we omit the retain, which causes non-autoreleased return values to be
+ // immediately released.
+ LLVM_FALLTHROUGH;
+ }
+
case Qualifiers::OCL_ExplicitNone:
value = EmitARCUnsafeUnretainedScalarExpr(init);
break;
- case Qualifiers::OCL_Strong: {
- value = EmitARCRetainScalarExpr(init);
- break;
- }
-
case Qualifiers::OCL_Weak: {
// If it's not accessed by the initializer, try to emit the
// initialization with a copy or move.
@@ -948,111 +956,242 @@ static bool shouldUseBZeroPlusStoresToInitialize(llvm::Constant *Init,
canEmitInitWithFewStoresAfterBZero(Init, StoreBudget);
}
-/// A byte pattern.
-///
-/// Can be "any" pattern if the value was padding or known to be undef.
-/// Can be "none" pattern if a sequence doesn't exist.
-class BytePattern {
- uint8_t Val;
- enum class ValueType : uint8_t { Specific, Any, None } Type;
- BytePattern(ValueType Type) : Type(Type) {}
-
-public:
- BytePattern(uint8_t Value) : Val(Value), Type(ValueType::Specific) {}
- static BytePattern Any() { return BytePattern(ValueType::Any); }
- static BytePattern None() { return BytePattern(ValueType::None); }
- bool isAny() const { return Type == ValueType::Any; }
- bool isNone() const { return Type == ValueType::None; }
- bool isValued() const { return Type == ValueType::Specific; }
- uint8_t getValue() const {
- assert(isValued());
- return Val;
- }
- BytePattern merge(const BytePattern Other) const {
- if (isNone() || Other.isNone())
- return None();
- if (isAny())
- return Other;
- if (Other.isAny())
- return *this;
- if (getValue() == Other.getValue())
- return *this;
- return None();
- }
-};
-
-/// Figures out whether the constant can be initialized with memset.
-static BytePattern constantIsRepeatedBytePattern(llvm::Constant *C) {
- if (isa<llvm::ConstantAggregateZero>(C) || isa<llvm::ConstantPointerNull>(C))
- return BytePattern(0x00);
- if (isa<llvm::UndefValue>(C))
- return BytePattern::Any();
-
- if (isa<llvm::ConstantInt>(C)) {
- auto *Int = cast<llvm::ConstantInt>(C);
- if (Int->getBitWidth() % 8 != 0)
- return BytePattern::None();
- const llvm::APInt &Value = Int->getValue();
- if (Value.isSplat(8))
- return BytePattern(Value.getLoBits(8).getLimitedValue());
- return BytePattern::None();
- }
-
- if (isa<llvm::ConstantFP>(C)) {
- auto *FP = cast<llvm::ConstantFP>(C);
- llvm::APInt Bits = FP->getValueAPF().bitcastToAPInt();
- if (Bits.getBitWidth() % 8 != 0)
- return BytePattern::None();
- if (!Bits.isSplat(8))
- return BytePattern::None();
- return BytePattern(Bits.getLimitedValue() & 0xFF);
- }
-
- if (isa<llvm::ConstantVector>(C)) {
- llvm::Constant *Splat = cast<llvm::ConstantVector>(C)->getSplatValue();
- if (Splat)
- return constantIsRepeatedBytePattern(Splat);
- return BytePattern::None();
- }
-
- if (isa<llvm::ConstantArray>(C) || isa<llvm::ConstantStruct>(C)) {
- BytePattern Pattern(BytePattern::Any());
- for (unsigned I = 0, E = C->getNumOperands(); I != E; ++I) {
- llvm::Constant *Elt = cast<llvm::Constant>(C->getOperand(I));
- Pattern = Pattern.merge(constantIsRepeatedBytePattern(Elt));
- if (Pattern.isNone())
- return Pattern;
+/// Decide whether to initialize a local variable with memset rather than a
+/// memcpy from a constant global; assumes bzero was already ruled out. Returns
+/// llvm::isBytewiseValue(Init), or nullptr when GlobalSize is at most 32.
+/// FIXME We could be more clever, as we are for bzero above, and generate
+/// memset followed by stores. It's unclear that's worth the effort.
+static llvm::Value *shouldUseMemSetToInitialize(llvm::Constant *Init,
+ uint64_t GlobalSize) {
+ uint64_t SizeLimit = 32;
+ if (GlobalSize <= SizeLimit)
+ return nullptr;
+ return llvm::isBytewiseValue(Init);
+}
+
+static llvm::Constant *patternFor(CodeGenModule &CGM, llvm::Type *Ty) {
+ // The following value is a guaranteed unmappable pointer value and has a
+ // repeated byte-pattern which makes it easier to synthesize. We use it for
+ // pointers as well as integers so that aggregates are likely to be
+ // initialized with this repeated value.
+ constexpr uint64_t LargeValue = 0xAAAAAAAAAAAAAAAAull;
+ // For 32-bit platforms it's a bit trickier because, across systems, only the
+ // zero page can reasonably be expected to be unmapped, and even then we need
+ // a very low address. We use a smaller value, and that value sadly doesn't
+ // have a repeated byte-pattern. We don't use it for integers.
+ constexpr uint32_t SmallValue = 0x000000AA;
+ // Floating-point values are initialized as NaNs because they propagate. Using
+ // a repeated byte pattern means that it will be easier to initialize
+ // all-floating-point aggregates and arrays with memset. Further, aggregates
+ // which mix integral and a few floats might also initialize with memset
+ // followed by a handful of stores for the floats. Using fairly unique NaNs
+ // also means they'll be easier to distinguish in a crash.
+ constexpr bool NegativeNaN = true;
+ constexpr uint64_t NaNPayload = 0xFFFFFFFFFFFFFFFFull;
+ if (Ty->isIntOrIntVectorTy()) {
+ unsigned BitWidth = cast<llvm::IntegerType>(
+ Ty->isVectorTy() ? Ty->getVectorElementType() : Ty)
+ ->getBitWidth();
+ if (BitWidth <= 64)
+ return llvm::ConstantInt::get(Ty, LargeValue);
+ return llvm::ConstantInt::get(
+ Ty, llvm::APInt::getSplat(BitWidth, llvm::APInt(64, LargeValue)));
+ }
+ if (Ty->isPtrOrPtrVectorTy()) {
+ auto *PtrTy = cast<llvm::PointerType>(
+ Ty->isVectorTy() ? Ty->getVectorElementType() : Ty);
+ unsigned PtrWidth = CGM.getContext().getTargetInfo().getPointerWidth(
+ PtrTy->getAddressSpace());
+ llvm::Type *IntTy = llvm::IntegerType::get(CGM.getLLVMContext(), PtrWidth);
+ uint64_t IntValue;
+ switch (PtrWidth) {
+ default:
+ llvm_unreachable("pattern initialization of unsupported pointer width");
+ case 64:
+ IntValue = LargeValue;
+ break;
+ case 32:
+ IntValue = SmallValue;
+ break;
}
- return Pattern;
+ auto *Int = llvm::ConstantInt::get(IntTy, IntValue);
+ return llvm::ConstantExpr::getIntToPtr(Int, PtrTy);
+ }
+ if (Ty->isFPOrFPVectorTy()) {
+ unsigned BitWidth = llvm::APFloat::semanticsSizeInBits(
+ (Ty->isVectorTy() ? Ty->getVectorElementType() : Ty)
+ ->getFltSemantics());
+ llvm::APInt Payload(64, NaNPayload);
+ if (BitWidth >= 64)
+ Payload = llvm::APInt::getSplat(BitWidth, Payload);
+ return llvm::ConstantFP::getQNaN(Ty, NegativeNaN, &Payload);
+ }
+ if (Ty->isArrayTy()) {
+ // Note: this doesn't touch tail padding (at the end of an object, before
+ // the next array object). It is instead handled by replaceUndef.
+ auto *ArrTy = cast<llvm::ArrayType>(Ty);
+ llvm::SmallVector<llvm::Constant *, 8> Element(
+ ArrTy->getNumElements(), patternFor(CGM, ArrTy->getElementType()));
+ return llvm::ConstantArray::get(ArrTy, Element);
+ }
+
+ // Note: this doesn't touch struct padding. It will initialize as much union
+ // padding as is required for the largest type in the union. Padding is
+ // instead handled by replaceUndef. Stores to structs with volatile members
+ // don't have a volatile qualifier when initialized according to C++. This is
+ // fine because stack-based volatiles don't really have volatile semantics
+ // anyways, and the initialization shouldn't be observable.
+ auto *StructTy = cast<llvm::StructType>(Ty);
+ llvm::SmallVector<llvm::Constant *, 8> Struct(StructTy->getNumElements());
+ for (unsigned El = 0; El != Struct.size(); ++El)
+ Struct[El] = patternFor(CGM, StructTy->getElementType(El));
+ return llvm::ConstantStruct::get(StructTy, Struct);
+}
+
+/// Emit Constant as a private unnamed_addr constant global; return it as i8*.
+static Address createUnnamedGlobalFrom(CodeGenModule &CGM, const VarDecl &D,
+ CGBuilderTy &Builder,
+ llvm::Constant *Constant,
+ CharUnits Align) {
+ auto FunctionName = [&](const DeclContext *DC) -> std::string {
+ if (const auto *FD = dyn_cast<FunctionDecl>(DC)) {
+ if (const auto *CC = dyn_cast<CXXConstructorDecl>(FD))
+ return CC->getNameAsString();
+ if (const auto *CD = dyn_cast<CXXDestructorDecl>(FD))
+ return CD->getNameAsString();
+ return CGM.getMangledName(FD);
+ } else if (const auto *OM = dyn_cast<ObjCMethodDecl>(DC)) {
+ return OM->getNameAsString();
+ } else if (isa<BlockDecl>(DC)) {
+ return "<block>";
+ } else if (isa<CapturedDecl>(DC)) {
+ return "<captured>";
+ }
+ llvm_unreachable("expected a function or method");
+ };
+
+ auto *Ty = Constant->getType();
+ bool isConstant = true;
+ llvm::GlobalVariable *InsertBefore = nullptr;
+ unsigned AS = CGM.getContext().getTargetAddressSpace(
+ CGM.getStringLiteralAddressSpace());
+ llvm::GlobalVariable *GV = new llvm::GlobalVariable(
+ CGM.getModule(), Ty, isConstant, llvm::GlobalValue::PrivateLinkage,
+ Constant,
+ "__const." + FunctionName(D.getParentFunctionOrMethod()) + "." +
+ D.getName(),
+ InsertBefore, llvm::GlobalValue::NotThreadLocal, AS);
+ GV->setAlignment(Align.getQuantity());
+ GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+
+ Address SrcPtr = Address(GV, Align);
+ llvm::Type *BP = llvm::PointerType::getInt8PtrTy(CGM.getLLVMContext(), AS);
+ if (SrcPtr.getType() != BP)
+ SrcPtr = Builder.CreateBitCast(SrcPtr, BP);
+ return SrcPtr;
+}
+
+static void emitStoresForConstant(CodeGenModule &CGM, const VarDecl &D,
+ Address Loc, bool isVolatile,
+ CGBuilderTy &Builder,
+ llvm::Constant *constant) {
+ auto *Ty = constant->getType();
+ bool isScalar = Ty->isIntOrIntVectorTy() || Ty->isPtrOrPtrVectorTy() ||
+ Ty->isFPOrFPVectorTy();
+ if (isScalar) {
+ Builder.CreateStore(constant, Loc, isVolatile);
+ return;
}
- if (llvm::ConstantDataSequential *CDS =
- dyn_cast<llvm::ConstantDataSequential>(C)) {
- BytePattern Pattern(BytePattern::Any());
- for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
- llvm::Constant *Elt = CDS->getElementAsConstant(I);
- Pattern = Pattern.merge(constantIsRepeatedBytePattern(Elt));
- if (Pattern.isNone())
- return Pattern;
+ auto *Int8Ty = llvm::IntegerType::getInt8Ty(CGM.getLLVMContext());
+ auto *IntPtrTy = CGM.getDataLayout().getIntPtrType(CGM.getLLVMContext());
+
+ // If the initializer is all or mostly the same, codegen with bzero / memset
+ // then do a few stores afterward.
+ uint64_t ConstantSize = CGM.getDataLayout().getTypeAllocSize(Ty);
+ auto *SizeVal = llvm::ConstantInt::get(IntPtrTy, ConstantSize);
+ if (shouldUseBZeroPlusStoresToInitialize(constant, ConstantSize)) {
+ Builder.CreateMemSet(Loc, llvm::ConstantInt::get(Int8Ty, 0), SizeVal,
+ isVolatile);
+
+ bool valueAlreadyCorrect =
+ constant->isNullValue() || isa<llvm::UndefValue>(constant);
+ if (!valueAlreadyCorrect) {
+ Loc = Builder.CreateBitCast(Loc, Ty->getPointerTo(Loc.getAddressSpace()));
+ emitStoresForInitAfterBZero(CGM, constant, Loc, isVolatile, Builder);
+ }
+ return;
+ }
+
+ llvm::Value *Pattern = shouldUseMemSetToInitialize(constant, ConstantSize);
+ if (Pattern) {
+ uint64_t Value = 0x00;
+ if (!isa<llvm::UndefValue>(Pattern)) {
+ const llvm::APInt &AP = cast<llvm::ConstantInt>(Pattern)->getValue();
+ assert(AP.getBitWidth() <= 8);
+ Value = AP.getLimitedValue();
}
- return Pattern;
+ Builder.CreateMemSet(Loc, llvm::ConstantInt::get(Int8Ty, Value), SizeVal,
+ isVolatile);
+ return;
}
- // BlockAddress, ConstantExpr, and everything else is scary.
- return BytePattern::None();
+ Builder.CreateMemCpy(
+ Loc,
+ createUnnamedGlobalFrom(CGM, D, Builder, constant, Loc.getAlignment()),
+ SizeVal, isVolatile);
}
-/// Decide whether we should use memset to initialize a local variable instead
-/// of using a memcpy from a constant global. Assumes we've already decided to
-/// not user bzero.
-/// FIXME We could be more clever, as we are for bzero above, and generate
-/// memset followed by stores. It's unclear that's worth the effort.
-static BytePattern shouldUseMemSetToInitialize(llvm::Constant *Init,
- uint64_t GlobalSize) {
- uint64_t SizeLimit = 32;
- if (GlobalSize <= SizeLimit)
- return BytePattern::None();
- return constantIsRepeatedBytePattern(Init);
+static void emitStoresForZeroInit(CodeGenModule &CGM, const VarDecl &D,
+ Address Loc, bool isVolatile,
+ CGBuilderTy &Builder) {
+ llvm::Type *ElTy = Loc.getElementType();
+ llvm::Constant *constant = llvm::Constant::getNullValue(ElTy);
+ emitStoresForConstant(CGM, D, Loc, isVolatile, Builder, constant);
+}
+
+static void emitStoresForPatternInit(CodeGenModule &CGM, const VarDecl &D,
+ Address Loc, bool isVolatile,
+ CGBuilderTy &Builder) {
+ llvm::Type *ElTy = Loc.getElementType();
+ llvm::Constant *constant = patternFor(CGM, ElTy);
+ assert(!isa<llvm::UndefValue>(constant));
+ emitStoresForConstant(CGM, D, Loc, isVolatile, Builder, constant);
+}
+
+static bool containsUndef(llvm::Constant *constant) {
+ auto *Ty = constant->getType();
+ if (isa<llvm::UndefValue>(constant))
+ return true;
+ if (Ty->isStructTy() || Ty->isArrayTy() || Ty->isVectorTy())
+ for (llvm::Use &Op : constant->operands())
+ if (containsUndef(cast<llvm::Constant>(Op)))
+ return true;
+ return false;
+}
+
+static llvm::Constant *replaceUndef(llvm::Constant *constant) {
+ // Returns constant with every nested undef replaced by a null value.
+ // FIXME: for pattern initialization, replace undef with 0xAA instead; also
+ // replace padding between values by creating a new padding-free struct type.
+ auto *Ty = constant->getType();
+ if (isa<llvm::UndefValue>(constant))
+ return llvm::Constant::getNullValue(Ty);
+ if (!(Ty->isStructTy() || Ty->isArrayTy() || Ty->isVectorTy()))
+ return constant;
+ if (!containsUndef(constant))
+ return constant;
+ llvm::SmallVector<llvm::Constant *, 8> Values(constant->getNumOperands());
+ for (unsigned Op = 0, NumOp = constant->getNumOperands(); Op != NumOp; ++Op) {
+ auto *OpValue = cast<llvm::Constant>(constant->getOperand(Op));
+ Values[Op] = replaceUndef(OpValue);
+ }
+ if (Ty->isStructTy())
+ return llvm::ConstantStruct::get(cast<llvm::StructType>(Ty), Values);
+ if (Ty->isArrayTy())
+ return llvm::ConstantArray::get(cast<llvm::ArrayType>(Ty), Values);
+ assert(Ty->isVectorTy());
+ return llvm::ConstantVector::get(Values);
}
/// EmitAutoVarDecl - Emit code and set up an entry in LocalDeclMap for a
@@ -1098,6 +1237,7 @@ void CodeGenFunction::EmitAndRegisterVariableArrayDimensions(
// For each dimension stores its QualType and corresponding
// size-expression Value.
SmallVector<CodeGenFunction::VlaSizePair, 4> Dimensions;
+ SmallVector<IdentifierInfo *, 4> VLAExprNames;
// Break down the array into individual dimensions.
QualType Type1D = D.getType();
@@ -1106,8 +1246,14 @@ void CodeGenFunction::EmitAndRegisterVariableArrayDimensions(
if (auto *C = dyn_cast<llvm::ConstantInt>(VlaSize.NumElts))
Dimensions.emplace_back(C, Type1D.getUnqualifiedType());
else {
- auto SizeExprAddr = CreateDefaultAlignTempAlloca(
- VlaSize.NumElts->getType(), "__vla_expr");
+ // Generate a locally unique name for the size expression.
+ Twine Name = Twine("__vla_expr") + Twine(VLAExprCounter++);
+ SmallString<12> Buffer;
+ StringRef NameRef = Name.toStringRef(Buffer);
+ auto &Ident = getContext().Idents.getOwn(NameRef);
+ VLAExprNames.push_back(&Ident);
+ auto SizeExprAddr =
+ CreateDefaultAlignTempAlloca(VlaSize.NumElts->getType(), NameRef);
Builder.CreateStore(VlaSize.NumElts, SizeExprAddr);
Dimensions.emplace_back(SizeExprAddr.getPointer(),
Type1D.getUnqualifiedType());
@@ -1121,20 +1267,20 @@ void CodeGenFunction::EmitAndRegisterVariableArrayDimensions(
// Register each dimension's size-expression with a DILocalVariable,
// so that it can be used by CGDebugInfo when instantiating a DISubrange
// to describe this array.
+ unsigned NameIdx = 0;
for (auto &VlaSize : Dimensions) {
llvm::Metadata *MD;
if (auto *C = dyn_cast<llvm::ConstantInt>(VlaSize.NumElts))
MD = llvm::ConstantAsMetadata::get(C);
else {
// Create an artificial VarDecl to generate debug info for.
- IdentifierInfo &NameIdent = getContext().Idents.getOwn(
- cast<llvm::AllocaInst>(VlaSize.NumElts)->getName());
+ IdentifierInfo *NameIdent = VLAExprNames[NameIdx++];
auto VlaExprTy = VlaSize.NumElts->getType()->getPointerElementType();
auto QT = getContext().getIntTypeForBitwidth(
VlaExprTy->getScalarSizeInBits(), false);
auto *ArtificialDecl = VarDecl::Create(
getContext(), const_cast<DeclContext *>(D.getDeclContext()),
- D.getLocation(), D.getLocation(), &NameIdent, QT,
+ D.getLocation(), D.getLocation(), NameIdent, QT,
getContext().CreateTypeSourceInfo(QT), SC_Auto);
ArtificialDecl->setImplicit();
@@ -1157,8 +1303,8 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
AutoVarEmission emission(D);
- bool isByRef = D.hasAttr<BlocksAttr>();
- emission.IsByRef = isByRef;
+ bool isEscapingByRef = D.isEscapingByref();
+ emission.IsEscapingByRef = isEscapingByRef;
CharUnits alignment = getContext().getDeclAlign(&D);
@@ -1197,8 +1343,8 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
// in OpenCL.
if ((!getLangOpts().OpenCL ||
Ty.getAddressSpace() == LangAS::opencl_constant) &&
- (CGM.getCodeGenOpts().MergeAllConstants && !NRVO && !isByRef &&
- CGM.isTypeConstant(Ty, true))) {
+ (CGM.getCodeGenOpts().MergeAllConstants && !NRVO &&
+ !isEscapingByRef && CGM.isTypeConstant(Ty, true))) {
EmitStaticVarDecl(D, llvm::GlobalValue::InternalLinkage);
// Signal this condition to later callbacks.
@@ -1250,7 +1396,7 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
} else {
CharUnits allocaAlignment;
llvm::Type *allocaTy;
- if (isByRef) {
+ if (isEscapingByRef) {
auto &byrefInfo = getBlockByrefInfo(&D);
allocaTy = byrefInfo.Type;
allocaAlignment = byrefInfo.ByrefAlignment;
@@ -1439,6 +1585,8 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) {
auto DL = ApplyDebugLocation::CreateDefaultArtificial(*this, D.getLocation());
QualType type = D.getType();
+ bool isVolatile = type.isVolatileQualified();
+
// If this local has an initializer, emit it now.
const Expr *Init = D.getInit();
@@ -1450,7 +1598,7 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) {
}
// Initialize the structure of a __block variable.
- if (emission.IsByRef)
+ if (emission.IsEscapingByRef)
emitByrefStructureInit(emission);
// Initialize the variable here if it doesn't have a initializer and it is a
@@ -1460,30 +1608,126 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) {
type.isNonTrivialToPrimitiveDefaultInitialize() ==
QualType::PDIK_Struct) {
LValue Dst = MakeAddrLValue(emission.getAllocatedAddress(), type);
- if (emission.IsByRef)
+ if (emission.IsEscapingByRef)
drillIntoBlockVariable(*this, Dst, &D);
defaultInitNonTrivialCStructVar(Dst);
return;
}
- if (isTrivialInitializer(Init))
- return;
-
// Check whether this is a byref variable that's potentially
// captured and moved by its own initializer. If so, we'll need to
// emit the initializer first, then copy into the variable.
- bool capturedByInit = emission.IsByRef && isCapturedBy(D, Init);
+ bool capturedByInit =
+ Init && emission.IsEscapingByRef && isCapturedBy(D, Init);
Address Loc =
- capturedByInit ? emission.Addr : emission.getObjectAddress(*this);
+ capturedByInit ? emission.Addr : emission.getObjectAddress(*this);
+
+ // Note: constexpr already initializes everything correctly.
+ LangOptions::TrivialAutoVarInitKind trivialAutoVarInit =
+ (D.isConstexpr()
+ ? LangOptions::TrivialAutoVarInitKind::Uninitialized
+ : (D.getAttr<UninitializedAttr>()
+ ? LangOptions::TrivialAutoVarInitKind::Uninitialized
+ : getContext().getLangOpts().getTrivialAutoVarInit()));
+
+ auto initializeWhatIsTechnicallyUninitialized = [&]() {
+ if (trivialAutoVarInit ==
+ LangOptions::TrivialAutoVarInitKind::Uninitialized)
+ return;
+
+ CharUnits Size = getContext().getTypeSizeInChars(type);
+ if (!Size.isZero()) {
+ switch (trivialAutoVarInit) {
+ case LangOptions::TrivialAutoVarInitKind::Uninitialized:
+ llvm_unreachable("Uninitialized handled above");
+ case LangOptions::TrivialAutoVarInitKind::Zero:
+ emitStoresForZeroInit(CGM, D, Loc, isVolatile, Builder);
+ break;
+ case LangOptions::TrivialAutoVarInitKind::Pattern:
+ emitStoresForPatternInit(CGM, D, Loc, isVolatile, Builder);
+ break;
+ }
+ return;
+ }
+
+ // VLAs look zero-sized to getTypeInfo. We can't emit constant stores to
+ // them, so emit a memset or a per-element memcpy loop sized by the VLA.
+ // Technically zero-sized or negative-sized VLAs are undefined, and UBSan
+ // will catch that code, but there exists code which generates zero-sized
+ // VLAs. Be nice and initialize whatever they requested.
+ const VariableArrayType *VlaType =
+ dyn_cast_or_null<VariableArrayType>(getContext().getAsArrayType(type));
+ if (!VlaType)
+ return;
+ auto VlaSize = getVLASize(VlaType);
+ auto SizeVal = VlaSize.NumElts;
+ CharUnits EltSize = getContext().getTypeSizeInChars(VlaSize.Type);
+ switch (trivialAutoVarInit) {
+ case LangOptions::TrivialAutoVarInitKind::Uninitialized:
+ llvm_unreachable("Uninitialized handled above");
+
+ case LangOptions::TrivialAutoVarInitKind::Zero:
+ if (!EltSize.isOne())
+ SizeVal = Builder.CreateNUWMul(SizeVal, CGM.getSize(EltSize));
+ Builder.CreateMemSet(Loc, llvm::ConstantInt::get(Int8Ty, 0), SizeVal,
+ isVolatile);
+ break;
+
+ case LangOptions::TrivialAutoVarInitKind::Pattern: {
+ llvm::Type *ElTy = Loc.getElementType();
+ llvm::Constant *Constant = patternFor(CGM, ElTy);
+ CharUnits ConstantAlign = getContext().getTypeAlignInChars(VlaSize.Type);
+ llvm::BasicBlock *SetupBB = createBasicBlock("vla-setup.loop");
+ llvm::BasicBlock *LoopBB = createBasicBlock("vla-init.loop");
+ llvm::BasicBlock *ContBB = createBasicBlock("vla-init.cont");
+ llvm::Value *IsZeroSizedVLA = Builder.CreateICmpEQ(
+ SizeVal, llvm::ConstantInt::get(SizeVal->getType(), 0),
+ "vla.iszerosized");
+ Builder.CreateCondBr(IsZeroSizedVLA, ContBB, SetupBB);
+ EmitBlock(SetupBB);
+ if (!EltSize.isOne())
+ SizeVal = Builder.CreateNUWMul(SizeVal, CGM.getSize(EltSize));
+ llvm::Value *BaseSizeInChars =
+ llvm::ConstantInt::get(IntPtrTy, EltSize.getQuantity());
+ Address Begin = Builder.CreateElementBitCast(Loc, Int8Ty, "vla.begin");
+ llvm::Value *End =
+ Builder.CreateInBoundsGEP(Begin.getPointer(), SizeVal, "vla.end");
+ llvm::BasicBlock *OriginBB = Builder.GetInsertBlock();
+ EmitBlock(LoopBB);
+ llvm::PHINode *Cur = Builder.CreatePHI(Begin.getType(), 2, "vla.cur");
+ Cur->addIncoming(Begin.getPointer(), OriginBB);
+ CharUnits CurAlign = Loc.getAlignment().alignmentOfArrayElement(EltSize);
+ Builder.CreateMemCpy(
+ Address(Cur, CurAlign),
+ createUnnamedGlobalFrom(CGM, D, Builder, Constant, ConstantAlign),
+ BaseSizeInChars, isVolatile);
+ llvm::Value *Next =
+ Builder.CreateInBoundsGEP(Int8Ty, Cur, BaseSizeInChars, "vla.next");
+ llvm::Value *Done = Builder.CreateICmpEQ(Next, End, "vla-init.isdone");
+ Builder.CreateCondBr(Done, ContBB, LoopBB);
+ Cur->addIncoming(Next, LoopBB);
+ EmitBlock(ContBB);
+ } break;
+ }
+ };
+
+ if (isTrivialInitializer(Init)) {
+ initializeWhatIsTechnicallyUninitialized();
+ return;
+ }
llvm::Constant *constant = nullptr;
if (emission.IsConstantAggregate || D.isConstexpr()) {
assert(!capturedByInit && "constant init contains a capturing block?");
constant = ConstantEmitter(*this).tryEmitAbstractForInitializer(D);
+ if (constant && trivialAutoVarInit !=
+ LangOptions::TrivialAutoVarInitKind::Uninitialized)
+ constant = replaceUndef(constant);
}
if (!constant) {
+ initializeWhatIsTechnicallyUninitialized();
LValue lv = MakeAddrLValue(Loc, type);
lv.setNonGC(true);
return EmitExprAsInit(Init, &D, lv, capturedByInit);
@@ -1496,61 +1740,11 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) {
return EmitStoreThroughLValue(RValue::get(constant), lv, true);
}
- // If this is a simple aggregate initialization, we can optimize it
- // in various ways.
- bool isVolatile = type.isVolatileQualified();
-
- llvm::Value *SizeVal =
- llvm::ConstantInt::get(IntPtrTy,
- getContext().getTypeSizeInChars(type).getQuantity());
-
llvm::Type *BP = CGM.Int8Ty->getPointerTo(Loc.getAddressSpace());
if (Loc.getType() != BP)
Loc = Builder.CreateBitCast(Loc, BP);
- // If the initializer is all or mostly the same, codegen with bzero / memset
- // then do a few stores afterward.
- uint64_t ConstantSize =
- CGM.getDataLayout().getTypeAllocSize(constant->getType());
- if (shouldUseBZeroPlusStoresToInitialize(constant, ConstantSize)) {
- Builder.CreateMemSet(Loc, llvm::ConstantInt::get(Int8Ty, 0), SizeVal,
- isVolatile);
- // Zero and undef don't require a stores.
- if (!constant->isNullValue() && !isa<llvm::UndefValue>(constant)) {
- Loc = Builder.CreateBitCast(Loc,
- constant->getType()->getPointerTo(Loc.getAddressSpace()));
- emitStoresForInitAfterBZero(CGM, constant, Loc, isVolatile, Builder);
- }
- return;
- }
-
- BytePattern Pattern = shouldUseMemSetToInitialize(constant, ConstantSize);
- if (!Pattern.isNone()) {
- uint8_t Value = Pattern.isAny() ? 0x00 : Pattern.getValue();
- Builder.CreateMemSet(Loc, llvm::ConstantInt::get(Int8Ty, Value), SizeVal,
- isVolatile);
- return;
- }
-
- // Otherwise, create a temporary global with the initializer then
- // memcpy from the global to the alloca.
- std::string Name = getStaticDeclName(CGM, D);
- unsigned AS = CGM.getContext().getTargetAddressSpace(
- CGM.getStringLiteralAddressSpace());
- BP = llvm::PointerType::getInt8PtrTy(getLLVMContext(), AS);
-
- llvm::GlobalVariable *GV = new llvm::GlobalVariable(
- CGM.getModule(), constant->getType(), true,
- llvm::GlobalValue::PrivateLinkage, constant, Name, nullptr,
- llvm::GlobalValue::NotThreadLocal, AS);
- GV->setAlignment(Loc.getAlignment().getQuantity());
- GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
-
- Address SrcPtr = Address(GV, Loc.getAlignment());
- if (SrcPtr.getType() != BP)
- SrcPtr = Builder.CreateBitCast(SrcPtr, BP);
-
- Builder.CreateMemCpy(Loc, SrcPtr, SizeVal, isVolatile);
+ emitStoresForConstant(CGM, D, Loc, isVolatile, Builder, constant);
}
/// Emit an expression as an initializer for an object (variable, field, etc.)
@@ -1712,12 +1906,14 @@ void CodeGenFunction::EmitAutoVarCleanups(const AutoVarEmission &emission) {
// If this is a block variable, call _Block_object_destroy
// (on the unforwarded address). Don't enter this cleanup if we're in pure-GC
// mode.
- if (emission.IsByRef && CGM.getLangOpts().getGC() != LangOptions::GCOnly) {
+ if (emission.IsEscapingByRef &&
+ CGM.getLangOpts().getGC() != LangOptions::GCOnly) {
BlockFieldFlags Flags = BLOCK_FIELD_IS_BYREF;
if (emission.Variable->getType().isObjCGCWeak())
Flags |= BLOCK_FIELD_IS_WEAK;
enterByrefCleanup(NormalAndEHCleanup, emission.Addr, Flags,
- /*LoadBlockVarAddr*/ false);
+ /*LoadBlockVarAddr*/ false,
+ cxxDestructorCanThrow(emission.Variable->getType()));
}
}
@@ -2134,15 +2330,11 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg,
// cleanup to do the release at the end of the function.
bool isConsumed = D.hasAttr<NSConsumedAttr>();
- // 'self' is always formally __strong, but if this is not an
- // init method then we don't want to retain it.
+ // If a parameter is pseudo-strong then we can omit the implicit retain.
if (D.isARCPseudoStrong()) {
- const ObjCMethodDecl *method = cast<ObjCMethodDecl>(CurCodeDecl);
- assert(&D == method->getSelfDecl());
- assert(lt == Qualifiers::OCL_Strong);
- assert(qs.hasConst());
- assert(method->getMethodFamily() != OMF_init);
- (void) method;
+ assert(lt == Qualifiers::OCL_Strong &&
+ "pseudo-strong variable isn't strong?");
+ assert(qs.hasConst() && "pseudo-strong variable should be const!");
lt = Qualifiers::OCL_ExplicitNone;
}
@@ -2224,3 +2416,7 @@ void CodeGenModule::EmitOMPDeclareReduction(const OMPDeclareReductionDecl *D,
return;
getOpenMPRuntime().emitUserDefinedReduction(CGF, D);
}
+
+void CodeGenModule::EmitOMPRequiresDecl(const OMPRequiresDecl *D) {
+ getOpenMPRuntime().checkArchForUnifiedAddressing(*this, D);
+}
diff --git a/lib/CodeGen/CGDeclCXX.cpp b/lib/CodeGen/CGDeclCXX.cpp
index 510863f68eff..9aa31f181e99 100644
--- a/lib/CodeGen/CGDeclCXX.cpp
+++ b/lib/CodeGen/CGDeclCXX.cpp
@@ -15,7 +15,7 @@
#include "CGCXXABI.h"
#include "CGObjCRuntime.h"
#include "CGOpenMPRuntime.h"
-#include "clang/Frontend/CodeGenOptions.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/MDBuilder.h"
@@ -26,7 +26,10 @@ using namespace CodeGen;
static void EmitDeclInit(CodeGenFunction &CGF, const VarDecl &D,
ConstantAddress DeclPtr) {
- assert(D.hasGlobalStorage() && "VarDecl must have global storage!");
+ assert(
+ (D.hasGlobalStorage() ||
+ (D.hasLocalStorage() && CGF.getContext().getLangOpts().OpenCLCPlusPlus)) &&
+ "VarDecl must have global or local (in the case of OpenCL) storage!");
assert(!D.getType()->isReferenceType() &&
"Should not call EmitDeclInit on a reference!");
@@ -63,15 +66,24 @@ static void EmitDeclInit(CodeGenFunction &CGF, const VarDecl &D,
/// Emit code to cause the destruction of the given variable with
/// static storage duration.
static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D,
- ConstantAddress addr) {
+ ConstantAddress Addr) {
+ // Honor __attribute__((no_destroy)) and bail instead of attempting
+ // to emit a reference to a possibly nonexistent destructor, which
+ // in turn can cause a crash. This will result in a global constructor
+ // that isn't balanced out by a destructor call as intended by the
+ // attribute. This also checks for -fno-c++-static-destructors and
+ // bails even if the attribute is not present.
+ if (D.isNoDestroy(CGF.getContext()))
+ return;
+
CodeGenModule &CGM = CGF.CGM;
// FIXME: __attribute__((cleanup)) ?
- QualType type = D.getType();
- QualType::DestructionKind dtorKind = type.isDestructedType();
+ QualType Type = D.getType();
+ QualType::DestructionKind DtorKind = Type.isDestructedType();
- switch (dtorKind) {
+ switch (DtorKind) {
case QualType::DK_none:
return;
@@ -86,13 +98,14 @@ static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D,
return;
}
- llvm::Constant *function;
- llvm::Constant *argument;
+ llvm::Constant *Func;
+ llvm::Constant *Argument;
// Special-case non-array C++ destructors, if they have the right signature.
// Under some ABIs, destructors return this instead of void, and cannot be
- // passed directly to __cxa_atexit if the target does not allow this mismatch.
- const CXXRecordDecl *Record = type->getAsCXXRecordDecl();
+ // passed directly to __cxa_atexit if the target does not allow this
+ // mismatch.
+ const CXXRecordDecl *Record = Type->getAsCXXRecordDecl();
bool CanRegisterDestructor =
Record && (!CGM.getCXXABI().HasThisReturn(
GlobalDecl(Record->getDestructor(), Dtor_Complete)) ||
@@ -103,43 +116,47 @@ static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D,
bool UsingExternalHelper = !CGM.getCodeGenOpts().CXAAtExit;
if (Record && (CanRegisterDestructor || UsingExternalHelper)) {
assert(!Record->hasTrivialDestructor());
- CXXDestructorDecl *dtor = Record->getDestructor();
+ CXXDestructorDecl *Dtor = Record->getDestructor();
- function = CGM.getAddrOfCXXStructor(dtor, StructorType::Complete);
- argument = llvm::ConstantExpr::getBitCast(
- addr.getPointer(), CGF.getTypes().ConvertType(type)->getPointerTo());
+ Func = CGM.getAddrOfCXXStructor(Dtor, StructorType::Complete);
+ Argument = llvm::ConstantExpr::getBitCast(
+ Addr.getPointer(), CGF.getTypes().ConvertType(Type)->getPointerTo());
// Otherwise, the standard logic requires a helper function.
} else {
- function = CodeGenFunction(CGM)
- .generateDestroyHelper(addr, type, CGF.getDestroyer(dtorKind),
- CGF.needsEHCleanup(dtorKind), &D);
- argument = llvm::Constant::getNullValue(CGF.Int8PtrTy);
+ Func = CodeGenFunction(CGM)
+ .generateDestroyHelper(Addr, Type, CGF.getDestroyer(DtorKind),
+ CGF.needsEHCleanup(DtorKind), &D);
+ Argument = llvm::Constant::getNullValue(CGF.Int8PtrTy);
}
- CGM.getCXXABI().registerGlobalDtor(CGF, D, function, argument);
+ CGM.getCXXABI().registerGlobalDtor(CGF, D, Func, Argument);
}
/// Emit code to cause the variable at the given address to be considered as
/// constant from this point onwards.
static void EmitDeclInvariant(CodeGenFunction &CGF, const VarDecl &D,
llvm::Constant *Addr) {
+ return CGF.EmitInvariantStart(
+ Addr, CGF.getContext().getTypeSizeInChars(D.getType()));
+}
+
+void CodeGenFunction::EmitInvariantStart(llvm::Constant *Addr, CharUnits Size) {
// Do not emit the intrinsic if we're not optimizing.
- if (!CGF.CGM.getCodeGenOpts().OptimizationLevel)
+ if (!CGM.getCodeGenOpts().OptimizationLevel)
return;
// Grab the llvm.invariant.start intrinsic.
llvm::Intrinsic::ID InvStartID = llvm::Intrinsic::invariant_start;
// Overloaded address space type.
- llvm::Type *ObjectPtr[1] = {CGF.Int8PtrTy};
- llvm::Constant *InvariantStart = CGF.CGM.getIntrinsic(InvStartID, ObjectPtr);
+ llvm::Type *ObjectPtr[1] = {Int8PtrTy};
+ llvm::Constant *InvariantStart = CGM.getIntrinsic(InvStartID, ObjectPtr);
// Emit a call with the size in bytes of the object.
- CharUnits WidthChars = CGF.getContext().getTypeSizeInChars(D.getType());
- uint64_t Width = WidthChars.getQuantity();
- llvm::Value *Args[2] = { llvm::ConstantInt::getSigned(CGF.Int64Ty, Width),
- llvm::ConstantExpr::getBitCast(Addr, CGF.Int8PtrTy)};
- CGF.Builder.CreateCall(InvariantStart, Args);
+ uint64_t Width = Size.getQuantity();
+ llvm::Value *Args[2] = { llvm::ConstantInt::getSigned(Int64Ty, Width),
+ llvm::ConstantExpr::getBitCast(Addr, Int8PtrTy)};
+ Builder.CreateCall(InvariantStart, Args);
}
void CodeGenFunction::EmitCXXGlobalVarDeclInit(const VarDecl &D,
@@ -347,6 +364,10 @@ llvm::Function *CodeGenModule::CreateGlobalInitOrDestructFunction(
!isInSanitizerBlacklist(SanitizerKind::Memory, Fn, Loc))
Fn->addFnAttr(llvm::Attribute::SanitizeMemory);
+ if (getLangOpts().Sanitize.has(SanitizerKind::KernelMemory) &&
+ !isInSanitizerBlacklist(SanitizerKind::KernelMemory, Fn, Loc))
+ Fn->addFnAttr(llvm::Attribute::SanitizeMemory);
+
if (getLangOpts().Sanitize.has(SanitizerKind::SafeStack) &&
!isInSanitizerBlacklist(SanitizerKind::SafeStack, Fn, Loc))
Fn->addFnAttr(llvm::Attribute::SafeStack);
@@ -355,6 +376,22 @@ llvm::Function *CodeGenModule::CreateGlobalInitOrDestructFunction(
!isInSanitizerBlacklist(SanitizerKind::ShadowCallStack, Fn, Loc))
Fn->addFnAttr(llvm::Attribute::ShadowCallStack);
+ auto RASignKind = getCodeGenOpts().getSignReturnAddress();
+ if (RASignKind != CodeGenOptions::SignReturnAddressScope::None) {
+ Fn->addFnAttr("sign-return-address",
+ RASignKind == CodeGenOptions::SignReturnAddressScope::All
+ ? "all"
+ : "non-leaf");
+ auto RASignKey = getCodeGenOpts().getSignReturnAddressKey();
+ Fn->addFnAttr("sign-return-address-key",
+ RASignKey == CodeGenOptions::SignReturnAddressKeyValue::AKey
+ ? "a_key"
+ : "b_key");
+ }
+
+ if (getCodeGenOpts().BranchTargetEnforcement)
+ Fn->addFnAttr("branch-target-enforcement");
+
return Fn;
}
@@ -565,7 +602,7 @@ void CodeGenFunction::GenerateCXXGlobalVarDeclInitFunc(llvm::Function *Fn,
if (D->hasAttr<NoDebugAttr>())
DebugInfo = nullptr; // disable debug info indefinitely for this function
- CurEHLocation = D->getLocStart();
+ CurEHLocation = D->getBeginLoc();
StartFunction(GlobalDecl(D), getContext().VoidTy, Fn,
getTypes().arrangeNullaryFunction(),
@@ -587,7 +624,7 @@ void CodeGenFunction::GenerateCXXGlobalVarDeclInitFunc(llvm::Function *Fn,
void
CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn,
ArrayRef<llvm::Function *> Decls,
- Address Guard) {
+ ConstantAddress Guard) {
{
auto NL = ApplyDebugLocation::CreateEmpty(*this);
StartFunction(GlobalDecl(), getContext().VoidTy, Fn,
@@ -611,6 +648,12 @@ CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn,
// initializers use previously-initialized thread_local vars, that's
// probably supposed to be OK, but the standard doesn't say.
Builder.CreateStore(llvm::ConstantInt::get(GuardVal->getType(),1), Guard);
+
+ // The guard variable can't ever change again.
+ EmitInvariantStart(
+ Guard.getPointer(),
+ CharUnits::fromQuantity(
+ CGM.getDataLayout().getTypeAllocSize(GuardVal->getType())));
}
RunCleanupsScope Scope(*this);
@@ -679,7 +722,7 @@ llvm::Function *CodeGenFunction::generateDestroyHelper(
llvm::Function *fn = CGM.CreateGlobalInitOrDestructFunction(
FTy, "__cxx_global_array_dtor", FI, VD->getLocation());
- CurEHLocation = VD->getLocStart();
+ CurEHLocation = VD->getBeginLoc();
StartFunction(VD, getContext().VoidTy, fn, FI, args);
diff --git a/lib/CodeGen/CGException.cpp b/lib/CodeGen/CGException.cpp
index a2ff102e1ab4..5756e13d2623 100644
--- a/lib/CodeGen/CGException.cpp
+++ b/lib/CodeGen/CGException.cpp
@@ -66,7 +66,7 @@ llvm::Constant *CodeGenModule::getTerminateFn() {
name = "__std_terminate";
else
name = "?terminate@@YAXXZ";
- } else if (getLangOpts().ObjC1 &&
+ } else if (getLangOpts().ObjC &&
getLangOpts().ObjCRuntime.hasTerminate())
name = "objc_terminate";
else
@@ -224,7 +224,7 @@ const EHPersonality &EHPersonality::get(CodeGenModule &CGM,
if (FD && FD->usesSEHTry())
return getSEHPersonalityMSVC(T);
- if (L.ObjC1)
+ if (L.ObjC)
return L.CPlusPlus ? getObjCXXPersonality(Target, L)
: getObjCPersonality(Target, L);
return L.CPlusPlus ? getCXXPersonality(Target, L)
@@ -250,7 +250,11 @@ static llvm::Constant *getPersonalityFn(CodeGenModule &CGM,
static llvm::Constant *getOpaquePersonalityFn(CodeGenModule &CGM,
const EHPersonality &Personality) {
llvm::Constant *Fn = getPersonalityFn(CGM, Personality);
- return llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
+ llvm::PointerType* Int8PtrTy = llvm::PointerType::get(
+ llvm::Type::getInt8Ty(CGM.getLLVMContext()),
+ CGM.getDataLayout().getProgramAddressSpace());
+
+ return llvm::ConstantExpr::getBitCast(Fn, Int8PtrTy);
}
/// Check whether a landingpad instruction only uses C++ features.
@@ -315,7 +319,7 @@ static bool PersonalityHasOnlyCXXUses(llvm::Constant *Fn) {
/// when it really needs it.
void CodeGenModule::SimplifyPersonality() {
// If we're not in ObjC++ -fexceptions, there's nothing to do.
- if (!LangOpts.CPlusPlus || !LangOpts.ObjC1 || !LangOpts.Exceptions)
+ if (!LangOpts.CPlusPlus || !LangOpts.ObjC || !LangOpts.Exceptions)
return;
// Both the problem this endeavors to fix and the way the logic
@@ -1248,7 +1252,7 @@ void CodeGenFunction::ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) {
// we follow the false destination for each of the cond branches to reach
// the rethrow block.
llvm::BasicBlock *RethrowBlock = WasmCatchStartBlock;
- while (llvm::TerminatorInst *TI = RethrowBlock->getTerminator()) {
+ while (llvm::Instruction *TI = RethrowBlock->getTerminator()) {
auto *BI = cast<llvm::BranchInst>(TI);
assert(BI->isConditional());
RethrowBlock = BI->getSuccessor(1);
@@ -1623,8 +1627,16 @@ struct PerformSEHFinally final : EHScopeStack::Cleanup {
// Compute the two argument values.
QualType ArgTys[2] = {Context.UnsignedCharTy, Context.VoidPtrTy};
- llvm::Value *LocalAddrFn = CGM.getIntrinsic(llvm::Intrinsic::localaddress);
- llvm::Value *FP = CGF.Builder.CreateCall(LocalAddrFn);
+ llvm::Value *FP = nullptr;
+ // If CGF.IsOutlinedSEHHelper is true, then we are within a finally block.
+ if (CGF.IsOutlinedSEHHelper) {
+ FP = &CGF.CurFn->arg_begin()[1];
+ } else {
+ llvm::Value *LocalAddrFn =
+ CGM.getIntrinsic(llvm::Intrinsic::localaddress);
+ FP = CGF.Builder.CreateCall(LocalAddrFn);
+ }
+
llvm::Value *IsForEH =
llvm::ConstantInt::get(CGF.ConvertType(ArgTys[0]), F.isForEHCleanup());
Args.add(RValue::get(IsForEH), ArgTys[0]);
@@ -1777,7 +1789,7 @@ void CodeGenFunction::EmitCapturedLocals(CodeGenFunction &ParentCGF,
// frame pointer of the parent function. We only need to do this in filters,
// since finally funclets recover the parent FP for us.
llvm::Function *RecoverFPIntrin =
- CGM.getIntrinsic(llvm::Intrinsic::x86_seh_recoverfp);
+ CGM.getIntrinsic(llvm::Intrinsic::eh_recoverfp);
llvm::Constant *ParentI8Fn =
llvm::ConstantExpr::getBitCast(ParentCGF.CurFn, Int8PtrTy);
ParentFP = Builder.CreateCall(RecoverFPIntrin, {ParentI8Fn, EntryFP});
@@ -1823,13 +1835,13 @@ void CodeGenFunction::EmitCapturedLocals(CodeGenFunction &ParentCGF,
void CodeGenFunction::startOutlinedSEHHelper(CodeGenFunction &ParentCGF,
bool IsFilter,
const Stmt *OutlinedStmt) {
- SourceLocation StartLoc = OutlinedStmt->getLocStart();
+ SourceLocation StartLoc = OutlinedStmt->getBeginLoc();
// Get the mangled function name.
SmallString<128> Name;
{
llvm::raw_svector_ostream OS(Name);
- const FunctionDecl *ParentSEHFn = ParentCGF.CurSEHParent;
+ const NamedDecl *ParentSEHFn = ParentCGF.CurSEHParent;
assert(ParentSEHFn && "No CurSEHParent!");
MangleContext &Mangler = CGM.getCXXABI().getMangleContext();
if (IsFilter)
@@ -1871,10 +1883,10 @@ void CodeGenFunction::startOutlinedSEHHelper(CodeGenFunction &ParentCGF,
IsOutlinedSEHHelper = true;
StartFunction(GlobalDecl(), RetTy, Fn, FnInfo, Args,
- OutlinedStmt->getLocStart(), OutlinedStmt->getLocStart());
+ OutlinedStmt->getBeginLoc(), OutlinedStmt->getBeginLoc());
CurSEHParent = ParentCGF.CurSEHParent;
- CGM.SetLLVMFunctionAttributes(nullptr, FnInfo, CurFn);
+ CGM.SetLLVMFunctionAttributes(GlobalDecl(), FnInfo, CurFn);
EmitCapturedLocals(ParentCGF, OutlinedStmt, IsFilter);
}
@@ -1893,7 +1905,7 @@ CodeGenFunction::GenerateSEHFilterFunction(CodeGenFunction &ParentCGF,
FilterExpr->getType()->isSignedIntegerType());
Builder.CreateStore(R, ReturnValue);
- FinishFunction(FilterExpr->getLocEnd());
+ FinishFunction(FilterExpr->getEndLoc());
return CurFn;
}
@@ -1907,7 +1919,7 @@ CodeGenFunction::GenerateSEHFinallyFunction(CodeGenFunction &ParentCGF,
// Emit the body of the finally block and return.
EmitStmt(FinallyBlock);
- FinishFunction(FinallyBlock->getLocEnd());
+ FinishFunction(FinallyBlock->getEndLoc());
return CurFn;
}
@@ -1972,6 +1984,11 @@ llvm::Value *CodeGenFunction::EmitSEHAbnormalTermination() {
return Builder.CreateZExt(&*AI, Int32Ty);
}
+void CodeGenFunction::pushSEHCleanup(CleanupKind Kind,
+ llvm::Function *FinallyFunc) {
+ EHStack.pushCleanup<PerformSEHFinally>(Kind, FinallyFunc);
+}
+
void CodeGenFunction::EnterSEHTryStmt(const SEHTryStmt &S) {
CodeGenFunction HelperCGF(CGM, /*suppressNewContext=*/true);
if (const SEHFinallyStmt *Finally = S.getFinallyHandler()) {
diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp
index f168dd02ead1..34a921e2dc00 100644
--- a/lib/CodeGen/CGExpr.cpp
+++ b/lib/CodeGen/CGExpr.cpp
@@ -26,7 +26,7 @@
#include "clang/AST/Attr.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/NSAPI.h"
-#include "clang/Frontend/CodeGenOptions.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/DataLayout.h"
@@ -419,8 +419,12 @@ LValue CodeGenFunction::
EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) {
const Expr *E = M->GetTemporaryExpr();
- // FIXME: ideally this would use EmitAnyExprToMem, however, we cannot do so
- // as that will cause the lifetime adjustment to be lost for ARC
+ assert((!M->getExtendingDecl() || !isa<VarDecl>(M->getExtendingDecl()) ||
+ !cast<VarDecl>(M->getExtendingDecl())->isARCPseudoStrong()) &&
+ "Reference should never be pseudo-strong!");
+
+ // FIXME: ideally this would use EmitAnyExprToMem, however, we cannot do so
+ // as that will cause the lifetime adjustment to be lost for ARC
auto ownership = M->getType().getObjCLifetime();
if (ownership != Qualifiers::OCL_None &&
ownership != Qualifiers::OCL_ExplicitNone) {
@@ -498,18 +502,51 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) {
} else {
switch (M->getStorageDuration()) {
case SD_Automatic:
- case SD_FullExpression:
if (auto *Size = EmitLifetimeStart(
CGM.getDataLayout().getTypeAllocSize(Alloca.getElementType()),
Alloca.getPointer())) {
- if (M->getStorageDuration() == SD_Automatic)
- pushCleanupAfterFullExpr<CallLifetimeEnd>(NormalEHLifetimeMarker,
- Alloca, Size);
- else
- pushFullExprCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, Alloca,
- Size);
+ pushCleanupAfterFullExpr<CallLifetimeEnd>(NormalEHLifetimeMarker,
+ Alloca, Size);
+ }
+ break;
+
+ case SD_FullExpression: {
+ if (!ShouldEmitLifetimeMarkers)
+ break;
+
+ // Avoid creating a conditional cleanup just to hold an llvm.lifetime.end
+ // marker. Instead, start the lifetime of a conditional temporary earlier
+ // so that it's unconditional. Don't do this in ASan's use-after-scope
+ // mode so that it gets the more precise lifetime marks. If the type has
+ // a non-trivial destructor, we'll have a cleanup block for it anyway,
+ // so this typically doesn't help; skip it in that case.
+ ConditionalEvaluation *OldConditional = nullptr;
+ CGBuilderTy::InsertPoint OldIP;
+ if (isInConditionalBranch() && !E->getType().isDestructedType() &&
+ !CGM.getCodeGenOpts().SanitizeAddressUseAfterScope) {
+ OldConditional = OutermostConditional;
+ OutermostConditional = nullptr;
+
+ OldIP = Builder.saveIP();
+ llvm::BasicBlock *Block = OldConditional->getStartingBlock();
+ Builder.restoreIP(CGBuilderTy::InsertPoint(
+ Block, llvm::BasicBlock::iterator(Block->back())));
+ }
+
+ if (auto *Size = EmitLifetimeStart(
+ CGM.getDataLayout().getTypeAllocSize(Alloca.getElementType()),
+ Alloca.getPointer())) {
+ pushFullExprCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, Alloca,
+ Size);
+ }
+
+ if (OldConditional) {
+ OutermostConditional = OldConditional;
+ Builder.restoreIP(OldIP);
}
break;
+ }
+
default:
break;
}
@@ -1043,7 +1080,7 @@ Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E,
EmitVTablePtrCheckForCast(PT->getPointeeType(), Addr.getPointer(),
/*MayBeNull=*/true,
CodeGenFunction::CFITCK_UnrelatedCast,
- CE->getLocStart());
+ CE->getBeginLoc());
}
return CE->getCastKind() != CK_AddressSpaceConversion
? Builder.CreateBitCast(Addr, ConvertType(E->getType()))
@@ -1227,6 +1264,8 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) {
return EmitVAArgExprLValue(cast<VAArgExpr>(E));
case Expr::DeclRefExprClass:
return EmitDeclRefLValue(cast<DeclRefExpr>(E));
+ case Expr::ConstantExprClass:
+ return EmitLValue(cast<ConstantExpr>(E)->getSubExpr());
case Expr::ParenExprClass:
return EmitLValue(cast<ParenExpr>(E)->getSubExpr());
case Expr::GenericSelectionExprClass:
@@ -1458,6 +1497,16 @@ CodeGenFunction::tryEmitAsConstant(const MemberExpr *ME) {
return ConstantEmission();
}
+llvm::Value *CodeGenFunction::emitScalarConstant(
+ const CodeGenFunction::ConstantEmission &Constant, Expr *E) {
+ assert(Constant && "not a constant");
+ if (Constant.isReference())
+ return EmitLoadOfLValue(Constant.getReferenceLValue(*this, E),
+ E->getExprLoc())
+ .getScalarVal();
+ return Constant.getValue();
+}
+
llvm::Value *CodeGenFunction::EmitLoadOfScalar(LValue lvalue,
SourceLocation Loc) {
return EmitLoadOfScalar(lvalue.getAddress(), lvalue.isVolatile(),
@@ -2237,18 +2286,14 @@ static LValue EmitThreadPrivateVarDeclLValue(
static Address emitDeclTargetLinkVarDeclLValue(CodeGenFunction &CGF,
const VarDecl *VD, QualType T) {
- for (const auto *D : VD->redecls()) {
- if (!VD->hasAttrs())
- continue;
- if (const auto *Attr = D->getAttr<OMPDeclareTargetDeclAttr>())
- if (Attr->getMapType() == OMPDeclareTargetDeclAttr::MT_Link) {
- QualType PtrTy = CGF.getContext().getPointerType(VD->getType());
- Address Addr =
- CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD);
- return CGF.EmitLoadOfPointer(Addr, PtrTy->castAs<PointerType>());
- }
- }
- return Address::invalid();
+ llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
+ OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
+ if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_To)
+ return Address::invalid();
+ assert(*Res == OMPDeclareTargetDeclAttr::MT_Link && "Expected link clause");
+ QualType PtrTy = CGF.getContext().getPointerType(VD->getType());
+ Address Addr = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD);
+ return CGF.EmitLoadOfPointer(Addr, PtrTy->castAs<PointerType>());
}
Address
@@ -2408,6 +2453,7 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
// A DeclRefExpr for a reference initialized by a constant expression can
// appear without being odr-used. Directly emit the constant initializer.
const Expr *Init = VD->getAnyInitializer(VD);
+ const auto *BD = dyn_cast_or_null<BlockDecl>(CurCodeDecl);
if (Init && !isa<ParmVarDecl>(VD) && VD->getType()->isReferenceType() &&
VD->isUsableInConstantExpressions(getContext()) &&
VD->checkInitIsICE() &&
@@ -2417,7 +2463,7 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
(LocalDeclMap.count(VD->getCanonicalDecl()) ||
CapturedStmtInfo->lookup(VD->getCanonicalDecl()))) ||
LambdaCaptureFields.lookup(VD->getCanonicalDecl()) ||
- isa<BlockDecl>(CurCodeDecl)))) {
+ (BD && BD->capturesVariable(VD))))) {
llvm::Constant *Val =
ConstantEmitter(*this).emitAbstract(E->getLocation(),
*VD->evaluateValue(),
@@ -2456,7 +2502,7 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
}
assert(isa<BlockDecl>(CurCodeDecl));
- Address addr = GetAddrOfBlockDecl(VD, VD->hasAttr<BlocksAttr>());
+ Address addr = GetAddrOfBlockDecl(VD);
return MakeAddrLValue(addr, T, AlignmentSource::Decl);
}
}
@@ -2508,7 +2554,7 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
}
// Drill into block byref variables.
- bool isBlockByref = VD->hasAttr<BlocksAttr>();
+ bool isBlockByref = VD->isEscapingByref();
if (isBlockByref) {
addr = emitBlockByrefAddress(addr, VD);
}
@@ -2571,7 +2617,7 @@ LValue CodeGenFunction::EmitUnaryOpLValue(const UnaryOperator *E) {
// of a pointer to object; as in void foo (__weak id *param); *param = 0;
// But, we continue to generate __strong write barrier on indirect write
// into a pointer to object.
- if (getLangOpts().ObjC1 &&
+ if (getLangOpts().ObjC &&
getLangOpts().getGC() != LangOptions::NonGC &&
LV.isObjCWeak())
LV.setNonGC(!E->isOBJCGCCandidate(getContext()));
@@ -2632,7 +2678,7 @@ LValue CodeGenFunction::EmitPredefinedLValue(const PredefinedExpr *E) {
if (FnName.startswith("\01"))
FnName = FnName.substr(1);
StringRef NameItems[] = {
- PredefinedExpr::getIdentTypeName(E->getIdentType()), FnName};
+ PredefinedExpr::getIdentKindName(E->getIdentKind()), FnName};
std::string GVName = llvm::join(NameItems, NameItems + 2, ".");
if (auto *BD = dyn_cast_or_null<BlockDecl>(CurCodeDecl)) {
std::string Name = SL->getString();
@@ -2837,6 +2883,11 @@ static void emitCheckHandlerCall(CodeGenFunction &CGF,
CheckRecoverableKind RecoverKind, bool IsFatal,
llvm::BasicBlock *ContBB) {
assert(IsFatal || RecoverKind != CheckRecoverableKind::Unrecoverable);
+ Optional<ApplyDebugLocation> DL;
+ if (!CGF.Builder.getCurrentDebugLocation()) {
+ // Ensure that the call has at least an artificial debug location.
+ DL.emplace(CGF, SourceLocation());
+ }
bool NeedsAbortSuffix =
IsFatal && RecoverKind != CheckRecoverableKind::Unrecoverable;
bool MinimalRuntime = CGF.CGM.getCodeGenOpts().SanitizeMinimalRuntime;
@@ -3448,7 +3499,7 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
LValue LV = MakeAddrLValue(Addr, E->getType(), EltBaseInfo, EltTBAAInfo);
- if (getLangOpts().ObjC1 &&
+ if (getLangOpts().ObjC &&
getLangOpts().getGC() != LangOptions::NonGC) {
LV.setNonGC(!E->isOBJCGCCandidate(getContext()));
setObjCGCLValueClass(getContext(), E, LV);
@@ -3901,7 +3952,7 @@ LValue CodeGenFunction::EmitLValueForField(LValue base,
LValue RefLVal = MakeAddrLValue(addr, FieldType, FieldBaseInfo,
FieldTBAAInfo);
if (RecordCVR & Qualifiers::Volatile)
- RefLVal.getQuals().setVolatile(true);
+ RefLVal.getQuals().addVolatile();
addr = EmitLoadOfReference(RefLVal, &FieldBaseInfo, &FieldTBAAInfo);
// Qualifiers on the struct don't apply to the referencee.
@@ -4121,8 +4172,9 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) {
case CK_ARCReclaimReturnedObject:
case CK_ARCExtendBlockObject:
case CK_CopyAndAutoreleaseBlockObject:
- case CK_AddressSpaceConversion:
case CK_IntToOCLSampler:
+ case CK_FixedPointCast:
+ case CK_FixedPointToBoolean:
return EmitUnsupportedLValue(E, "unexpected cast lvalue");
case CK_Dependent:
@@ -4193,8 +4245,8 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) {
if (SanOpts.has(SanitizerKind::CFIDerivedCast))
EmitVTablePtrCheckForCast(E->getType(), Derived.getPointer(),
- /*MayBeNull=*/false,
- CFITCK_DerivedCast, E->getLocStart());
+ /*MayBeNull=*/false, CFITCK_DerivedCast,
+ E->getBeginLoc());
return MakeAddrLValue(Derived, E->getType(), LV.getBaseInfo(),
CGM.getTBAAInfoForSubobject(LV, E->getType()));
@@ -4210,12 +4262,21 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) {
if (SanOpts.has(SanitizerKind::CFIUnrelatedCast))
EmitVTablePtrCheckForCast(E->getType(), V.getPointer(),
- /*MayBeNull=*/false,
- CFITCK_UnrelatedCast, E->getLocStart());
+ /*MayBeNull=*/false, CFITCK_UnrelatedCast,
+ E->getBeginLoc());
return MakeAddrLValue(V, E->getType(), LV.getBaseInfo(),
CGM.getTBAAInfoForSubobject(LV, E->getType()));
}
+ case CK_AddressSpaceConversion: {
+ LValue LV = EmitLValue(E->getSubExpr());
+ QualType DestTy = getContext().getPointerType(E->getType());
+ llvm::Value *V = getTargetHooks().performAddrSpaceCast(
+ *this, LV.getPointer(), E->getSubExpr()->getType().getAddressSpace(),
+ E->getType().getAddressSpace(), ConvertType(DestTy));
+ return MakeAddrLValue(Address(V, LV.getAddress().getAlignment()),
+ E->getType(), LV.getBaseInfo(), LV.getTBAAInfo());
+ }
case CK_ObjCObjectLValueCast: {
LValue LV = EmitLValue(E->getSubExpr());
Address V = Builder.CreateElementBitCast(LV.getAddress(),
@@ -4223,10 +4284,8 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) {
return MakeAddrLValue(V, E->getType(), LV.getBaseInfo(),
CGM.getTBAAInfoForSubobject(LV, E->getType()));
}
- case CK_ZeroToOCLQueue:
- llvm_unreachable("NULL to OpenCL queue lvalue cast is not valid");
- case CK_ZeroToOCLEvent:
- llvm_unreachable("NULL to OpenCL event lvalue cast is not valid");
+ case CK_ZeroToOCLOpaqueType:
+ llvm_unreachable("NULL to OpenCL opaque type lvalue cast is not valid");
}
llvm_unreachable("Unhandled lvalue cast kind?");
@@ -4333,7 +4392,7 @@ static CGCallee EmitDirectCallee(CodeGenFunction &CGF, const FunctionDecl *FD) {
}
llvm::Constant *calleePtr = EmitFunctionDeclPointer(CGF.CGM, FD);
- return CGCallee::forDirect(calleePtr, FD);
+ return CGCallee::forDirect(calleePtr, GlobalDecl(FD));
}
CGCallee CodeGenFunction::EmitCallee(const Expr *E) {
@@ -4377,8 +4436,13 @@ CGCallee CodeGenFunction::EmitCallee(const Expr *E) {
calleePtr = EmitLValue(E).getPointer();
}
assert(functionType->isFunctionType());
- CGCalleeInfo calleeInfo(functionType->getAs<FunctionProtoType>(),
- E->getReferencedDeclOfCallee());
+
+ GlobalDecl GD;
+ if (const auto *VD =
+ dyn_cast_or_null<VarDecl>(E->getReferencedDeclOfCallee()))
+ GD = GlobalDecl(VD);
+
+ CGCalleeInfo calleeInfo(functionType->getAs<FunctionProtoType>(), GD);
CGCallee callee(calleeInfo, calleePtr);
return callee;
}
@@ -4563,7 +4627,8 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee
assert(CalleeType->isFunctionPointerType() &&
"Call must have function pointer type!");
- const Decl *TargetDecl = OrigCallee.getAbstractInfo().getCalleeDecl();
+ const Decl *TargetDecl =
+ OrigCallee.getAbstractInfo().getCalleeDecl().getDecl();
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl))
// We can only guarantee that a function is called from the correct
@@ -4620,10 +4685,8 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee
DecodeAddrUsedInPrologue(CalleePtr, CalleeRTTIEncoded);
llvm::Value *CalleeRTTIMatch =
Builder.CreateICmpEQ(CalleeRTTI, FTRTTIConst);
- llvm::Constant *StaticData[] = {
- EmitCheckSourceLocation(E->getLocStart()),
- EmitCheckTypeDescriptor(CalleeType)
- };
+ llvm::Constant *StaticData[] = {EmitCheckSourceLocation(E->getBeginLoc()),
+ EmitCheckTypeDescriptor(CalleeType)};
EmitCheck(std::make_pair(CalleeRTTIMatch, SanitizerKind::Function),
SanitizerHandler::FunctionTypeMismatch, StaticData, CalleePtr);
@@ -4657,7 +4720,7 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee
auto CrossDsoTypeId = CGM.CreateCrossDsoCfiTypeId(MD);
llvm::Constant *StaticData[] = {
llvm::ConstantInt::get(Int8Ty, CFITCK_ICall),
- EmitCheckSourceLocation(E->getLocStart()),
+ EmitCheckSourceLocation(E->getBeginLoc()),
EmitCheckTypeDescriptor(QualType(FnType, 0)),
};
if (CGM.getCodeGenOpts().SanitizeCfiCrossDso && CrossDsoTypeId) {
diff --git a/lib/CodeGen/CGExprAgg.cpp b/lib/CodeGen/CGExprAgg.cpp
index 62641102861c..db49b3f28a59 100644
--- a/lib/CodeGen/CGExprAgg.cpp
+++ b/lib/CodeGen/CGExprAgg.cpp
@@ -125,6 +125,10 @@ public:
return Visit(E->getReplacement());
}
+ void VisitConstantExpr(ConstantExpr *E) {
+ return Visit(E->getSubExpr());
+ }
+
// l-values.
void VisitDeclRefExpr(DeclRefExpr *E) { EmitAggLoadOfLValue(E); }
void VisitMemberExpr(MemberExpr *ME) { EmitAggLoadOfLValue(ME); }
@@ -847,10 +851,11 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) {
case CK_ARCExtendBlockObject:
case CK_CopyAndAutoreleaseBlockObject:
case CK_BuiltinFnToFnPtr:
- case CK_ZeroToOCLEvent:
- case CK_ZeroToOCLQueue:
+ case CK_ZeroToOCLOpaqueType:
case CK_AddressSpaceConversion:
case CK_IntToOCLSampler:
+ case CK_FixedPointCast:
+ case CK_FixedPointToBoolean:
llvm_unreachable("cast kind invalid for aggregate types");
}
}
diff --git a/lib/CodeGen/CGExprCXX.cpp b/lib/CodeGen/CGExprCXX.cpp
index f29ef754c03f..884ce96859c5 100644
--- a/lib/CodeGen/CGExprCXX.cpp
+++ b/lib/CodeGen/CGExprCXX.cpp
@@ -17,8 +17,8 @@
#include "CGDebugInfo.h"
#include "CGObjCRuntime.h"
#include "ConstantEmitter.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "clang/CodeGen/CGFunctionInfo.h"
-#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Intrinsics.h"
@@ -177,7 +177,8 @@ RValue CodeGenFunction::EmitCXXMemberCallExpr(const CXXMemberCallExpr *CE,
if (MD->isStatic()) {
// The method is static, emit it as we would a regular call.
- CGCallee callee = CGCallee::forDirect(CGM.GetAddrOfFunction(MD), MD);
+ CGCallee callee =
+ CGCallee::forDirect(CGM.GetAddrOfFunction(MD), GlobalDecl(MD));
return EmitCall(getContext().getPointerType(MD->getType()), callee, CE,
ReturnValue);
}
@@ -353,13 +354,13 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr(
else if (!DevirtualizedMethod)
Callee = CGCallee::forDirect(
CGM.getAddrOfCXXStructor(Dtor, StructorType::Complete, FInfo, Ty),
- Dtor);
+ GlobalDecl(Dtor, Dtor_Complete));
else {
const CXXDestructorDecl *DDtor =
cast<CXXDestructorDecl>(DevirtualizedMethod);
Callee = CGCallee::forDirect(
- CGM.GetAddrOfFunction(GlobalDecl(DDtor, Dtor_Complete), Ty),
- DDtor);
+ CGM.GetAddrOfFunction(GlobalDecl(DDtor, Dtor_Complete), Ty),
+ GlobalDecl(DDtor, Dtor_Complete));
}
EmitCXXMemberOrOperatorCall(
CalleeDecl, Callee, ReturnValue, This.getPointer(),
@@ -371,8 +372,8 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr(
CGCallee Callee;
if (const CXXConstructorDecl *Ctor = dyn_cast<CXXConstructorDecl>(MD)) {
Callee = CGCallee::forDirect(
- CGM.GetAddrOfFunction(GlobalDecl(Ctor, Ctor_Complete), Ty),
- Ctor);
+ CGM.GetAddrOfFunction(GlobalDecl(Ctor, Ctor_Complete), Ty),
+ GlobalDecl(Ctor, Ctor_Complete));
} else if (UseVirtualCall) {
Callee = CGCallee::forVirtual(CE, MD, This.getAddress(), Ty);
} else {
@@ -383,17 +384,18 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr(
std::tie(VTable, RD) =
CGM.getCXXABI().LoadVTablePtr(*this, This.getAddress(),
MD->getParent());
- EmitVTablePtrCheckForCall(RD, VTable, CFITCK_NVCall, CE->getLocStart());
+ EmitVTablePtrCheckForCall(RD, VTable, CFITCK_NVCall, CE->getBeginLoc());
}
if (getLangOpts().AppleKext && MD->isVirtual() && HasQualifier)
Callee = BuildAppleKextVirtualCall(MD, Qualifier, Ty);
else if (!DevirtualizedMethod)
- Callee = CGCallee::forDirect(CGM.GetAddrOfFunction(MD, Ty), MD);
+ Callee =
+ CGCallee::forDirect(CGM.GetAddrOfFunction(MD, Ty), GlobalDecl(MD));
else {
- Callee = CGCallee::forDirect(
- CGM.GetAddrOfFunction(DevirtualizedMethod, Ty),
- DevirtualizedMethod);
+ Callee =
+ CGCallee::forDirect(CGM.GetAddrOfFunction(DevirtualizedMethod, Ty),
+ GlobalDecl(DevirtualizedMethod));
}
}
@@ -1293,7 +1295,7 @@ static RValue EmitNewDeleteCall(CodeGenFunction &CGF,
const CallArgList &Args) {
llvm::Instruction *CallOrInvoke;
llvm::Constant *CalleePtr = CGF.CGM.GetAddrOfFunction(CalleeDecl);
- CGCallee Callee = CGCallee::forDirect(CalleePtr, CalleeDecl);
+ CGCallee Callee = CGCallee::forDirect(CalleePtr, GlobalDecl(CalleeDecl));
RValue RV =
CGF.EmitCall(CGF.CGM.getTypes().arrangeFreeFunctionCall(
Args, CalleeType, /*chainCall=*/false),
@@ -1654,9 +1656,10 @@ llvm::Value *CodeGenFunction::EmitCXXNewExpr(const CXXNewExpr *E) {
// Emit a null check on the allocation result if the allocation
// function is allowed to return null (because it has a non-throwing
// exception spec or is the reserved placement new) and we have an
- // interesting initializer.
- bool nullCheck = E->shouldNullCheckAllocation(getContext()) &&
- (!allocType.isPODType(getContext()) || E->hasInitializer());
+ // interesting initializer, or will be running sanitizers on the initialization.
+ bool nullCheck = E->shouldNullCheckAllocation() &&
+ (!allocType.isPODType(getContext()) || E->hasInitializer() ||
+ sanitizePerformTypeCheck());
llvm::BasicBlock *nullCheckBB = nullptr;
llvm::BasicBlock *contBB = nullptr;
@@ -2252,7 +2255,6 @@ llvm::Value *CodeGenFunction::EmitDynamicCast(Address ThisAddr,
}
void CodeGenFunction::EmitLambdaExpr(const LambdaExpr *E, AggValueSlot Slot) {
- RunCleanupsScope Scope(*this);
LValue SlotLV = MakeAddrLValue(Slot.getAddress(), E->getType());
CXXRecordDecl::field_iterator CurField = E->getLambdaClass()->field_begin();
diff --git a/lib/CodeGen/CGExprComplex.cpp b/lib/CodeGen/CGExprComplex.cpp
index fb176093a741..2db693b44c90 100644
--- a/lib/CodeGen/CGExprComplex.cpp
+++ b/lib/CodeGen/CGExprComplex.cpp
@@ -101,6 +101,9 @@ public:
llvm_unreachable("Stmt can't have complex result type!");
}
ComplexPairTy VisitExpr(Expr *S);
+ ComplexPairTy VisitConstantExpr(ConstantExpr *E) {
+ return Visit(E->getSubExpr());
+ }
ComplexPairTy VisitParenExpr(ParenExpr *PE) { return Visit(PE->getSubExpr());}
ComplexPairTy VisitGenericSelectionExpr(GenericSelectionExpr *GE) {
return Visit(GE->getResultExpr());
@@ -505,10 +508,11 @@ ComplexPairTy ComplexExprEmitter::EmitCast(CastKind CK, Expr *Op,
case CK_ARCExtendBlockObject:
case CK_CopyAndAutoreleaseBlockObject:
case CK_BuiltinFnToFnPtr:
- case CK_ZeroToOCLEvent:
- case CK_ZeroToOCLQueue:
+ case CK_ZeroToOCLOpaqueType:
case CK_AddressSpaceConversion:
case CK_IntToOCLSampler:
+ case CK_FixedPointCast:
+ case CK_FixedPointToBoolean:
llvm_unreachable("invalid cast kind for complex value");
case CK_FloatingRealToComplex:
diff --git a/lib/CodeGen/CGExprConstant.cpp b/lib/CodeGen/CGExprConstant.cpp
index 68766479a539..c9475840aeeb 100644
--- a/lib/CodeGen/CGExprConstant.cpp
+++ b/lib/CodeGen/CGExprConstant.cpp
@@ -47,7 +47,7 @@ class ConstStructBuilder {
public:
static llvm::Constant *BuildStruct(ConstantEmitter &Emitter,
ConstExprEmitter *ExprEmitter,
- llvm::ConstantStruct *Base,
+ llvm::Constant *Base,
InitListExpr *Updater,
QualType ValTy);
static llvm::Constant *BuildStruct(ConstantEmitter &Emitter,
@@ -76,7 +76,7 @@ private:
void ConvertStructToPacked();
bool Build(InitListExpr *ILE);
- bool Build(ConstExprEmitter *Emitter, llvm::ConstantStruct *Base,
+ bool Build(ConstExprEmitter *Emitter, llvm::Constant *Base,
InitListExpr *Updater);
bool Build(const APValue &Val, const RecordDecl *RD, bool IsPrimaryBase,
const CXXRecordDecl *VTableClass, CharUnits BaseOffset);
@@ -566,7 +566,7 @@ llvm::Constant *ConstStructBuilder::Finalize(QualType Ty) {
llvm::Constant *ConstStructBuilder::BuildStruct(ConstantEmitter &Emitter,
ConstExprEmitter *ExprEmitter,
- llvm::ConstantStruct *Base,
+ llvm::Constant *Base,
InitListExpr *Updater,
QualType ValTy) {
ConstStructBuilder Builder(Emitter);
@@ -723,6 +723,10 @@ public:
return nullptr;
}
+ llvm::Constant *VisitConstantExpr(ConstantExpr *CE, QualType T) {
+ return Visit(CE->getSubExpr(), T);
+ }
+
llvm::Constant *VisitParenExpr(ParenExpr *PE, QualType T) {
return Visit(PE->getSubExpr(), T);
}
@@ -869,8 +873,9 @@ public:
case CK_FloatingToIntegral:
case CK_FloatingToBoolean:
case CK_FloatingCast:
- case CK_ZeroToOCLEvent:
- case CK_ZeroToOCLQueue:
+ case CK_FixedPointCast:
+ case CK_FixedPointToBoolean:
+ case CK_ZeroToOCLOpaqueType:
return nullptr;
}
llvm_unreachable("Invalid CastKind");
@@ -1026,8 +1031,8 @@ public:
}
if (destType->isRecordType())
- return ConstStructBuilder::BuildStruct(Emitter, this,
- dyn_cast<llvm::ConstantStruct>(Base), Updater, destType);
+ return ConstStructBuilder::BuildStruct(Emitter, this, Base, Updater,
+ destType);
return nullptr;
}
@@ -1102,7 +1107,7 @@ public:
} // end anonymous namespace.
bool ConstStructBuilder::Build(ConstExprEmitter *ExprEmitter,
- llvm::ConstantStruct *Base,
+ llvm::Constant *Base,
InitListExpr *Updater) {
assert(Base && "base expression should not be empty");
@@ -1110,7 +1115,7 @@ bool ConstStructBuilder::Build(ConstExprEmitter *ExprEmitter,
RecordDecl *RD = ExprType->getAs<RecordType>()->getDecl();
const ASTRecordLayout &Layout = CGM.getContext().getASTRecordLayout(RD);
const llvm::StructLayout *BaseLayout = CGM.getDataLayout().getStructLayout(
- Base->getType());
+ cast<llvm::StructType>(Base->getType()));
unsigned FieldNo = -1;
unsigned ElementNo = 0;
@@ -1131,7 +1136,7 @@ bool ConstStructBuilder::Build(ConstExprEmitter *ExprEmitter,
if (Field->isUnnamedBitfield())
continue;
- llvm::Constant *EltInit = Base->getOperand(ElementNo);
+ llvm::Constant *EltInit = Base->getAggregateElement(ElementNo);
// Bail out if the type of the ConstantStruct does not have the same layout
// as the type of the InitListExpr.
@@ -1450,6 +1455,7 @@ llvm::Constant *ConstantEmitter::tryEmitPrivateForVarInit(const VarDecl &D) {
if (CD->isTrivial() && CD->isDefaultConstructor())
return CGM.EmitNullConstant(D.getType());
}
+ InConstantContext = true;
}
QualType destType = D.getType();
@@ -1547,7 +1553,7 @@ llvm::Constant *ConstantEmitter::tryEmitPrivate(const Expr *E,
if (destType->isReferenceType())
Success = E->EvaluateAsLValue(Result, CGM.getContext());
else
- Success = E->EvaluateAsRValue(Result, CGM.getContext());
+ Success = E->EvaluateAsRValue(Result, CGM.getContext(), InConstantContext);
llvm::Constant *C;
if (Success && !Result.HasSideEffects)
@@ -1600,6 +1606,7 @@ private:
ConstantLValue tryEmitBase(const APValue::LValueBase &base);
ConstantLValue VisitStmt(const Stmt *S) { return nullptr; }
+ ConstantLValue VisitConstantExpr(const ConstantExpr *E);
ConstantLValue VisitCompoundLiteralExpr(const CompoundLiteralExpr *E);
ConstantLValue VisitStringLiteral(const StringLiteral *E);
ConstantLValue VisitObjCEncodeExpr(const ObjCEncodeExpr *E);
@@ -1755,6 +1762,11 @@ ConstantLValueEmitter::tryEmitBase(const APValue::LValueBase &base) {
}
ConstantLValue
+ConstantLValueEmitter::VisitConstantExpr(const ConstantExpr *E) {
+ return Visit(E->getSubExpr());
+}
+
+ConstantLValue
ConstantLValueEmitter::VisitCompoundLiteralExpr(const CompoundLiteralExpr *E) {
return tryEmitGlobalCompoundLiteral(CGM, Emitter.CGF, E);
}
@@ -1782,7 +1794,7 @@ ConstantLValueEmitter::VisitPredefinedExpr(const PredefinedExpr *E) {
return cast<ConstantAddress>(Res.getAddress());
}
- auto kind = E->getIdentType();
+ auto kind = E->getIdentKind();
if (kind == PredefinedExpr::PrettyFunction) {
return CGM.GetAddrOfConstantCString("top level", ".tmp");
}
@@ -1968,6 +1980,16 @@ llvm::Constant *ConstantEmitter::tryEmitPrivate(const APValue &Value,
Elts.push_back(C);
}
+ // This means that the array type is probably "IncompleteType" or some
+ // type that is not ConstantArray.
+ if (CAT == nullptr && CommonElementType == nullptr && !NumInitElts) {
+ const ArrayType *AT = CGM.getContext().getAsArrayType(DestType);
+ CommonElementType = CGM.getTypes().ConvertType(AT->getElementType());
+ llvm::ArrayType *AType = llvm::ArrayType::get(CommonElementType,
+ NumElements);
+ return llvm::ConstantAggregateZero::get(AType);
+ }
+
return EmitArrayConstant(CGM, CAT, CommonElementType, NumElements, Elts,
Filler);
}
diff --git a/lib/CodeGen/CGExprScalar.cpp b/lib/CodeGen/CGExprScalar.cpp
index c62588c68272..1c14d4c99a23 100644
--- a/lib/CodeGen/CGExprScalar.cpp
+++ b/lib/CodeGen/CGExprScalar.cpp
@@ -11,11 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#include "CodeGenFunction.h"
-#include "CGCleanup.h"
#include "CGCXXABI.h"
+#include "CGCleanup.h"
#include "CGDebugInfo.h"
#include "CGObjCRuntime.h"
+#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
@@ -23,8 +23,9 @@
#include "clang/AST/Expr.h"
#include "clang/AST/RecordLayout.h"
#include "clang/AST/StmtVisitor.h"
+#include "clang/Basic/CodeGenOptions.h"
+#include "clang/Basic/FixedPoint.h"
#include "clang/Basic/TargetInfo.h"
-#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/ADT/Optional.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
@@ -257,8 +258,11 @@ public:
AVAttr = TTy->getDecl()->getAttr<AlignValueAttr>();
} else {
// Assumptions for function parameters are emitted at the start of the
- // function, so there is no need to repeat that here.
- if (isa<ParmVarDecl>(VD))
+ // function, so there is no need to repeat that here,
+ // unless the alignment-assumption sanitizer is enabled;
+ // in that case we prefer the assumption over the alignment attribute
+ // on the IR function parameter.
+ if (isa<ParmVarDecl>(VD) && !CGF.SanOpts.has(SanitizerKind::Alignment))
return;
AVAttr = VD->getAttr<AlignValueAttr>();
@@ -275,7 +279,8 @@ public:
Value *AlignmentValue = CGF.EmitScalarExpr(AVAttr->getAlignment());
llvm::ConstantInt *AlignmentCI = cast<llvm::ConstantInt>(AlignmentValue);
- CGF.EmitAlignmentAssumption(V, AlignmentCI->getZExtValue());
+ CGF.EmitAlignmentAssumption(V, E, AVAttr->getLocation(),
+ AlignmentCI->getZExtValue());
}
/// EmitLoadOfLValue - Given an expression with complex type that represents a
@@ -302,7 +307,11 @@ public:
/// Known implicit conversion check kinds.
/// Keep in sync with the enum of the same name in ubsan_handlers.h
enum ImplicitConversionCheckKind : unsigned char {
- ICCK_IntegerTruncation = 0,
+ ICCK_IntegerTruncation = 0, // Legacy, was only used by clang 7.
+ ICCK_UnsignedIntegerTruncation = 1,
+ ICCK_SignedIntegerTruncation = 2,
+ ICCK_IntegerSignChange = 3,
+ ICCK_SignedIntegerTruncationOrSignChange = 4,
};
/// Emit a check that an [implicit] truncation of an integer does not
@@ -310,21 +319,39 @@ public:
void EmitIntegerTruncationCheck(Value *Src, QualType SrcType, Value *Dst,
QualType DstType, SourceLocation Loc);
+ /// Emit a check that an [implicit] conversion of an integer does not change
+ /// the sign of the value. It is not UB, so we use the value after conversion.
+ /// NOTE: Src and Dst may be the exact same value! (point to the same thing)
+ void EmitIntegerSignChangeCheck(Value *Src, QualType SrcType, Value *Dst,
+ QualType DstType, SourceLocation Loc);
+
/// Emit a conversion from the specified type to the specified destination
/// type, both of which are LLVM scalar types.
struct ScalarConversionOpts {
bool TreatBooleanAsSigned;
bool EmitImplicitIntegerTruncationChecks;
+ bool EmitImplicitIntegerSignChangeChecks;
ScalarConversionOpts()
: TreatBooleanAsSigned(false),
- EmitImplicitIntegerTruncationChecks(false) {}
+ EmitImplicitIntegerTruncationChecks(false),
+ EmitImplicitIntegerSignChangeChecks(false) {}
+
+ ScalarConversionOpts(clang::SanitizerSet SanOpts)
+ : TreatBooleanAsSigned(false),
+ EmitImplicitIntegerTruncationChecks(
+ SanOpts.hasOneOf(SanitizerKind::ImplicitIntegerTruncation)),
+ EmitImplicitIntegerSignChangeChecks(
+ SanOpts.has(SanitizerKind::ImplicitIntegerSignChange)) {}
};
Value *
EmitScalarConversion(Value *Src, QualType SrcTy, QualType DstTy,
SourceLocation Loc,
ScalarConversionOpts Opts = ScalarConversionOpts());
+ Value *EmitFixedPointConversion(Value *Src, QualType SrcTy, QualType DstTy,
+ SourceLocation Loc);
+
/// Emit a conversion from the specified complex type to the specified
/// destination type, where the destination type is an LLVM scalar type.
Value *EmitComplexToScalarConversion(CodeGenFunction::ComplexPairTy Src,
@@ -382,6 +409,9 @@ public:
}
Value *VisitExpr(Expr *S);
+ Value *VisitConstantExpr(ConstantExpr *E) {
+ return Visit(E->getSubExpr());
+ }
Value *VisitParenExpr(ParenExpr *PE) {
return Visit(PE->getSubExpr());
}
@@ -450,19 +480,10 @@ public:
return CGF.getOrCreateOpaqueRValueMapping(E).getScalarVal();
}
- Value *emitConstant(const CodeGenFunction::ConstantEmission &Constant,
- Expr *E) {
- assert(Constant && "not a constant");
- if (Constant.isReference())
- return EmitLoadOfLValue(Constant.getReferenceLValue(CGF, E),
- E->getExprLoc());
- return Constant.getValue();
- }
-
// l-values.
Value *VisitDeclRefExpr(DeclRefExpr *E) {
if (CodeGenFunction::ConstantEmission Constant = CGF.tryEmitAsConstant(E))
- return emitConstant(Constant, E);
+ return CGF.emitScalarConstant(Constant, E);
return EmitLoadOfLValue(E);
}
@@ -664,7 +685,7 @@ public:
case LangOptions::SOB_Undefined:
if (!CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow))
return Builder.CreateNSWMul(Ops.LHS, Ops.RHS, "mul");
- // Fall through.
+ LLVM_FALLTHROUGH;
case LangOptions::SOB_Trapping:
if (CanElideOverflowCheck(CGF.getContext(), Ops))
return Builder.CreateNSWMul(Ops.LHS, Ops.RHS, "mul");
@@ -941,48 +962,233 @@ void ScalarExprEmitter::EmitFloatConversionCheck(
SanitizerHandler::FloatCastOverflow, StaticArgs, OrigSrc);
}
+// Should be called within CodeGenFunction::SanitizerScope RAII scope.
+// Returns {check kind, {check value, sanitizer mask}}; the check value is 'i1 false' when the truncation Src -> Dst was lossy.
+static std::pair<ScalarExprEmitter::ImplicitConversionCheckKind,
+ std::pair<llvm::Value *, SanitizerMask>>
+EmitIntegerTruncationCheckHelper(Value *Src, QualType SrcType, Value *Dst,
+ QualType DstType, CGBuilderTy &Builder) {
+ llvm::Type *SrcTy = Src->getType();
+ llvm::Type *DstTy = Dst->getType();
+ (void)DstTy; // Only used in assert()
+
+ // This should be truncation of integral types.
+ assert(Src != Dst);
+ assert(SrcTy->getScalarSizeInBits() > Dst->getType()->getScalarSizeInBits());
+ assert(isa<llvm::IntegerType>(SrcTy) && isa<llvm::IntegerType>(DstTy) &&
+ "non-integer llvm type");
+
+ bool SrcSigned = SrcType->isSignedIntegerOrEnumerationType();
+ bool DstSigned = DstType->isSignedIntegerOrEnumerationType();
+
+ // If both (src and dst) types are unsigned, then it's an unsigned truncation.
+ // Else, it is a signed truncation.
+ ScalarExprEmitter::ImplicitConversionCheckKind Kind;
+ SanitizerMask Mask;
+ if (!SrcSigned && !DstSigned) {
+ Kind = ScalarExprEmitter::ICCK_UnsignedIntegerTruncation;
+ Mask = SanitizerKind::ImplicitUnsignedIntegerTruncation;
+ } else {
+ Kind = ScalarExprEmitter::ICCK_SignedIntegerTruncation;
+ Mask = SanitizerKind::ImplicitSignedIntegerTruncation;
+ }
+
+ llvm::Value *Check = nullptr;
+ // 1. Extend the truncated value back to the same width as the Src.
+ Check = Builder.CreateIntCast(Dst, SrcTy, DstSigned, "anyext");
+ // 2. Equality-compare with the original source value
+ Check = Builder.CreateICmpEQ(Check, Src, "truncheck");
+ // If the comparison result is 'i1 false', then the truncation was lossy.
+ return std::make_pair(Kind, std::make_pair(Check, Mask));
+}
+
void ScalarExprEmitter::EmitIntegerTruncationCheck(Value *Src, QualType SrcType,
Value *Dst, QualType DstType,
SourceLocation Loc) {
- if (!CGF.SanOpts.has(SanitizerKind::ImplicitIntegerTruncation))
+ if (!CGF.SanOpts.hasOneOf(SanitizerKind::ImplicitIntegerTruncation))
return;
- llvm::Type *SrcTy = Src->getType();
- llvm::Type *DstTy = Dst->getType();
-
// We only care about int->int conversions here.
// We ignore conversions to/from pointer and/or bool.
if (!(SrcType->isIntegerType() && DstType->isIntegerType()))
return;
- assert(isa<llvm::IntegerType>(SrcTy) && isa<llvm::IntegerType>(DstTy) &&
- "clang integer type lowered to non-integer llvm type");
-
- unsigned SrcBits = SrcTy->getScalarSizeInBits();
- unsigned DstBits = DstTy->getScalarSizeInBits();
+ unsigned SrcBits = Src->getType()->getScalarSizeInBits();
+ unsigned DstBits = Dst->getType()->getScalarSizeInBits();
// This must be truncation. Else we do not care.
if (SrcBits <= DstBits)
return;
assert(!DstType->isBooleanType() && "we should not get here with booleans.");
+ // If the integer sign change sanitizer is enabled,
+ // and we are truncating from larger unsigned type to smaller signed type,
+ // let that next sanitizer deal with it.
+ bool SrcSigned = SrcType->isSignedIntegerOrEnumerationType();
+ bool DstSigned = DstType->isSignedIntegerOrEnumerationType();
+ if (CGF.SanOpts.has(SanitizerKind::ImplicitIntegerSignChange) &&
+ (!SrcSigned && DstSigned))
+ return;
+
CodeGenFunction::SanitizerScope SanScope(&CGF);
+ std::pair<ScalarExprEmitter::ImplicitConversionCheckKind,
+ std::pair<llvm::Value *, SanitizerMask>>
+ Check =
+ EmitIntegerTruncationCheckHelper(Src, SrcType, Dst, DstType, Builder);
+ // If the comparison result is 'i1 false', then the truncation was lossy.
+
+ // Do we care about this type of truncation?
+ if (!CGF.SanOpts.has(Check.second.second))
+ return;
+
+ llvm::Constant *StaticArgs[] = {
+ CGF.EmitCheckSourceLocation(Loc), CGF.EmitCheckTypeDescriptor(SrcType),
+ CGF.EmitCheckTypeDescriptor(DstType),
+ llvm::ConstantInt::get(Builder.getInt8Ty(), Check.first)};
+ CGF.EmitCheck(Check.second, SanitizerHandler::ImplicitConversion, StaticArgs,
+ {Src, Dst});
+}
+
+// Should be called within CodeGenFunction::SanitizerScope RAII scope.
+// Returns {check kind, {check value, sanitizer mask}}; the check value is 'i1 false' when the conversion Src -> Dst changed the sign.
+static std::pair<ScalarExprEmitter::ImplicitConversionCheckKind,
+ std::pair<llvm::Value *, SanitizerMask>>
+EmitIntegerSignChangeCheckHelper(Value *Src, QualType SrcType, Value *Dst,
+ QualType DstType, CGBuilderTy &Builder) {
+ llvm::Type *SrcTy = Src->getType();
+ llvm::Type *DstTy = Dst->getType();
+
+ assert(isa<llvm::IntegerType>(SrcTy) && isa<llvm::IntegerType>(DstTy) &&
+ "non-integer llvm type");
+
+ bool SrcSigned = SrcType->isSignedIntegerOrEnumerationType();
+ bool DstSigned = DstType->isSignedIntegerOrEnumerationType();
+ (void)SrcSigned; // Only used in assert()
+ (void)DstSigned; // Only used in assert()
+ unsigned SrcBits = SrcTy->getScalarSizeInBits();
+ unsigned DstBits = DstTy->getScalarSizeInBits();
+ (void)SrcBits; // Only used in assert()
+ (void)DstBits; // Only used in assert()
+
+ assert(((SrcBits != DstBits) || (SrcSigned != DstSigned)) &&
+ "either the widths should be different, or the signednesses.");
+
+ // NOTE: zero value is considered to be non-negative.
+ auto EmitIsNegativeTest = [&Builder](Value *V, QualType VType,
+ const char *Name) -> Value * {
+ // Is this value a signed type?
+ bool VSigned = VType->isSignedIntegerOrEnumerationType();
+ llvm::Type *VTy = V->getType();
+ if (!VSigned) {
+ // If the value is unsigned, then it is never negative.
+ // FIXME: can we encounter non-scalar VTy here?
+ return llvm::ConstantInt::getFalse(VTy->getContext());
+ }
+ // Get the zero of the same type with which we will be comparing.
+ llvm::Constant *Zero = llvm::ConstantInt::get(VTy, 0);
+ // %V.isnegative = icmp slt %V, 0
+ // I.e., is %V *strictly* less than zero (does it have a negative value)?
+ return Builder.CreateICmp(llvm::ICmpInst::ICMP_SLT, V, Zero,
+ llvm::Twine(Name) + "." + V->getName() +
+ ".negativitycheck");
+ };
+
+ // 1. Was the old Value negative?
+ llvm::Value *SrcIsNegative = EmitIsNegativeTest(Src, SrcType, "src");
+ // 2. Is the new Value negative?
+ llvm::Value *DstIsNegative = EmitIsNegativeTest(Dst, DstType, "dst");
+ // 3. Now, was the 'negativity status' preserved during the conversion?
+ // NOTE: conversion from negative to zero is considered to change the sign.
+ // (We want to get 'false' when the conversion changed the sign)
+ // So we should just equality-compare the negativity statuses.
llvm::Value *Check = nullptr;
+ Check = Builder.CreateICmpEQ(SrcIsNegative, DstIsNegative, "signchangecheck");
+ // If the comparison result is 'false', then the conversion changed the sign.
+ return std::make_pair(
+ ScalarExprEmitter::ICCK_IntegerSignChange,
+ std::make_pair(Check, SanitizerKind::ImplicitIntegerSignChange));
+}
- // 1. Extend the truncated value back to the same width as the Src.
- bool InputSigned = DstType->isSignedIntegerOrEnumerationType();
- Check = Builder.CreateIntCast(Dst, SrcTy, InputSigned, "anyext");
- // 2. Equality-compare with the original source value
- Check = Builder.CreateICmpEQ(Check, Src, "truncheck");
- // If the comparison result is 'i1 false', then the truncation was lossy.
+void ScalarExprEmitter::EmitIntegerSignChangeCheck(Value *Src, QualType SrcType,
+ Value *Dst, QualType DstType,
+ SourceLocation Loc) {
+ if (!CGF.SanOpts.has(SanitizerKind::ImplicitIntegerSignChange))
+ return;
+
+ llvm::Type *SrcTy = Src->getType();
+ llvm::Type *DstTy = Dst->getType();
+
+ // We only care about int->int conversions here.
+ // We ignore conversions to/from pointer and/or bool.
+ if (!(SrcType->isIntegerType() && DstType->isIntegerType()))
+ return;
+
+ bool SrcSigned = SrcType->isSignedIntegerOrEnumerationType();
+ bool DstSigned = DstType->isSignedIntegerOrEnumerationType();
+ unsigned SrcBits = SrcTy->getScalarSizeInBits();
+ unsigned DstBits = DstTy->getScalarSizeInBits();
+
+ // Now, we do not need to emit the check in *all* of the cases.
+ // We can avoid emitting it in some obvious cases where it would always have
+ // been dropped by the optimization passes (instcombine) anyway.
+ // If it's a cast between effectively the same type, no check.
+ // NOTE: this is *not* equivalent to checking the canonical types.
+ if (SrcSigned == DstSigned && SrcBits == DstBits)
+ return;
+ // At least one of the values needs to have signed type.
+ // If both are unsigned, then obviously, neither of them can be negative.
+ if (!SrcSigned && !DstSigned)
+ return;
+ // If the conversion is to *larger* *signed* type, then no check is needed.
+ // Because either sign-extension happens (so the sign will remain),
+ // or zero-extension will happen (the sign bit will be zero.)
+ if ((DstBits > SrcBits) && DstSigned)
+ return;
+ if (CGF.SanOpts.has(SanitizerKind::ImplicitSignedIntegerTruncation) &&
+ (SrcBits > DstBits) && SrcSigned) {
+ // If the signed integer truncation sanitizer is enabled,
+ // and this is a truncation from signed type, then no check is needed.
+ // Because here sign change check is interchangeable with truncation check.
+ return;
+ }
+ // That's it. We can't rule out any more cases with the data we have.
+
+ CodeGenFunction::SanitizerScope SanScope(&CGF);
+
+ std::pair<ScalarExprEmitter::ImplicitConversionCheckKind,
+ std::pair<llvm::Value *, SanitizerMask>>
+ Check;
+
+ // Each of these checks needs to return 'false' when an issue was detected.
+ ImplicitConversionCheckKind CheckKind;
+ llvm::SmallVector<std::pair<llvm::Value *, SanitizerMask>, 2> Checks;
+ // So we can 'and' all the checks together, and still get 'false',
+ // if at least one of the checks detected an issue.
+
+ Check = EmitIntegerSignChangeCheckHelper(Src, SrcType, Dst, DstType, Builder);
+ CheckKind = Check.first;
+ Checks.emplace_back(Check.second);
+
+ if (CGF.SanOpts.has(SanitizerKind::ImplicitSignedIntegerTruncation) &&
+ (SrcBits > DstBits) && !SrcSigned && DstSigned) {
+ // If the signed integer truncation sanitizer was enabled,
+ // and we are truncating from larger unsigned type to smaller signed type,
+ // let's handle the case we skipped in that check.
+ Check =
+ EmitIntegerTruncationCheckHelper(Src, SrcType, Dst, DstType, Builder);
+ CheckKind = ICCK_SignedIntegerTruncationOrSignChange;
+ Checks.emplace_back(Check.second);
+ // If the comparison result is 'i1 false', then the truncation was lossy.
+ }
llvm::Constant *StaticArgs[] = {
CGF.EmitCheckSourceLocation(Loc), CGF.EmitCheckTypeDescriptor(SrcType),
CGF.EmitCheckTypeDescriptor(DstType),
- llvm::ConstantInt::get(Builder.getInt8Ty(), ICCK_IntegerTruncation)};
- CGF.EmitCheck(std::make_pair(Check, SanitizerKind::ImplicitIntegerTruncation),
- SanitizerHandler::ImplicitConversion, StaticArgs, {Src, Dst});
+ llvm::ConstantInt::get(Builder.getInt8Ty(), CheckKind)};
+ // EmitCheck() will 'and' all the checks together.
+ CGF.EmitCheck(Checks, SanitizerHandler::ImplicitConversion, StaticArgs,
+ {Src, Dst});
}
/// Emit a conversion from the specified type to the specified destination type,
@@ -991,6 +1197,27 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType,
QualType DstType,
SourceLocation Loc,
ScalarConversionOpts Opts) {
+ // All conversions involving fixed point types should be handled by the
+ // EmitFixedPoint family of functions. This keeps this function from bloating
+ // further; and although fixed point numbers are represented by
+ // integers, we do not want to follow any logic that assumes they should be
+ // treated as integers.
+ // TODO(leonardchan): When necessary, add another if statement checking for
+ // conversions to fixed point types from other types.
+ if (SrcType->isFixedPointType()) {
+ if (DstType->isFixedPointType()) {
+ return EmitFixedPointConversion(Src, SrcType, DstType, Loc);
+ } else if (DstType->isBooleanType()) {
+ // We do not need to check the padding bit on unsigned types if unsigned
+ // padding is enabled because overflow into this bit is undefined
+ // behavior.
+ return Builder.CreateIsNotNull(Src, "tobool");
+ }
+
+ llvm_unreachable(
+ "Unhandled scalar conversion involving a fixed point type.");
+ }
+
QualType NoncanonicalSrcType = SrcType;
QualType NoncanonicalDstType = DstType;
@@ -1036,8 +1263,13 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType,
}
// Ignore conversions like int -> uint.
- if (SrcTy == DstTy)
+ if (SrcTy == DstTy) {
+ if (Opts.EmitImplicitIntegerSignChangeChecks)
+ EmitIntegerSignChangeCheck(Src, NoncanonicalSrcType, Src,
+ NoncanonicalDstType, Loc);
+
return Src;
+ }
// Handle pointer conversions next: pointers can only be converted to/from
// other pointers and integers. Check for pointer types in terms of LLVM, as
@@ -1181,9 +1413,91 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType,
EmitIntegerTruncationCheck(Src, NoncanonicalSrcType, Res,
NoncanonicalDstType, Loc);
+ if (Opts.EmitImplicitIntegerSignChangeChecks)
+ EmitIntegerSignChangeCheck(Src, NoncanonicalSrcType, Res,
+ NoncanonicalDstType, Loc);
+
return Res;
}
+Value *ScalarExprEmitter::EmitFixedPointConversion(Value *Src, QualType SrcTy,
+ QualType DstTy,
+ SourceLocation Loc) {
+ using llvm::APInt;
+ using llvm::ConstantInt;
+ using llvm::Value;
+
+ assert(SrcTy->isFixedPointType());
+ assert(DstTy->isFixedPointType());
+
+ FixedPointSemantics SrcFPSema =
+ CGF.getContext().getFixedPointSemantics(SrcTy);
+ FixedPointSemantics DstFPSema =
+ CGF.getContext().getFixedPointSemantics(DstTy);
+ unsigned SrcWidth = SrcFPSema.getWidth();
+ unsigned DstWidth = DstFPSema.getWidth();
+ unsigned SrcScale = SrcFPSema.getScale();
+ unsigned DstScale = DstFPSema.getScale();
+ bool SrcIsSigned = SrcFPSema.isSigned();
+ bool DstIsSigned = DstFPSema.isSigned();
+
+ llvm::Type *DstIntTy = Builder.getIntNTy(DstWidth);
+
+ Value *Result = Src;
+ unsigned ResultWidth = SrcWidth;
+
+ if (!DstFPSema.isSaturated()) {
+ // Downscale.
+ if (DstScale < SrcScale)
+ Result = SrcIsSigned ?
+ Builder.CreateAShr(Result, SrcScale - DstScale, "downscale") :
+ Builder.CreateLShr(Result, SrcScale - DstScale, "downscale");
+
+ // Resize.
+ Result = Builder.CreateIntCast(Result, DstIntTy, SrcIsSigned, "resize");
+
+ // Upscale.
+ if (DstScale > SrcScale)
+ Result = Builder.CreateShl(Result, DstScale - SrcScale, "upscale");
+ } else {
+ // Adjust the number of fractional bits.
+ if (DstScale > SrcScale) {
+ ResultWidth = SrcWidth + DstScale - SrcScale;
+ llvm::Type *UpscaledTy = Builder.getIntNTy(ResultWidth);
+ Result = Builder.CreateIntCast(Result, UpscaledTy, SrcIsSigned, "resize");
+ Result = Builder.CreateShl(Result, DstScale - SrcScale, "upscale");
+ } else if (DstScale < SrcScale) {
+ Result = SrcIsSigned ?
+ Builder.CreateAShr(Result, SrcScale - DstScale, "downscale") :
+ Builder.CreateLShr(Result, SrcScale - DstScale, "downscale");
+ }
+
+ // Handle saturation.
+ bool LessIntBits = DstFPSema.getIntegralBits() < SrcFPSema.getIntegralBits();
+ if (LessIntBits) {
+ Value *Max = ConstantInt::get(
+ CGF.getLLVMContext(),
+ APFixedPoint::getMax(DstFPSema).getValue().extOrTrunc(ResultWidth));
+ Value *TooHigh = SrcIsSigned ? Builder.CreateICmpSGT(Result, Max)
+ : Builder.CreateICmpUGT(Result, Max);
+ Result = Builder.CreateSelect(TooHigh, Max, Result, "satmax");
+ }
+ // Cannot overflow min to dest type if src is unsigned since all fixed
+ // point types can cover the unsigned min of 0.
+ if (SrcIsSigned && (LessIntBits || !DstIsSigned)) {
+ Value *Min = ConstantInt::get(
+ CGF.getLLVMContext(),
+ APFixedPoint::getMin(DstFPSema).getValue().extOrTrunc(ResultWidth));
+ Value *TooLow = Builder.CreateICmpSLT(Result, Min);
+ Result = Builder.CreateSelect(TooLow, Min, Result, "satmin");
+ }
+
+ // Resize the integer part to get the final destination size.
+ Result = Builder.CreateIntCast(Result, DstIntTy, SrcIsSigned, "resize");
+ }
+ return Result;
+}
+
/// Emit a conversion from the specified complex type to the specified
/// destination type, where the destination type is an LLVM scalar type.
Value *ScalarExprEmitter::EmitComplexToScalarConversion(
@@ -1405,10 +1719,11 @@ Value *ScalarExprEmitter::VisitConvertVectorExpr(ConvertVectorExpr *E) {
Value *ScalarExprEmitter::VisitMemberExpr(MemberExpr *E) {
if (CodeGenFunction::ConstantEmission Constant = CGF.tryEmitAsConstant(E)) {
CGF.EmitIgnoredExpr(E->getBase());
- return emitConstant(Constant, E);
+ return CGF.emitScalarConstant(Constant, E);
} else {
- llvm::APSInt Value;
- if (E->EvaluateAsInt(Value, CGF.getContext(), Expr::SE_AllowSideEffects)) {
+ Expr::EvalResult Result;
+ if (E->EvaluateAsInt(Result, CGF.getContext(), Expr::SE_AllowSideEffects)) {
+ llvm::APSInt Value = Result.Val.getInt();
CGF.EmitIgnoredExpr(E->getBase());
return Builder.getInt(Value);
}
@@ -1681,7 +1996,7 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
CGF.EmitVTablePtrCheckForCast(PT->getPointeeType(), Src,
/*MayBeNull=*/true,
CodeGenFunction::CFITCK_UnrelatedCast,
- CE->getLocStart());
+ CE->getBeginLoc());
}
if (CGF.CGM.getCodeGenOpts().StrictVTablePointers) {
@@ -1745,11 +2060,10 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
Derived.getPointer(), DestTy->getPointeeType());
if (CGF.SanOpts.has(SanitizerKind::CFIDerivedCast))
- CGF.EmitVTablePtrCheckForCast(DestTy->getPointeeType(),
- Derived.getPointer(),
- /*MayBeNull=*/true,
- CodeGenFunction::CFITCK_DerivedCast,
- CE->getLocStart());
+ CGF.EmitVTablePtrCheckForCast(
+ DestTy->getPointeeType(), Derived.getPointer(),
+ /*MayBeNull=*/true, CodeGenFunction::CFITCK_DerivedCast,
+ CE->getBeginLoc());
return Derived.getPointer();
}
@@ -1875,11 +2189,22 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
return Builder.CreateVectorSplat(NumElements, Elt, "splat");
}
+ case CK_FixedPointCast:
+ return EmitScalarConversion(Visit(E), E->getType(), DestTy,
+ CE->getExprLoc());
+
+ case CK_FixedPointToBoolean:
+ assert(E->getType()->isFixedPointType() &&
+ "Expected src type to be fixed point type");
+ assert(DestTy->isBooleanType() && "Expected dest type to be boolean type");
+ return EmitScalarConversion(Visit(E), E->getType(), DestTy,
+ CE->getExprLoc());
+
case CK_IntegralCast: {
ScalarConversionOpts Opts;
- if (CGF.SanOpts.has(SanitizerKind::ImplicitIntegerTruncation)) {
- if (auto *ICE = dyn_cast<ImplicitCastExpr>(CE))
- Opts.EmitImplicitIntegerTruncationChecks = !ICE->isPartOfExplicitCast();
+ if (auto *ICE = dyn_cast<ImplicitCastExpr>(CE)) {
+ if (!ICE->isPartOfExplicitCast())
+ Opts = ScalarConversionOpts(CGF.SanOpts);
}
return EmitScalarConversion(Visit(E), E->getType(), DestTy,
CE->getExprLoc(), Opts);
@@ -1920,13 +2245,10 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
CE->getExprLoc());
}
- case CK_ZeroToOCLEvent: {
- assert(DestTy->isEventT() && "CK_ZeroToOCLEvent cast on non-event type");
- return llvm::Constant::getNullValue(ConvertType(DestTy));
- }
-
- case CK_ZeroToOCLQueue: {
- assert(DestTy->isQueueT() && "CK_ZeroToOCLQueue cast on non queue_t type");
+ case CK_ZeroToOCLOpaqueType: {
+ assert((DestTy->isEventT() || DestTy->isQueueT() ||
+ DestTy->isOCLIntelSubgroupAVCType()) &&
+ "CK_ZeroToOCLEvent cast on non-event type");
return llvm::Constant::getNullValue(ConvertType(DestTy));
}
@@ -1985,7 +2307,7 @@ llvm::Value *ScalarExprEmitter::EmitIncDecConsiderOverflowBehavior(
case LangOptions::SOB_Undefined:
if (!CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow))
return Builder.CreateNSWAdd(InVal, Amount, Name);
- // Fall through.
+ LLVM_FALLTHROUGH;
case LangOptions::SOB_Trapping:
if (!E->canOverflow())
return Builder.CreateNSWAdd(InVal, Amount, Name);
@@ -2280,9 +2602,11 @@ Value *ScalarExprEmitter::VisitUnaryLNot(const UnaryOperator *E) {
Value *ScalarExprEmitter::VisitOffsetOfExpr(OffsetOfExpr *E) {
// Try folding the offsetof to a constant.
- llvm::APSInt Value;
- if (E->EvaluateAsInt(Value, CGF.getContext()))
+ Expr::EvalResult EVResult;
+ if (E->EvaluateAsInt(EVResult, CGF.getContext())) {
+ llvm::APSInt Value = EVResult.Val.getInt();
return Builder.getInt(Value);
+ }
// Loop over the components of the offsetof to compute the value.
unsigned n = E->getNumComponents();
@@ -2551,9 +2875,10 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue(
// Expand the binary operator.
Result = (this->*Func)(OpInfo);
- // Convert the result back to the LHS type.
- Result =
- EmitScalarConversion(Result, E->getComputationResultType(), LHSTy, Loc);
+ // Convert the result back to the LHS type,
+ // potentially with Implicit Conversion sanitizer check.
+ Result = EmitScalarConversion(Result, E->getComputationResultType(), LHSTy,
+ Loc, ScalarConversionOpts(CGF.SanOpts));
if (atomicPHI) {
llvm::BasicBlock *opBB = Builder.GetInsertBlock();
@@ -2991,7 +3316,7 @@ Value *ScalarExprEmitter::EmitAdd(const BinOpInfo &op) {
case LangOptions::SOB_Undefined:
if (!CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow))
return Builder.CreateNSWAdd(op.LHS, op.RHS, "add");
- // Fall through.
+ LLVM_FALLTHROUGH;
case LangOptions::SOB_Trapping:
if (CanElideOverflowCheck(CGF.getContext(), op))
return Builder.CreateNSWAdd(op.LHS, op.RHS, "add");
@@ -3026,7 +3351,7 @@ Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) {
case LangOptions::SOB_Undefined:
if (!CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow))
return Builder.CreateNSWSub(op.LHS, op.RHS, "sub");
- // Fall through.
+ LLVM_FALLTHROUGH;
case LangOptions::SOB_Trapping:
if (CanElideOverflowCheck(CGF.getContext(), op))
return Builder.CreateNSWSub(op.LHS, op.RHS, "sub");
diff --git a/lib/CodeGen/CGLoopInfo.cpp b/lib/CodeGen/CGLoopInfo.cpp
index 21e2b8dd8c31..fd0a9c773a2e 100644
--- a/lib/CodeGen/CGLoopInfo.cpp
+++ b/lib/CodeGen/CGLoopInfo.cpp
@@ -10,8 +10,8 @@
#include "CGLoopInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
-#include "clang/Sema/LoopHint.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
@@ -21,14 +21,17 @@ using namespace llvm;
static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs,
const llvm::DebugLoc &StartLoc,
- const llvm::DebugLoc &EndLoc) {
+ const llvm::DebugLoc &EndLoc, MDNode *&AccGroup) {
if (!Attrs.IsParallel && Attrs.VectorizeWidth == 0 &&
Attrs.InterleaveCount == 0 && Attrs.UnrollCount == 0 &&
+ Attrs.UnrollAndJamCount == 0 && !Attrs.PipelineDisabled &&
+ Attrs.PipelineInitiationInterval == 0 &&
Attrs.VectorizeEnable == LoopAttributes::Unspecified &&
Attrs.UnrollEnable == LoopAttributes::Unspecified &&
- Attrs.DistributeEnable == LoopAttributes::Unspecified &&
- !StartLoc && !EndLoc)
+ Attrs.UnrollAndJamEnable == LoopAttributes::Unspecified &&
+ Attrs.DistributeEnable == LoopAttributes::Unspecified && !StartLoc &&
+ !EndLoc)
return nullptr;
SmallVector<Metadata *, 4> Args;
@@ -61,7 +64,7 @@ static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs,
Args.push_back(MDNode::get(Ctx, Vals));
}
- // Setting interleave.count
+ // Setting unroll.count
if (Attrs.UnrollCount > 0) {
Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.unroll.count"),
ConstantAsMetadata::get(ConstantInt::get(
@@ -69,6 +72,14 @@ static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs,
Args.push_back(MDNode::get(Ctx, Vals));
}
+ // Setting unroll_and_jam.count
+ if (Attrs.UnrollAndJamCount > 0) {
+ Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.unroll_and_jam.count"),
+ ConstantAsMetadata::get(ConstantInt::get(
+ Type::getInt32Ty(Ctx), Attrs.UnrollAndJamCount))};
+ Args.push_back(MDNode::get(Ctx, Vals));
+ }
+
// Setting vectorize.enable
if (Attrs.VectorizeEnable != LoopAttributes::Unspecified) {
Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.vectorize.enable"),
@@ -91,6 +102,19 @@ static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs,
Args.push_back(MDNode::get(Ctx, Vals));
}
+ // Setting unroll_and_jam.enable, .full or .disable
+ if (Attrs.UnrollAndJamEnable != LoopAttributes::Unspecified) {
+ std::string Name;
+ if (Attrs.UnrollAndJamEnable == LoopAttributes::Enable)
+ Name = "llvm.loop.unroll_and_jam.enable";
+ else if (Attrs.UnrollAndJamEnable == LoopAttributes::Full)
+ Name = "llvm.loop.unroll_and_jam.full";
+ else
+ Name = "llvm.loop.unroll_and_jam.disable";
+ Metadata *Vals[] = {MDString::get(Ctx, Name)};
+ Args.push_back(MDNode::get(Ctx, Vals));
+ }
+
if (Attrs.DistributeEnable != LoopAttributes::Unspecified) {
Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.distribute.enable"),
ConstantAsMetadata::get(ConstantInt::get(
@@ -99,6 +123,28 @@ static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs,
Args.push_back(MDNode::get(Ctx, Vals));
}
+ if (Attrs.IsParallel) {
+ AccGroup = MDNode::getDistinct(Ctx, {});
+ Args.push_back(MDNode::get(
+ Ctx, {MDString::get(Ctx, "llvm.loop.parallel_accesses"), AccGroup}));
+ }
+
+ if (Attrs.PipelineDisabled) {
+ Metadata *Vals[] = {
+ MDString::get(Ctx, "llvm.loop.pipeline.disable"),
+ ConstantAsMetadata::get(ConstantInt::get(
+ Type::getInt1Ty(Ctx), (Attrs.PipelineDisabled == true)))};
+ Args.push_back(MDNode::get(Ctx, Vals));
+ }
+
+ if (Attrs.PipelineInitiationInterval > 0) {
+ Metadata *Vals[] = {
+ MDString::get(Ctx, "llvm.loop.pipeline.initiationinterval"),
+ ConstantAsMetadata::get(ConstantInt::get(
+ Type::getInt32Ty(Ctx), Attrs.PipelineInitiationInterval))};
+ Args.push_back(MDNode::get(Ctx, Vals));
+ }
+
// Set the first operand to itself.
MDNode *LoopID = MDNode::get(Ctx, Args);
LoopID->replaceOperandWith(0, LoopID);
@@ -107,24 +153,31 @@ static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs,
LoopAttributes::LoopAttributes(bool IsParallel)
: IsParallel(IsParallel), VectorizeEnable(LoopAttributes::Unspecified),
- UnrollEnable(LoopAttributes::Unspecified), VectorizeWidth(0),
- InterleaveCount(0), UnrollCount(0),
- DistributeEnable(LoopAttributes::Unspecified) {}
+ UnrollEnable(LoopAttributes::Unspecified),
+ UnrollAndJamEnable(LoopAttributes::Unspecified), VectorizeWidth(0),
+ InterleaveCount(0), UnrollCount(0), UnrollAndJamCount(0),
+ DistributeEnable(LoopAttributes::Unspecified), PipelineDisabled(false),
+ PipelineInitiationInterval(0) {}
void LoopAttributes::clear() {
IsParallel = false;
VectorizeWidth = 0;
InterleaveCount = 0;
UnrollCount = 0;
+ UnrollAndJamCount = 0;
VectorizeEnable = LoopAttributes::Unspecified;
UnrollEnable = LoopAttributes::Unspecified;
+ UnrollAndJamEnable = LoopAttributes::Unspecified;
DistributeEnable = LoopAttributes::Unspecified;
+ PipelineDisabled = false;
+ PipelineInitiationInterval = 0;
}
LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs,
const llvm::DebugLoc &StartLoc, const llvm::DebugLoc &EndLoc)
: LoopID(nullptr), Header(Header), Attrs(Attrs) {
- LoopID = createMetadata(Header->getContext(), Attrs, StartLoc, EndLoc);
+ LoopID =
+ createMetadata(Header->getContext(), Attrs, StartLoc, EndLoc, AccGroup);
}
void LoopInfoStack::push(BasicBlock *Header, const llvm::DebugLoc &StartLoc,
@@ -191,12 +244,20 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
case LoopHintAttr::Unroll:
setUnrollState(LoopAttributes::Disable);
break;
+ case LoopHintAttr::UnrollAndJam:
+ setUnrollAndJamState(LoopAttributes::Disable);
+ break;
case LoopHintAttr::Distribute:
setDistributeState(false);
break;
+ case LoopHintAttr::PipelineDisabled:
+ setPipelineDisabled(true);
+ break;
case LoopHintAttr::UnrollCount:
+ case LoopHintAttr::UnrollAndJamCount:
case LoopHintAttr::VectorizeWidth:
case LoopHintAttr::InterleaveCount:
+ case LoopHintAttr::PipelineInitiationInterval:
llvm_unreachable("Options cannot be disabled.");
break;
}
@@ -210,12 +271,18 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
case LoopHintAttr::Unroll:
setUnrollState(LoopAttributes::Enable);
break;
+ case LoopHintAttr::UnrollAndJam:
+ setUnrollAndJamState(LoopAttributes::Enable);
+ break;
case LoopHintAttr::Distribute:
setDistributeState(true);
break;
case LoopHintAttr::UnrollCount:
+ case LoopHintAttr::UnrollAndJamCount:
case LoopHintAttr::VectorizeWidth:
case LoopHintAttr::InterleaveCount:
+ case LoopHintAttr::PipelineDisabled:
+ case LoopHintAttr::PipelineInitiationInterval:
llvm_unreachable("Options cannot enabled.");
break;
}
@@ -229,10 +296,14 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
setVectorizeEnable(true);
break;
case LoopHintAttr::Unroll:
+ case LoopHintAttr::UnrollAndJam:
case LoopHintAttr::UnrollCount:
+ case LoopHintAttr::UnrollAndJamCount:
case LoopHintAttr::VectorizeWidth:
case LoopHintAttr::InterleaveCount:
case LoopHintAttr::Distribute:
+ case LoopHintAttr::PipelineDisabled:
+ case LoopHintAttr::PipelineInitiationInterval:
llvm_unreachable("Options cannot be used to assume mem safety.");
break;
}
@@ -242,12 +313,18 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
case LoopHintAttr::Unroll:
setUnrollState(LoopAttributes::Full);
break;
+ case LoopHintAttr::UnrollAndJam:
+ setUnrollAndJamState(LoopAttributes::Full);
+ break;
case LoopHintAttr::Vectorize:
case LoopHintAttr::Interleave:
case LoopHintAttr::UnrollCount:
+ case LoopHintAttr::UnrollAndJamCount:
case LoopHintAttr::VectorizeWidth:
case LoopHintAttr::InterleaveCount:
case LoopHintAttr::Distribute:
+ case LoopHintAttr::PipelineDisabled:
+ case LoopHintAttr::PipelineInitiationInterval:
llvm_unreachable("Options cannot be used with 'full' hint.");
break;
}
@@ -263,10 +340,18 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
case LoopHintAttr::UnrollCount:
setUnrollCount(ValueInt);
break;
+ case LoopHintAttr::UnrollAndJamCount:
+ setUnrollAndJamCount(ValueInt);
+ break;
+ case LoopHintAttr::PipelineInitiationInterval:
+ setPipelineInitiationInterval(ValueInt);
+ break;
case LoopHintAttr::Unroll:
+ case LoopHintAttr::UnrollAndJam:
case LoopHintAttr::Vectorize:
case LoopHintAttr::Interleave:
case LoopHintAttr::Distribute:
+ case LoopHintAttr::PipelineDisabled:
llvm_unreachable("Options cannot be assigned a value.");
break;
}
@@ -284,6 +369,21 @@ void LoopInfoStack::pop() {
}
void LoopInfoStack::InsertHelper(Instruction *I) const {
+ if (I->mayReadOrWriteMemory()) {
+ SmallVector<Metadata *, 4> AccessGroups;
+ for (const LoopInfo &AL : Active) {
+ // Here we assume that every loop that has an access group is parallel.
+ if (MDNode *Group = AL.getAccessGroup())
+ AccessGroups.push_back(Group);
+ }
+ MDNode *UnionMD = nullptr;
+ if (AccessGroups.size() == 1)
+ UnionMD = cast<MDNode>(AccessGroups[0]);
+ else if (AccessGroups.size() >= 2)
+ UnionMD = MDNode::get(I->getContext(), AccessGroups);
+ I->setMetadata("llvm.access.group", UnionMD);
+ }
+
if (!hasInfo())
return;
@@ -291,15 +391,12 @@ void LoopInfoStack::InsertHelper(Instruction *I) const {
if (!L.getLoopID())
return;
- if (TerminatorInst *TI = dyn_cast<TerminatorInst>(I)) {
- for (unsigned i = 0, ie = TI->getNumSuccessors(); i < ie; ++i)
- if (TI->getSuccessor(i) == L.getHeader()) {
- TI->setMetadata(llvm::LLVMContext::MD_loop, L.getLoopID());
+ if (I->isTerminator()) {
+ for (BasicBlock *Succ : successors(I))
+ if (Succ == L.getHeader()) {
+ I->setMetadata(llvm::LLVMContext::MD_loop, L.getLoopID());
break;
}
return;
}
-
- if (L.getAttributes().IsParallel && I->mayReadOrWriteMemory())
- I->setMetadata("llvm.mem.parallel_loop_access", L.getLoopID());
}
diff --git a/lib/CodeGen/CGLoopInfo.h b/lib/CodeGen/CGLoopInfo.h
index 9d5f23ff9a2a..84ba03bfb00b 100644
--- a/lib/CodeGen/CGLoopInfo.h
+++ b/lib/CodeGen/CGLoopInfo.h
@@ -49,6 +49,9 @@ struct LoopAttributes {
/// Value for llvm.loop.unroll.* metadata (enable, disable, or full).
LVEnableState UnrollEnable;
+ /// Value for llvm.loop.unroll_and_jam.* metadata (enable, disable, or full).
+ LVEnableState UnrollAndJamEnable;
+
/// Value for llvm.loop.vectorize.width metadata.
unsigned VectorizeWidth;
@@ -58,8 +61,17 @@ struct LoopAttributes {
/// llvm.unroll.
unsigned UnrollCount;
+ /// Value for llvm.loop.unroll_and_jam.count metadata.
+ unsigned UnrollAndJamCount;
+
/// Value for llvm.loop.distribute.enable metadata.
LVEnableState DistributeEnable;
+
+ /// Value for llvm.loop.pipeline.disable metadata.
+ bool PipelineDisabled;
+
+ /// Value for llvm.loop.pipeline.initiationinterval metadata.
+ unsigned PipelineInitiationInterval;
};
/// Information used when generating a structured loop.
@@ -78,6 +90,9 @@ public:
/// Get the set of attributes active for this loop.
const LoopAttributes &getAttributes() const { return Attrs; }
+ /// Return this loop's access group or nullptr if it does not have one.
+ llvm::MDNode *getAccessGroup() const { return AccGroup; }
+
private:
/// Loop ID metadata.
llvm::MDNode *LoopID;
@@ -85,6 +100,8 @@ private:
llvm::BasicBlock *Header;
/// The attributes for this loop.
LoopAttributes Attrs;
+ /// The access group for memory accesses parallel to this loop.
+ llvm::MDNode *AccGroup = nullptr;
};
/// A stack of loop information corresponding to loop nesting levels.
@@ -143,6 +160,11 @@ public:
StagedAttrs.UnrollEnable = State;
}
+ /// Set the next pushed loop unroll_and_jam state.
+ void setUnrollAndJamState(const LoopAttributes::LVEnableState &State) {
+ StagedAttrs.UnrollAndJamEnable = State;
+ }
+
/// Set the vectorize width for the next loop pushed.
void setVectorizeWidth(unsigned W) { StagedAttrs.VectorizeWidth = W; }
@@ -152,6 +174,17 @@ public:
/// Set the unroll count for the next loop pushed.
void setUnrollCount(unsigned C) { StagedAttrs.UnrollCount = C; }
+ /// Set the unroll_and_jam count for the next loop pushed.
+ void setUnrollAndJamCount(unsigned C) { StagedAttrs.UnrollAndJamCount = C; }
+
+ /// Set the pipeline disabled state.
+ void setPipelineDisabled(bool S) { StagedAttrs.PipelineDisabled = S; }
+
+ /// Set the pipeline initiation interval.
+ void setPipelineInitiationInterval(unsigned C) {
+ StagedAttrs.PipelineInitiationInterval = C;
+ }
+
private:
/// Returns true if there is LoopInfo on the stack.
bool hasInfo() const { return !Active.empty(); }
diff --git a/lib/CodeGen/CGNonTrivialStruct.cpp b/lib/CodeGen/CGNonTrivialStruct.cpp
index 922e0934b866..c6a96a912622 100644
--- a/lib/CodeGen/CGNonTrivialStruct.cpp
+++ b/lib/CodeGen/CGNonTrivialStruct.cpp
@@ -187,6 +187,7 @@ template <class Derived> struct GenFuncNameBase {
if (!FK)
return asDerived().visitTrivial(QualType(AT, 0), FD, CurStructOffset);
+ asDerived().flushTrivialFields();
CharUnits FieldOffset = CurStructOffset + asDerived().getFieldOffset(FD);
ASTContext &Ctx = asDerived().getContext();
const ConstantArrayType *CAT = cast<ConstantArrayType>(AT);
@@ -283,8 +284,9 @@ struct GenDefaultInitializeFuncName
struct GenDestructorFuncName : GenUnaryFuncName<GenDestructorFuncName>,
DestructedTypeVisitor<GenDestructorFuncName> {
using Super = DestructedTypeVisitor<GenDestructorFuncName>;
- GenDestructorFuncName(CharUnits DstAlignment, ASTContext &Ctx)
- : GenUnaryFuncName<GenDestructorFuncName>("__destructor_", DstAlignment,
+ GenDestructorFuncName(const char *Prefix, CharUnits DstAlignment,
+ ASTContext &Ctx)
+ : GenUnaryFuncName<GenDestructorFuncName>(Prefix, DstAlignment,
Ctx) {}
void visitWithKind(QualType::DestructionKind DK, QualType FT,
const FieldDecl *FD, CharUnits CurStructOffset) {
@@ -335,6 +337,7 @@ template <class Derived> struct GenFuncBase {
return asDerived().visitTrivial(QualType(AT, 0), FD, CurStackOffset,
Addrs);
+ asDerived().flushTrivialFields(Addrs);
CodeGenFunction &CGF = *this->CGF;
ASTContext &Ctx = CGF.getContext();
@@ -455,12 +458,13 @@ template <class Derived> struct GenFuncBase {
llvm::Function::Create(FuncTy, llvm::GlobalValue::LinkOnceODRLinkage,
FuncName, &CGM.getModule());
F->setVisibility(llvm::GlobalValue::HiddenVisibility);
- CGM.SetLLVMFunctionAttributes(nullptr, FI, F);
+ CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, F);
CGM.SetLLVMFunctionAttributesForDefinition(nullptr, F);
IdentifierInfo *II = &Ctx.Idents.get(FuncName);
FunctionDecl *FD = FunctionDecl::Create(
Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(),
- II, Ctx.VoidTy, nullptr, SC_PrivateExtern, false, false);
+ II, Ctx.getFunctionType(Ctx.VoidTy, llvm::None, {}), nullptr,
+ SC_PrivateExtern, false, false);
CodeGenFunction NewCGF(CGM);
setCGF(&NewCGF);
CGF->StartFunction(FD, Ctx.VoidTy, F, FI, Args);
@@ -824,11 +828,28 @@ void CodeGenFunction::callCStructDefaultConstructor(LValue Dst) {
IsVolatile, *this, std::array<Address, 1>({{DstPtr}}));
}
+std::string
+CodeGenFunction::getNonTrivialCopyConstructorStr(QualType QT,
+ CharUnits Alignment,
+ bool IsVolatile,
+ ASTContext &Ctx) {
+ GenBinaryFuncName<false> GenName("", Alignment, Alignment, Ctx);
+ return GenName.getName(QT, IsVolatile);
+}
+
+std::string
+CodeGenFunction::getNonTrivialDestructorStr(QualType QT, CharUnits Alignment,
+ bool IsVolatile, ASTContext &Ctx) {
+ GenDestructorFuncName GenName("", Alignment, Ctx);
+ return GenName.getName(QT, IsVolatile);
+}
+
void CodeGenFunction::callCStructDestructor(LValue Dst) {
bool IsVolatile = Dst.isVolatile();
Address DstPtr = Dst.getAddress();
QualType QT = Dst.getType();
- GenDestructorFuncName GenName(DstPtr.getAlignment(), getContext());
+ GenDestructorFuncName GenName("__destructor_", DstPtr.getAlignment(),
+ getContext());
std::string FuncName = GenName.getName(QT, IsVolatile);
callSpecialFunction(GenDestructor(getContext()), FuncName, QT, IsVolatile,
*this, std::array<Address, 1>({{DstPtr}}));
diff --git a/lib/CodeGen/CGObjC.cpp b/lib/CodeGen/CGObjC.cpp
index b94bbf2a384f..9c66ff0e8fb2 100644
--- a/lib/CodeGen/CGObjC.cpp
+++ b/lib/CodeGen/CGObjC.cpp
@@ -127,7 +127,7 @@ llvm::Value *CodeGenFunction::EmitObjCCollectionLiteral(const Expr *E,
llvm::Constant *Constant =
CGM.CreateRuntimeVariable(ConvertType(IdTy), ConstantName);
LValue LV = MakeNaturalAlignAddrLValue(Constant, IdTy);
- llvm::Value *Ptr = EmitLoadOfScalar(LV, E->getLocStart());
+ llvm::Value *Ptr = EmitLoadOfScalar(LV, E->getBeginLoc());
cast<llvm::LoadInst>(Ptr)->setMetadata(
CGM.getModule().getMDKindID("invariant.load"),
llvm::MDNode::get(getLLVMContext(), None));
@@ -352,6 +352,81 @@ static const Expr *findWeakLValue(const Expr *E) {
return nullptr;
}
+/// The ObjC runtime may provide entrypoints that are likely to be faster
+/// than an ordinary message send of the appropriate selector.
+///
+/// The entrypoints are guaranteed to be equivalent to just sending the
+/// corresponding message. If the entrypoint is implemented naively as just a
+/// message send, using it is a trade-off: it sacrifices a few cycles of
+/// overhead to save a small amount of code. However, it's possible for
+/// runtimes to detect and special-case classes that use "standard"
+/// behavior; if that's dynamically a large proportion of all objects, using
+/// the entrypoint will also be faster than using a message send.
+///
+/// If the runtime does support a required entrypoint, then this method will
+/// generate a call and return the resulting value. Otherwise it will return
+/// None and the caller can generate a msgSend instead.
+static Optional<llvm::Value *>
+tryGenerateSpecializedMessageSend(CodeGenFunction &CGF, QualType ResultType,
+ llvm::Value *Receiver,
+ const CallArgList& Args, Selector Sel,
+ const ObjCMethodDecl *method,
+ bool isClassMessage) {
+ auto &CGM = CGF.CGM;
+ if (!CGM.getCodeGenOpts().ObjCConvertMessagesToRuntimeCalls)
+ return None;
+
+ auto &Runtime = CGM.getLangOpts().ObjCRuntime;
+ switch (Sel.getMethodFamily()) {
+ case OMF_alloc:
+ if (isClassMessage &&
+ Runtime.shouldUseRuntimeFunctionsForAlloc() &&
+ ResultType->isObjCObjectPointerType()) {
+ // [Foo alloc] -> objc_alloc(Foo)
+ if (Sel.isUnarySelector() && Sel.getNameForSlot(0) == "alloc")
+ return CGF.EmitObjCAlloc(Receiver, CGF.ConvertType(ResultType));
+ // [Foo allocWithZone:nil] -> objc_allocWithZone(Foo)
+ if (Sel.isKeywordSelector() && Sel.getNumArgs() == 1 &&
+ Args.size() == 1 && Args.front().getType()->isPointerType() &&
+ Sel.getNameForSlot(0) == "allocWithZone") {
+ const llvm::Value* arg = Args.front().getKnownRValue().getScalarVal();
+ if (isa<llvm::ConstantPointerNull>(arg))
+ return CGF.EmitObjCAllocWithZone(Receiver,
+ CGF.ConvertType(ResultType));
+ return None;
+ }
+ }
+ break;
+
+ case OMF_autorelease:
+ if (ResultType->isObjCObjectPointerType() &&
+ CGM.getLangOpts().getGC() == LangOptions::NonGC &&
+ Runtime.shouldUseARCFunctionsForRetainRelease())
+ return CGF.EmitObjCAutorelease(Receiver, CGF.ConvertType(ResultType));
+ break;
+
+ case OMF_retain:
+ if (ResultType->isObjCObjectPointerType() &&
+ CGM.getLangOpts().getGC() == LangOptions::NonGC &&
+ Runtime.shouldUseARCFunctionsForRetainRelease())
+ return CGF.EmitObjCRetainNonBlock(Receiver, CGF.ConvertType(ResultType));
+ break;
+
+ case OMF_release:
+ if (ResultType->isVoidType() &&
+ CGM.getLangOpts().getGC() == LangOptions::NonGC &&
+ Runtime.shouldUseARCFunctionsForRetainRelease()) {
+ CGF.EmitObjCRelease(Receiver, ARCPreciseLifetime);
+ return nullptr;
+ }
+ break;
+
+ default:
+ break;
+ }
+ return None;
+}
+
RValue CodeGenFunction::EmitObjCMessageExpr(const ObjCMessageExpr *E,
ReturnValueSlot Return) {
// Only the lookup mechanism and first two arguments of the method
@@ -474,10 +549,17 @@ RValue CodeGenFunction::EmitObjCMessageExpr(const ObjCMessageExpr *E,
Args,
method);
} else {
- result = Runtime.GenerateMessageSend(*this, Return, ResultType,
- E->getSelector(),
- Receiver, Args, OID,
- method);
+ // Call runtime methods directly if we can.
+ if (Optional<llvm::Value *> SpecializedResult =
+ tryGenerateSpecializedMessageSend(*this, ResultType, Receiver, Args,
+ E->getSelector(), method,
+ isClassMessage)) {
+ result = RValue::get(SpecializedResult.getValue());
+ } else {
+ result = Runtime.GenerateMessageSend(*this, Return, ResultType,
+ E->getSelector(), Receiver, Args,
+ OID, method);
+ }
}
// For delegate init calls in ARC, implicitly store the result of
@@ -531,7 +613,7 @@ struct FinishARCDealloc final : EHScopeStack::Cleanup {
/// CodeGenFunction.
void CodeGenFunction::StartObjCMethod(const ObjCMethodDecl *OMD,
const ObjCContainerDecl *CD) {
- SourceLocation StartLoc = OMD->getLocStart();
+ SourceLocation StartLoc = OMD->getBeginLoc();
FunctionArgList args;
// Check if we should generate debug info for this method.
if (OMD->hasAttr<NoDebugAttr>())
@@ -548,7 +630,7 @@ void CodeGenFunction::StartObjCMethod(const ObjCMethodDecl *OMD,
args.append(OMD->param_begin(), OMD->param_end());
CurGD = OMD;
- CurEHLocation = OMD->getLocEnd();
+ CurEHLocation = OMD->getEndLoc();
StartFunction(OMD, OMD->getReturnType(), Fn, FI, args,
OMD->getLocation(), StartLoc);
@@ -568,7 +650,7 @@ static llvm::Value *emitARCRetainLoadOfScalar(CodeGenFunction &CGF,
LValue lvalue, QualType type);
/// Generate an Objective-C method. An Objective-C method is a C function with
-/// its pointer, name, and types registered in the class struture.
+/// its pointer, name, and types registered in the class structure.
void CodeGenFunction::GenerateObjCMethod(const ObjCMethodDecl *OMD) {
StartObjCMethod(OMD, OMD->getClassInterface());
PGO.assignRegionCounters(GlobalDecl(OMD), CurFn);
@@ -883,9 +965,10 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl,
// If there's a non-trivial 'get' expression, we just have to emit that.
if (!hasTrivialGetExpr(propImpl)) {
if (!AtomicHelperFn) {
- ReturnStmt ret(SourceLocation(), propImpl->getGetterCXXConstructor(),
- /*nrvo*/ nullptr);
- EmitReturnStmt(ret);
+ auto *ret = ReturnStmt::Create(getContext(), SourceLocation(),
+ propImpl->getGetterCXXConstructor(),
+ /* NRVOCandidate=*/nullptr);
+ EmitReturnStmt(*ret);
}
else {
ObjCIvarDecl *ivar = propImpl->getPropertyIvarDecl();
@@ -1068,8 +1151,9 @@ static void emitStructSetterCall(CodeGenFunction &CGF, ObjCMethodDecl *OMD,
// The second argument is the address of the parameter variable.
ParmVarDecl *argVar = *OMD->param_begin();
- DeclRefExpr argRef(argVar, false, argVar->getType().getNonReferenceType(),
- VK_LValue, SourceLocation());
+ DeclRefExpr argRef(CGF.getContext(), argVar, false,
+ argVar->getType().getNonReferenceType(), VK_LValue,
+ SourceLocation());
llvm::Value *argAddr = CGF.EmitLValue(&argRef).getPointer();
argAddr = CGF.Builder.CreateBitCast(argAddr, CGF.Int8PtrTy);
args.add(RValue::get(argAddr), CGF.getContext().VoidPtrTy);
@@ -1113,8 +1197,9 @@ static void emitCPPObjectAtomicSetterCall(CodeGenFunction &CGF,
// The second argument is the address of the parameter variable.
ParmVarDecl *argVar = *OMD->param_begin();
- DeclRefExpr argRef(argVar, false, argVar->getType().getNonReferenceType(),
- VK_LValue, SourceLocation());
+ DeclRefExpr argRef(CGF.getContext(), argVar, false,
+ argVar->getType().getNonReferenceType(), VK_LValue,
+ SourceLocation());
llvm::Value *argAddr = CGF.EmitLValue(&argRef).getPointer();
argAddr = CGF.Builder.CreateBitCast(argAddr, CGF.Int8PtrTy);
args.add(RValue::get(argAddr), CGF.getContext().VoidPtrTy);
@@ -1286,7 +1371,7 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl,
// Otherwise, fake up some ASTs and emit a normal assignment.
ValueDecl *selfDecl = setterMethod->getSelfDecl();
- DeclRefExpr self(selfDecl, false, selfDecl->getType(),
+ DeclRefExpr self(getContext(), selfDecl, false, selfDecl->getType(),
VK_LValue, SourceLocation());
ImplicitCastExpr selfLoad(ImplicitCastExpr::OnStack,
selfDecl->getType(), CK_LValueToRValue, &self,
@@ -1297,7 +1382,8 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl,
ParmVarDecl *argDecl = *setterMethod->param_begin();
QualType argType = argDecl->getType().getNonReferenceType();
- DeclRefExpr arg(argDecl, false, argType, VK_LValue, SourceLocation());
+ DeclRefExpr arg(getContext(), argDecl, false, argType, VK_LValue,
+ SourceLocation());
ImplicitCastExpr argLoad(ImplicitCastExpr::OnStack,
argType.getUnqualifiedType(), CK_LValueToRValue,
&arg, VK_RValue);
@@ -1459,7 +1545,8 @@ void CodeGenFunction::GenerateObjCCtorDtorMethod(ObjCImplementationDecl *IMP,
llvm::Value *CodeGenFunction::LoadObjCSelf() {
VarDecl *Self = cast<ObjCMethodDecl>(CurFuncDecl)->getSelfDecl();
- DeclRefExpr DRE(Self, /*is enclosing local*/ (CurFuncDecl != CurCodeDecl),
+ DeclRefExpr DRE(getContext(), Self,
+ /*is enclosing local*/ (CurFuncDecl != CurCodeDecl),
Self->getType(), VK_LValue, SourceLocation());
return EmitLoadOfScalar(EmitDeclRefLValue(&DRE), SourceLocation());
}
@@ -1645,9 +1732,9 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){
// Initialize the variable, in case it's a __block variable or something.
EmitAutoVarInit(variable);
- const VarDecl* D = cast<VarDecl>(SD->getSingleDecl());
- DeclRefExpr tempDRE(const_cast<VarDecl*>(D), false, D->getType(),
- VK_LValue, SourceLocation());
+ const VarDecl *D = cast<VarDecl>(SD->getSingleDecl());
+ DeclRefExpr tempDRE(getContext(), const_cast<VarDecl *>(D), false,
+ D->getType(), VK_LValue, SourceLocation());
elementLValue = EmitLValue(&tempDRE);
elementType = D->getType();
elementIsVariable = true;
@@ -1805,23 +1892,16 @@ llvm::Value *CodeGenFunction::EmitObjCExtendObjectLifetime(QualType type,
/// being intrinsically used up until this point in the program.
void CodeGenFunction::EmitARCIntrinsicUse(ArrayRef<llvm::Value*> values) {
llvm::Constant *&fn = CGM.getObjCEntrypoints().clang_arc_use;
- if (!fn) {
- llvm::FunctionType *fnType =
- llvm::FunctionType::get(CGM.VoidTy, None, true);
- fn = CGM.CreateRuntimeFunction(fnType, "clang.arc.use");
- }
+ if (!fn)
+ fn = CGM.getIntrinsic(llvm::Intrinsic::objc_clang_arc_use);
// This isn't really a "runtime" function, but as an intrinsic it
// doesn't really matter as long as we align things up.
EmitNounwindRuntimeCall(fn, values);
}
-
-static llvm::Constant *createARCRuntimeFunction(CodeGenModule &CGM,
- llvm::FunctionType *FTy,
- StringRef Name) {
- llvm::Constant *RTF = CGM.CreateRuntimeFunction(FTy, Name);
-
+static void setARCRuntimeFunctionLinkage(CodeGenModule &CGM,
+ llvm::Constant *RTF) {
if (auto *F = dyn_cast<llvm::Function>(RTF)) {
// If the target runtime doesn't naturally support ARC, emit weak
// references to the runtime support library. We don't really
@@ -1829,14 +1909,8 @@ static llvm::Constant *createARCRuntimeFunction(CodeGenModule &CGM,
if (!CGM.getLangOpts().ObjCRuntime.hasNativeARC() &&
!CGM.getTriple().isOSBinFormatCOFF()) {
F->setLinkage(llvm::Function::ExternalWeakLinkage);
- } else if (Name == "objc_retain" || Name == "objc_release") {
- // If we have Native ARC, set nonlazybind attribute for these APIs for
- // performance.
- F->addFnAttr(llvm::Attribute::NonLazyBind);
}
}
-
- return RTF;
}
/// Perform an operation having the signature
@@ -1844,20 +1918,20 @@ static llvm::Constant *createARCRuntimeFunction(CodeGenModule &CGM,
/// where a null input causes a no-op and returns null.
static llvm::Value *emitARCValueOperation(CodeGenFunction &CGF,
llvm::Value *value,
+ llvm::Type *returnType,
llvm::Constant *&fn,
- StringRef fnName,
+ llvm::Intrinsic::ID IntID,
bool isTailCall = false) {
if (isa<llvm::ConstantPointerNull>(value))
return value;
if (!fn) {
- llvm::FunctionType *fnType =
- llvm::FunctionType::get(CGF.Int8PtrTy, CGF.Int8PtrTy, false);
- fn = createARCRuntimeFunction(CGF.CGM, fnType, fnName);
+ fn = CGF.CGM.getIntrinsic(IntID);
+ setARCRuntimeFunctionLinkage(CGF.CGM, fn);
}
// Cast the argument to 'id'.
- llvm::Type *origType = value->getType();
+ llvm::Type *origType = returnType ? returnType : value->getType();
value = CGF.Builder.CreateBitCast(value, CGF.Int8PtrTy);
// Call the function.
@@ -1874,11 +1948,10 @@ static llvm::Value *emitARCValueOperation(CodeGenFunction &CGF,
static llvm::Value *emitARCLoadOperation(CodeGenFunction &CGF,
Address addr,
llvm::Constant *&fn,
- StringRef fnName) {
+ llvm::Intrinsic::ID IntID) {
if (!fn) {
- llvm::FunctionType *fnType =
- llvm::FunctionType::get(CGF.Int8PtrTy, CGF.Int8PtrPtrTy, false);
- fn = createARCRuntimeFunction(CGF.CGM, fnType, fnName);
+ fn = CGF.CGM.getIntrinsic(IntID);
+ setARCRuntimeFunctionLinkage(CGF.CGM, fn);
}
// Cast the argument to 'id*'.
@@ -1901,16 +1974,13 @@ static llvm::Value *emitARCStoreOperation(CodeGenFunction &CGF,
Address addr,
llvm::Value *value,
llvm::Constant *&fn,
- StringRef fnName,
+ llvm::Intrinsic::ID IntID,
bool ignored) {
assert(addr.getElementType() == value->getType());
if (!fn) {
- llvm::Type *argTypes[] = { CGF.Int8PtrPtrTy, CGF.Int8PtrTy };
-
- llvm::FunctionType *fnType
- = llvm::FunctionType::get(CGF.Int8PtrTy, argTypes, false);
- fn = createARCRuntimeFunction(CGF.CGM, fnType, fnName);
+ fn = CGF.CGM.getIntrinsic(IntID);
+ setARCRuntimeFunctionLinkage(CGF.CGM, fn);
}
llvm::Type *origType = value->getType();
@@ -1932,15 +2002,12 @@ static void emitARCCopyOperation(CodeGenFunction &CGF,
Address dst,
Address src,
llvm::Constant *&fn,
- StringRef fnName) {
+ llvm::Intrinsic::ID IntID) {
assert(dst.getType() == src.getType());
if (!fn) {
- llvm::Type *argTypes[] = { CGF.Int8PtrPtrTy, CGF.Int8PtrPtrTy };
-
- llvm::FunctionType *fnType
- = llvm::FunctionType::get(CGF.Builder.getVoidTy(), argTypes, false);
- fn = createARCRuntimeFunction(CGF.CGM, fnType, fnName);
+ fn = CGF.CGM.getIntrinsic(IntID);
+ setARCRuntimeFunctionLinkage(CGF.CGM, fn);
}
llvm::Value *args[] = {
@@ -1950,6 +2017,39 @@ static void emitARCCopyOperation(CodeGenFunction &CGF,
CGF.EmitNounwindRuntimeCall(fn, args);
}
+/// Perform an operation having the signature
+/// i8* (i8*)
+/// where a null input causes a no-op and returns null.
+static llvm::Value *emitObjCValueOperation(CodeGenFunction &CGF,
+ llvm::Value *value,
+ llvm::Type *returnType,
+ llvm::Constant *&fn,
+ StringRef fnName) {
+ if (isa<llvm::ConstantPointerNull>(value))
+ return value;
+
+ if (!fn) {
+ llvm::FunctionType *fnType =
+ llvm::FunctionType::get(CGF.Int8PtrTy, CGF.Int8PtrTy, false);
+ fn = CGF.CGM.CreateRuntimeFunction(fnType, fnName);
+
+ // We have Native ARC, so set nonlazybind attribute for performance
+ if (llvm::Function *f = dyn_cast<llvm::Function>(fn))
+ if (fnName == "objc_retain")
+ f->addFnAttr(llvm::Attribute::NonLazyBind);
+ }
+
+ // Cast the argument to 'id'.
+ llvm::Type *origType = returnType ? returnType : value->getType();
+ value = CGF.Builder.CreateBitCast(value, CGF.Int8PtrTy);
+
+ // Call the function.
+ llvm::CallInst *call = CGF.EmitNounwindRuntimeCall(fn, value);
+
+ // Cast the result back to the original type.
+ return CGF.Builder.CreateBitCast(call, origType);
+}
+
/// Produce the code to do a retain. Based on the type, calls one of:
/// call i8* \@objc_retain(i8* %value)
/// call i8* \@objc_retainBlock(i8* %value)
@@ -1963,9 +2063,9 @@ llvm::Value *CodeGenFunction::EmitARCRetain(QualType type, llvm::Value *value) {
/// Retain the given object, with normal retain semantics.
/// call i8* \@objc_retain(i8* %value)
llvm::Value *CodeGenFunction::EmitARCRetainNonBlock(llvm::Value *value) {
- return emitARCValueOperation(*this, value,
+ return emitARCValueOperation(*this, value, nullptr,
CGM.getObjCEntrypoints().objc_retain,
- "objc_retain");
+ llvm::Intrinsic::objc_retain);
}
/// Retain the given block, with _Block_copy semantics.
@@ -1977,9 +2077,9 @@ llvm::Value *CodeGenFunction::EmitARCRetainNonBlock(llvm::Value *value) {
llvm::Value *CodeGenFunction::EmitARCRetainBlock(llvm::Value *value,
bool mandatory) {
llvm::Value *result
- = emitARCValueOperation(*this, value,
+ = emitARCValueOperation(*this, value, nullptr,
CGM.getObjCEntrypoints().objc_retainBlock,
- "objc_retainBlock");
+ llvm::Intrinsic::objc_retainBlock);
// If the copy isn't mandatory, add !clang.arc.copy_on_escape to
// tell the optimizer that it doesn't need to do this copy if the
@@ -2047,9 +2147,9 @@ static void emitAutoreleasedReturnValueMarker(CodeGenFunction &CGF) {
llvm::Value *
CodeGenFunction::EmitARCRetainAutoreleasedReturnValue(llvm::Value *value) {
emitAutoreleasedReturnValueMarker(*this);
- return emitARCValueOperation(*this, value,
+ return emitARCValueOperation(*this, value, nullptr,
CGM.getObjCEntrypoints().objc_retainAutoreleasedReturnValue,
- "objc_retainAutoreleasedReturnValue");
+ llvm::Intrinsic::objc_retainAutoreleasedReturnValue);
}
/// Claim a possibly-autoreleased return value at +0. This is only
@@ -2062,9 +2162,9 @@ CodeGenFunction::EmitARCRetainAutoreleasedReturnValue(llvm::Value *value) {
llvm::Value *
CodeGenFunction::EmitARCUnsafeClaimAutoreleasedReturnValue(llvm::Value *value) {
emitAutoreleasedReturnValueMarker(*this);
- return emitARCValueOperation(*this, value,
+ return emitARCValueOperation(*this, value, nullptr,
CGM.getObjCEntrypoints().objc_unsafeClaimAutoreleasedReturnValue,
- "objc_unsafeClaimAutoreleasedReturnValue");
+ llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue);
}
/// Release the given object.
@@ -2075,9 +2175,8 @@ void CodeGenFunction::EmitARCRelease(llvm::Value *value,
llvm::Constant *&fn = CGM.getObjCEntrypoints().objc_release;
if (!fn) {
- llvm::FunctionType *fnType =
- llvm::FunctionType::get(Builder.getVoidTy(), Int8PtrTy, false);
- fn = createARCRuntimeFunction(CGM, fnType, "objc_release");
+ fn = CGM.getIntrinsic(llvm::Intrinsic::objc_release);
+ setARCRuntimeFunctionLinkage(CGM, fn);
}
// Cast the argument to 'id'.
@@ -2122,10 +2221,8 @@ llvm::Value *CodeGenFunction::EmitARCStoreStrongCall(Address addr,
llvm::Constant *&fn = CGM.getObjCEntrypoints().objc_storeStrong;
if (!fn) {
- llvm::Type *argTypes[] = { Int8PtrPtrTy, Int8PtrTy };
- llvm::FunctionType *fnType
- = llvm::FunctionType::get(Builder.getVoidTy(), argTypes, false);
- fn = createARCRuntimeFunction(CGM, fnType, "objc_storeStrong");
+ fn = CGM.getIntrinsic(llvm::Intrinsic::objc_storeStrong);
+ setARCRuntimeFunctionLinkage(CGM, fn);
}
llvm::Value *args[] = {
@@ -2177,18 +2274,18 @@ llvm::Value *CodeGenFunction::EmitARCStoreStrong(LValue dst,
/// Autorelease the given object.
/// call i8* \@objc_autorelease(i8* %value)
llvm::Value *CodeGenFunction::EmitARCAutorelease(llvm::Value *value) {
- return emitARCValueOperation(*this, value,
+ return emitARCValueOperation(*this, value, nullptr,
CGM.getObjCEntrypoints().objc_autorelease,
- "objc_autorelease");
+ llvm::Intrinsic::objc_autorelease);
}
/// Autorelease the given object.
/// call i8* \@objc_autoreleaseReturnValue(i8* %value)
llvm::Value *
CodeGenFunction::EmitARCAutoreleaseReturnValue(llvm::Value *value) {
- return emitARCValueOperation(*this, value,
+ return emitARCValueOperation(*this, value, nullptr,
CGM.getObjCEntrypoints().objc_autoreleaseReturnValue,
- "objc_autoreleaseReturnValue",
+ llvm::Intrinsic::objc_autoreleaseReturnValue,
/*isTailCall*/ true);
}
@@ -2196,9 +2293,9 @@ CodeGenFunction::EmitARCAutoreleaseReturnValue(llvm::Value *value) {
/// call i8* \@objc_retainAutoreleaseReturnValue(i8* %value)
llvm::Value *
CodeGenFunction::EmitARCRetainAutoreleaseReturnValue(llvm::Value *value) {
- return emitARCValueOperation(*this, value,
+ return emitARCValueOperation(*this, value, nullptr,
CGM.getObjCEntrypoints().objc_retainAutoreleaseReturnValue,
- "objc_retainAutoreleaseReturnValue",
+ llvm::Intrinsic::objc_retainAutoreleaseReturnValue,
/*isTailCall*/ true);
}
@@ -2225,9 +2322,9 @@ llvm::Value *CodeGenFunction::EmitARCRetainAutorelease(QualType type,
/// call i8* \@objc_retainAutorelease(i8* %value)
llvm::Value *
CodeGenFunction::EmitARCRetainAutoreleaseNonBlock(llvm::Value *value) {
- return emitARCValueOperation(*this, value,
+ return emitARCValueOperation(*this, value, nullptr,
CGM.getObjCEntrypoints().objc_retainAutorelease,
- "objc_retainAutorelease");
+ llvm::Intrinsic::objc_retainAutorelease);
}
/// i8* \@objc_loadWeak(i8** %addr)
@@ -2235,14 +2332,14 @@ CodeGenFunction::EmitARCRetainAutoreleaseNonBlock(llvm::Value *value) {
llvm::Value *CodeGenFunction::EmitARCLoadWeak(Address addr) {
return emitARCLoadOperation(*this, addr,
CGM.getObjCEntrypoints().objc_loadWeak,
- "objc_loadWeak");
+ llvm::Intrinsic::objc_loadWeak);
}
/// i8* \@objc_loadWeakRetained(i8** %addr)
llvm::Value *CodeGenFunction::EmitARCLoadWeakRetained(Address addr) {
return emitARCLoadOperation(*this, addr,
CGM.getObjCEntrypoints().objc_loadWeakRetained,
- "objc_loadWeakRetained");
+ llvm::Intrinsic::objc_loadWeakRetained);
}
/// i8* \@objc_storeWeak(i8** %addr, i8* %value)
@@ -2252,7 +2349,7 @@ llvm::Value *CodeGenFunction::EmitARCStoreWeak(Address addr,
bool ignored) {
return emitARCStoreOperation(*this, addr, value,
CGM.getObjCEntrypoints().objc_storeWeak,
- "objc_storeWeak", ignored);
+ llvm::Intrinsic::objc_storeWeak, ignored);
}
/// i8* \@objc_initWeak(i8** %addr, i8* %value)
@@ -2272,7 +2369,7 @@ void CodeGenFunction::EmitARCInitWeak(Address addr, llvm::Value *value) {
emitARCStoreOperation(*this, addr, value,
CGM.getObjCEntrypoints().objc_initWeak,
- "objc_initWeak", /*ignored*/ true);
+ llvm::Intrinsic::objc_initWeak, /*ignored*/ true);
}
/// void \@objc_destroyWeak(i8** %addr)
@@ -2280,9 +2377,8 @@ void CodeGenFunction::EmitARCInitWeak(Address addr, llvm::Value *value) {
void CodeGenFunction::EmitARCDestroyWeak(Address addr) {
llvm::Constant *&fn = CGM.getObjCEntrypoints().objc_destroyWeak;
if (!fn) {
- llvm::FunctionType *fnType =
- llvm::FunctionType::get(Builder.getVoidTy(), Int8PtrPtrTy, false);
- fn = createARCRuntimeFunction(CGM, fnType, "objc_destroyWeak");
+ fn = CGM.getIntrinsic(llvm::Intrinsic::objc_destroyWeak);
+ setARCRuntimeFunctionLinkage(CGM, fn);
}
// Cast the argument to 'id*'.
@@ -2297,7 +2393,7 @@ void CodeGenFunction::EmitARCDestroyWeak(Address addr) {
void CodeGenFunction::EmitARCMoveWeak(Address dst, Address src) {
emitARCCopyOperation(*this, dst, src,
CGM.getObjCEntrypoints().objc_moveWeak,
- "objc_moveWeak");
+ llvm::Intrinsic::objc_moveWeak);
}
/// void \@objc_copyWeak(i8** %dest, i8** %src)
@@ -2306,7 +2402,7 @@ void CodeGenFunction::EmitARCMoveWeak(Address dst, Address src) {
void CodeGenFunction::EmitARCCopyWeak(Address dst, Address src) {
emitARCCopyOperation(*this, dst, src,
CGM.getObjCEntrypoints().objc_copyWeak,
- "objc_copyWeak");
+ llvm::Intrinsic::objc_copyWeak);
}
void CodeGenFunction::emitARCCopyAssignWeak(QualType Ty, Address DstAddr,
@@ -2329,9 +2425,8 @@ void CodeGenFunction::emitARCMoveAssignWeak(QualType Ty, Address DstAddr,
llvm::Value *CodeGenFunction::EmitObjCAutoreleasePoolPush() {
llvm::Constant *&fn = CGM.getObjCEntrypoints().objc_autoreleasePoolPush;
if (!fn) {
- llvm::FunctionType *fnType =
- llvm::FunctionType::get(Int8PtrTy, false);
- fn = createARCRuntimeFunction(CGM, fnType, "objc_autoreleasePoolPush");
+ fn = CGM.getIntrinsic(llvm::Intrinsic::objc_autoreleasePoolPush);
+ setARCRuntimeFunctionLinkage(CGM, fn);
}
return EmitNounwindRuntimeCall(fn);
@@ -2342,18 +2437,28 @@ llvm::Value *CodeGenFunction::EmitObjCAutoreleasePoolPush() {
void CodeGenFunction::EmitObjCAutoreleasePoolPop(llvm::Value *value) {
assert(value->getType() == Int8PtrTy);
- llvm::Constant *&fn = CGM.getObjCEntrypoints().objc_autoreleasePoolPop;
- if (!fn) {
- llvm::FunctionType *fnType =
- llvm::FunctionType::get(Builder.getVoidTy(), Int8PtrTy, false);
+ if (getInvokeDest()) {
+ // Call the runtime method not the intrinsic if we are handling exceptions
+ llvm::Constant *&fn =
+ CGM.getObjCEntrypoints().objc_autoreleasePoolPopInvoke;
+ if (!fn) {
+ llvm::FunctionType *fnType =
+ llvm::FunctionType::get(Builder.getVoidTy(), Int8PtrTy, false);
+ fn = CGM.CreateRuntimeFunction(fnType, "objc_autoreleasePoolPop");
+ setARCRuntimeFunctionLinkage(CGM, fn);
+ }
- // We don't want to use a weak import here; instead we should not
- // fall into this path.
- fn = createARCRuntimeFunction(CGM, fnType, "objc_autoreleasePoolPop");
- }
+ // objc_autoreleasePoolPop can throw.
+ EmitRuntimeCallOrInvoke(fn, value);
+ } else {
+ llvm::Constant *&fn = CGM.getObjCEntrypoints().objc_autoreleasePoolPop;
+ if (!fn) {
+ fn = CGM.getIntrinsic(llvm::Intrinsic::objc_autoreleasePoolPop);
+ setARCRuntimeFunctionLinkage(CGM, fn);
+ }
- // objc_autoreleasePoolPop can throw.
- EmitRuntimeCallOrInvoke(fn, value);
+ EmitRuntimeCall(fn, value);
+ }
}
/// Produce the code to do an MRR version objc_autoreleasepool_push.
@@ -2384,6 +2489,24 @@ llvm::Value *CodeGenFunction::EmitObjCMRRAutoreleasePoolPush() {
return InitRV.getScalarVal();
}
+/// Allocate the given objc object.
+/// call i8* \@objc_alloc(i8* %value)
+llvm::Value *CodeGenFunction::EmitObjCAlloc(llvm::Value *value,
+ llvm::Type *resultType) {
+ return emitObjCValueOperation(*this, value, resultType,
+ CGM.getObjCEntrypoints().objc_alloc,
+ "objc_alloc");
+}
+
+/// Allocate the given objc object.
+/// call i8* \@objc_allocWithZone(i8* %value)
+llvm::Value *CodeGenFunction::EmitObjCAllocWithZone(llvm::Value *value,
+ llvm::Type *resultType) {
+ return emitObjCValueOperation(*this, value, resultType,
+ CGM.getObjCEntrypoints().objc_allocWithZone,
+ "objc_allocWithZone");
+}
+
/// Produce the code to do a primitive release.
/// [tmp drain];
void CodeGenFunction::EmitObjCMRRAutoreleasePoolPop(llvm::Value *Arg) {
@@ -2418,6 +2541,55 @@ void CodeGenFunction::emitARCIntrinsicUse(CodeGenFunction &CGF, Address addr,
CGF.EmitARCIntrinsicUse(value);
}
+/// Autorelease the given object.
+/// call i8* \@objc_autorelease(i8* %value)
+llvm::Value *CodeGenFunction::EmitObjCAutorelease(llvm::Value *value,
+ llvm::Type *returnType) {
+ return emitObjCValueOperation(*this, value, returnType,
+ CGM.getObjCEntrypoints().objc_autoreleaseRuntimeFunction,
+ "objc_autorelease");
+}
+
+/// Retain the given object, with normal retain semantics.
+/// call i8* \@objc_retain(i8* %value)
+llvm::Value *CodeGenFunction::EmitObjCRetainNonBlock(llvm::Value *value,
+ llvm::Type *returnType) {
+ return emitObjCValueOperation(*this, value, returnType,
+ CGM.getObjCEntrypoints().objc_retainRuntimeFunction,
+ "objc_retain");
+}
+
+/// Release the given object.
+/// call void \@objc_release(i8* %value)
+void CodeGenFunction::EmitObjCRelease(llvm::Value *value,
+ ARCPreciseLifetime_t precise) {
+ if (isa<llvm::ConstantPointerNull>(value)) return;
+
+ llvm::Constant *&fn = CGM.getObjCEntrypoints().objc_release;
+ if (!fn) {
+ if (!fn) {
+ llvm::FunctionType *fnType =
+ llvm::FunctionType::get(Builder.getVoidTy(), Int8PtrTy, false);
+ fn = CGM.CreateRuntimeFunction(fnType, "objc_release");
+ setARCRuntimeFunctionLinkage(CGM, fn);
+ // We have Native ARC, so set nonlazybind attribute for performance
+ if (llvm::Function *f = dyn_cast<llvm::Function>(fn))
+ f->addFnAttr(llvm::Attribute::NonLazyBind);
+ }
+ }
+
+ // Cast the argument to 'id'.
+ value = Builder.CreateBitCast(value, Int8PtrTy);
+
+ // Call objc_release.
+ llvm::CallInst *call = EmitNounwindRuntimeCall(fn, value);
+
+ if (precise == ARCImpreciseLifetime) {
+ call->setMetadata("clang.imprecise_release",
+ llvm::MDNode::get(Builder.getContext(), None));
+ }
+}
+
namespace {
struct CallObjCAutoreleasePoolObject final : EHScopeStack::Cleanup {
llvm::Value *Token;
@@ -2446,27 +2618,36 @@ void CodeGenFunction::EmitObjCAutoreleasePoolCleanup(llvm::Value *Ptr) {
EHStack.pushCleanup<CallObjCMRRAutoreleasePoolObject>(NormalCleanup, Ptr);
}
-static TryEmitResult tryEmitARCRetainLoadOfScalar(CodeGenFunction &CGF,
- LValue lvalue,
- QualType type) {
- switch (type.getObjCLifetime()) {
+static bool shouldRetainObjCLifetime(Qualifiers::ObjCLifetime lifetime) {
+ switch (lifetime) {
case Qualifiers::OCL_None:
case Qualifiers::OCL_ExplicitNone:
case Qualifiers::OCL_Strong:
case Qualifiers::OCL_Autoreleasing:
- return TryEmitResult(CGF.EmitLoadOfLValue(lvalue,
- SourceLocation()).getScalarVal(),
- false);
+ return true;
case Qualifiers::OCL_Weak:
- return TryEmitResult(CGF.EmitARCLoadWeakRetained(lvalue.getAddress()),
- true);
+ return false;
}
llvm_unreachable("impossible lifetime!");
}
static TryEmitResult tryEmitARCRetainLoadOfScalar(CodeGenFunction &CGF,
+ LValue lvalue,
+ QualType type) {
+ llvm::Value *result;
+ bool shouldRetain = shouldRetainObjCLifetime(type.getObjCLifetime());
+ if (shouldRetain) {
+ result = CGF.EmitLoadOfLValue(lvalue, SourceLocation()).getScalarVal();
+ } else {
+ assert(type.getObjCLifetime() == Qualifiers::OCL_Weak);
+ result = CGF.EmitARCLoadWeakRetained(lvalue.getAddress());
+ }
+ return TryEmitResult(result, !shouldRetain);
+}
+
+static TryEmitResult tryEmitARCRetainLoadOfScalar(CodeGenFunction &CGF,
const Expr *e) {
e = e->IgnoreParens();
QualType type = e->getType();
@@ -2500,6 +2681,16 @@ static TryEmitResult tryEmitARCRetainLoadOfScalar(CodeGenFunction &CGF,
cast<BinaryOperator>(e)->getOpcode() == BO_Assign)
return TryEmitResult(CGF.EmitScalarExpr(e), false);
+ // Try to emit code for scalar constant instead of emitting LValue and
+ // loading it because we are not guaranteed to have an l-value. One of such
+ // cases is DeclRefExpr referencing non-odr-used constant-evaluated variable.
+ if (const auto *decl_expr = dyn_cast<DeclRefExpr>(e)) {
+ auto *DRE = const_cast<DeclRefExpr *>(decl_expr);
+ if (CodeGenFunction::ConstantEmission constant = CGF.tryEmitAsConstant(DRE))
+ return TryEmitResult(CGF.emitScalarConstant(constant, DRE),
+ !shouldRetainObjCLifetime(type.getObjCLifetime()));
+ }
+
return tryEmitARCRetainLoadOfScalar(CGF, CGF.EmitLValue(e), type);
}
@@ -3229,29 +3420,32 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction(
ASTContext &C = getContext();
IdentifierInfo *II
= &CGM.getContext().Idents.get("__assign_helper_atomic_property_");
- FunctionDecl *FD = FunctionDecl::Create(C,
- C.getTranslationUnitDecl(),
- SourceLocation(),
- SourceLocation(), II, C.VoidTy,
- nullptr, SC_Static,
- false,
- false);
+ QualType ReturnTy = C.VoidTy;
QualType DestTy = C.getPointerType(Ty);
QualType SrcTy = Ty;
SrcTy.addConst();
SrcTy = C.getPointerType(SrcTy);
+ SmallVector<QualType, 2> ArgTys;
+ ArgTys.push_back(DestTy);
+ ArgTys.push_back(SrcTy);
+ QualType FunctionTy = C.getFunctionType(ReturnTy, ArgTys, {});
+
+ FunctionDecl *FD = FunctionDecl::Create(
+ C, C.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II,
+ FunctionTy, nullptr, SC_Static, false, false);
+
FunctionArgList args;
- ImplicitParamDecl DstDecl(getContext(), FD, SourceLocation(), /*Id=*/nullptr,
- DestTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl DstDecl(C, FD, SourceLocation(), /*Id=*/nullptr, DestTy,
+ ImplicitParamDecl::Other);
args.push_back(&DstDecl);
- ImplicitParamDecl SrcDecl(getContext(), FD, SourceLocation(), /*Id=*/nullptr,
- SrcTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl SrcDecl(C, FD, SourceLocation(), /*Id=*/nullptr, SrcTy,
+ ImplicitParamDecl::Other);
args.push_back(&SrcDecl);
const CGFunctionInfo &FI =
- CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args);
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, args);
llvm::FunctionType *LTy = CGM.getTypes().GetFunctionType(FI);
@@ -3262,25 +3456,25 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction(
CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);
- StartFunction(FD, C.VoidTy, Fn, FI, args);
+ StartFunction(FD, ReturnTy, Fn, FI, args);
- DeclRefExpr DstExpr(&DstDecl, false, DestTy,
- VK_RValue, SourceLocation());
+ DeclRefExpr DstExpr(getContext(), &DstDecl, false, DestTy, VK_RValue,
+ SourceLocation());
UnaryOperator DST(&DstExpr, UO_Deref, DestTy->getPointeeType(),
VK_LValue, OK_Ordinary, SourceLocation(), false);
- DeclRefExpr SrcExpr(&SrcDecl, false, SrcTy,
- VK_RValue, SourceLocation());
+ DeclRefExpr SrcExpr(getContext(), &SrcDecl, false, SrcTy, VK_RValue,
+ SourceLocation());
UnaryOperator SRC(&SrcExpr, UO_Deref, SrcTy->getPointeeType(),
VK_LValue, OK_Ordinary, SourceLocation(), false);
Expr *Args[2] = { &DST, &SRC };
CallExpr *CalleeExp = cast<CallExpr>(PID->getSetterCXXAssignment());
- CXXOperatorCallExpr TheCall(C, OO_Equal, CalleeExp->getCallee(),
- Args, DestTy->getPointeeType(),
- VK_LValue, SourceLocation(), FPOptions());
+ CXXOperatorCallExpr *TheCall = CXXOperatorCallExpr::Create(
+ C, OO_Equal, CalleeExp->getCallee(), Args, DestTy->getPointeeType(),
+ VK_LValue, SourceLocation(), FPOptions());
- EmitStmt(&TheCall);
+ EmitStmt(TheCall);
FinishFunction();
HelperFn = llvm::ConstantExpr::getBitCast(Fn, VoidPtrTy);
@@ -3301,53 +3495,54 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction(
if ((!(PD->getPropertyAttributes() & ObjCPropertyDecl::OBJC_PR_atomic)))
return nullptr;
llvm::Constant *HelperFn = nullptr;
-
if (hasTrivialGetExpr(PID))
return nullptr;
assert(PID->getGetterCXXConstructor() && "getGetterCXXConstructor - null");
if ((HelperFn = CGM.getAtomicGetterHelperFnMap(Ty)))
return HelperFn;
-
ASTContext &C = getContext();
- IdentifierInfo *II
- = &CGM.getContext().Idents.get("__copy_helper_atomic_property_");
- FunctionDecl *FD = FunctionDecl::Create(C,
- C.getTranslationUnitDecl(),
- SourceLocation(),
- SourceLocation(), II, C.VoidTy,
- nullptr, SC_Static,
- false,
- false);
+ IdentifierInfo *II =
+ &CGM.getContext().Idents.get("__copy_helper_atomic_property_");
+ QualType ReturnTy = C.VoidTy;
QualType DestTy = C.getPointerType(Ty);
QualType SrcTy = Ty;
SrcTy.addConst();
SrcTy = C.getPointerType(SrcTy);
+ SmallVector<QualType, 2> ArgTys;
+ ArgTys.push_back(DestTy);
+ ArgTys.push_back(SrcTy);
+ QualType FunctionTy = C.getFunctionType(ReturnTy, ArgTys, {});
+
+ FunctionDecl *FD = FunctionDecl::Create(
+ C, C.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II,
+ FunctionTy, nullptr, SC_Static, false, false);
+
FunctionArgList args;
- ImplicitParamDecl DstDecl(getContext(), FD, SourceLocation(), /*Id=*/nullptr,
- DestTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl DstDecl(C, FD, SourceLocation(), /*Id=*/nullptr, DestTy,
+ ImplicitParamDecl::Other);
args.push_back(&DstDecl);
- ImplicitParamDecl SrcDecl(getContext(), FD, SourceLocation(), /*Id=*/nullptr,
- SrcTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl SrcDecl(C, FD, SourceLocation(), /*Id=*/nullptr, SrcTy,
+ ImplicitParamDecl::Other);
args.push_back(&SrcDecl);
const CGFunctionInfo &FI =
- CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args);
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, args);
llvm::FunctionType *LTy = CGM.getTypes().GetFunctionType(FI);
- llvm::Function *Fn =
- llvm::Function::Create(LTy, llvm::GlobalValue::InternalLinkage,
- "__copy_helper_atomic_property_", &CGM.getModule());
+ llvm::Function *Fn = llvm::Function::Create(
+ LTy, llvm::GlobalValue::InternalLinkage, "__copy_helper_atomic_property_",
+ &CGM.getModule());
CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);
- StartFunction(FD, C.VoidTy, Fn, FI, args);
+ StartFunction(FD, ReturnTy, Fn, FI, args);
- DeclRefExpr SrcExpr(&SrcDecl, false, SrcTy,
- VK_RValue, SourceLocation());
+ DeclRefExpr SrcExpr(getContext(), &SrcDecl, false, SrcTy, VK_RValue,
+ SourceLocation());
UnaryOperator SRC(&SrcExpr, UO_Deref, SrcTy->getPointeeType(),
VK_LValue, OK_Ordinary, SourceLocation(), false);
@@ -3372,8 +3567,8 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction(
CXXConstExpr->getConstructionKind(),
SourceRange());
- DeclRefExpr DstExpr(&DstDecl, false, DestTy,
- VK_RValue, SourceLocation());
+ DeclRefExpr DstExpr(getContext(), &DstDecl, false, DestTy, VK_RValue,
+ SourceLocation());
RValue DV = EmitAnyExpr(&DstExpr);
CharUnits Alignment
diff --git a/lib/CodeGen/CGObjCGNU.cpp b/lib/CodeGen/CGObjCGNU.cpp
index 3e994edc976b..548bd6b3fd72 100644
--- a/lib/CodeGen/CGObjCGNU.cpp
+++ b/lib/CodeGen/CGObjCGNU.cpp
@@ -18,6 +18,7 @@
#include "CGCleanup.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
+#include "CGCXXABI.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
@@ -178,6 +179,9 @@ protected:
/// runtime provides some LLVM passes that can use this to do things like
/// automatic IMP caching and speculative inlining.
unsigned msgSendMDKind;
+ /// Does the current target use SEH-based exceptions? False implies
+ /// Itanium-style DWARF unwinding.
+ bool usesSEHExceptions;
/// Helper to check if we are targeting a specific runtime version or later.
bool isRuntime(ObjCRuntime::Kind kind, unsigned major, unsigned minor=0) {
@@ -217,6 +221,7 @@ protected:
llvm::Constant *value = llvm::ConstantDataArray::getString(VMContext,Str);
auto *GV = new llvm::GlobalVariable(TheModule, value->getType(), true,
llvm::GlobalValue::LinkOnceODRLinkage, value, name);
+ GV->setComdat(TheModule.getOrInsertComdat(name));
if (Private)
GV->setVisibility(llvm::GlobalValue::HiddenVisibility);
ConstStr = GV;
@@ -272,6 +277,8 @@ protected:
Fields.addInt(Int8Ty, 0);
}
+ virtual llvm::Constant *GenerateCategoryProtocolList(const
+ ObjCCategoryDecl *OCD);
virtual ConstantArrayBuilder PushPropertyListHeader(ConstantStructBuilder &Fields,
int count) {
// int count;
@@ -510,8 +517,8 @@ protected:
/// Returns a selector with the specified type encoding. An empty string is
/// used to return an untyped selector (with the types field set to NULL).
- virtual llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel,
- const std::string &TypeEncoding);
+ virtual llvm::Value *GetTypedSelector(CodeGenFunction &CGF, Selector Sel,
+ const std::string &TypeEncoding);
/// Returns the name of ivar offset variables. In the GNUstep v1 ABI, this
/// contains the class and ivar names, in the v2 ABI this contains the type
@@ -810,8 +817,12 @@ class CGObjCGNUstep : public CGObjCGNU {
// Slot_t objc_slot_lookup_super(struct objc_super*, SEL);
SlotLookupSuperFn.init(&CGM, "objc_slot_lookup_super", SlotTy,
PtrToObjCSuperTy, SelectorTy);
- // If we're in ObjC++ mode, then we want to make
- if (CGM.getLangOpts().CPlusPlus) {
+ // If we're in ObjC++ mode, then we want to make
+ if (usesSEHExceptions) {
+ llvm::Type *VoidTy = llvm::Type::getVoidTy(VMContext);
+ // void objc_exception_rethrow(void)
+ ExceptionReThrowFn.init(&CGM, "objc_exception_rethrow", VoidTy);
+ } else if (CGM.getLangOpts().CPlusPlus) {
llvm::Type *VoidTy = llvm::Type::getVoidTy(VMContext);
// void *__cxa_begin_catch(void *e)
EnterCatchFn.init(&CGM, "__cxa_begin_catch", PtrTy, PtrTy);
@@ -888,22 +899,25 @@ class CGObjCGNUstep : public CGObjCGNU {
/// This is the ABI that provides a clean break with the legacy GCC ABI and
/// cleans up a number of things that were added to work around 1980s linkers.
class CGObjCGNUstep2 : public CGObjCGNUstep {
- /// The section for selectors.
- static constexpr const char *const SelSection = "__objc_selectors";
- /// The section for classes.
- static constexpr const char *const ClsSection = "__objc_classes";
- /// The section for references to classes.
- static constexpr const char *const ClsRefSection = "__objc_class_refs";
- /// The section for categories.
- static constexpr const char *const CatSection = "__objc_cats";
- /// The section for protocols.
- static constexpr const char *const ProtocolSection = "__objc_protocols";
- /// The section for protocol references.
- static constexpr const char *const ProtocolRefSection = "__objc_protocol_refs";
- /// The section for class aliases
- static constexpr const char *const ClassAliasSection = "__objc_class_aliases";
- /// The section for constexpr constant strings
- static constexpr const char *const ConstantStringSection = "__objc_constant_string";
+ enum SectionKind
+ {
+ SelectorSection = 0,
+ ClassSection,
+ ClassReferenceSection,
+ CategorySection,
+ ProtocolSection,
+ ProtocolReferenceSection,
+ ClassAliasSection,
+ ConstantStringSection
+ };
+ static const char *const SectionsBaseNames[8];
+ template<SectionKind K>
+ std::string sectionName() {
+ std::string name(SectionsBaseNames[K]);
+ if (CGM.getTriple().isOSBinFormatCOFF())
+ name += "$m";
+ return name;
+ }
/// The GCC ABI superclass message lookup function. Takes a pointer to a
/// structure describing the receiver and the class, and a selector as
/// arguments. Returns the IMP for the corresponding method.
@@ -1069,7 +1083,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
isNamed ? StringRef(StringName) : ".objc_string",
Align, false, isNamed ? llvm::GlobalValue::LinkOnceODRLinkage
: llvm::GlobalValue::PrivateLinkage);
- ObjCStrGV->setSection(ConstantStringSection);
+ ObjCStrGV->setSection(sectionName<ConstantStringSection>());
if (isNamed) {
ObjCStrGV->setComdat(TheModule.getOrInsertComdat(StringName));
ObjCStrGV->setVisibility(llvm::GlobalValue::HiddenVisibility);
@@ -1152,6 +1166,15 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
return MethodList.finishAndCreateGlobal(".objc_protocol_method_list",
CGM.getPointerAlign());
}
+ llvm::Constant *GenerateCategoryProtocolList(const ObjCCategoryDecl *OCD)
+ override {
+ SmallVector<llvm::Constant*, 16> Protocols;
+ for (const auto *PI : OCD->getReferencedProtocols())
+ Protocols.push_back(
+ llvm::ConstantExpr::getBitCast(GenerateProtocolRef(PI),
+ ProtocolPtrTy));
+ return GenerateProtocolList(Protocols);
+ }
llvm::Value *LookupIMPSuper(CodeGenFunction &CGF, Address ObjCSuper,
llvm::Value *cmd, MessageSendInfo &MSI) override {
@@ -1247,9 +1270,10 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
assert(!TheModule.getGlobalVariable(RefName));
// Emit a reference symbol.
auto GV = new llvm::GlobalVariable(TheModule, ProtocolPtrTy,
- false, llvm::GlobalValue::ExternalLinkage,
+ false, llvm::GlobalValue::LinkOnceODRLinkage,
llvm::ConstantExpr::getBitCast(Protocol, ProtocolPtrTy), RefName);
- GV->setSection(ProtocolRefSection);
+ GV->setComdat(TheModule.getOrInsertComdat(RefName));
+ GV->setSection(sectionName<ProtocolReferenceSection>());
GV->setAlignment(CGM.getPointerAlign().getQuantity());
Ref = GV;
}
@@ -1282,9 +1306,22 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
EmittedProtocol = true;
+ auto SymName = SymbolForProtocol(ProtocolName);
+ auto *OldGV = TheModule.getGlobalVariable(SymName);
+
// Use the protocol definition, if there is one.
if (const ObjCProtocolDecl *Def = PD->getDefinition())
PD = Def;
+ else {
+ // If there is no definition, then create an external linkage symbol and
+ // hope that someone else fills it in for us (and fail to link if they
+ // don't).
+ assert(!OldGV);
+ Protocol = new llvm::GlobalVariable(TheModule, ProtocolTy,
+ /*isConstant*/false,
+ llvm::GlobalValue::ExternalLinkage, nullptr, SymName);
+ return Protocol;
+ }
SmallVector<llvm::Constant*, 16> Protocols;
for (const auto *PI : PD->protocols())
@@ -1301,8 +1338,6 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
EmitProtocolMethodList(PD->class_methods(), ClassMethodList,
OptionalClassMethodList);
- auto SymName = SymbolForProtocol(ProtocolName);
- auto *OldGV = TheModule.getGlobalVariable(SymName);
// The isa pointer must be set to a magic number so the runtime knows it's
// the correct layout.
ConstantInitBuilder builder(CGM);
@@ -1326,7 +1361,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
auto *GV = ProtocolBuilder.finishAndCreateGlobal(SymName,
CGM.getPointerAlign(), false, llvm::GlobalValue::ExternalLinkage);
- GV->setSection(ProtocolSection);
+ GV->setSection(sectionName<ProtocolSection>());
GV->setComdat(TheModule.getOrInsertComdat(SymName));
if (OldGV) {
OldGV->replaceAllUsesWith(llvm::ConstantExpr::getBitCast(GV,
@@ -1342,8 +1377,8 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
return Val;
return llvm::ConstantExpr::getBitCast(Val, Ty);
}
- llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel,
- const std::string &TypeEncoding) override {
+ llvm::Value *GetTypedSelector(CodeGenFunction &CGF, Selector Sel,
+ const std::string &TypeEncoding) override {
return GetConstantSelector(Sel, TypeEncoding);
}
llvm::Constant *GetTypeString(llvm::StringRef TypeEncoding) {
@@ -1359,6 +1394,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
TypeEncoding);
auto *GV = new llvm::GlobalVariable(TheModule, Init->getType(),
true, llvm::GlobalValue::LinkOnceODRLinkage, Init, TypesVarName);
+ GV->setComdat(TheModule.getOrInsertComdat(TypesVarName));
GV->setVisibility(llvm::GlobalValue::HiddenVisibility);
TypesGlobal = GV;
}
@@ -1387,12 +1423,41 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
CGM.getPointerAlign(), false, llvm::GlobalValue::LinkOnceODRLinkage);
GV->setComdat(TheModule.getOrInsertComdat(SelVarName));
GV->setVisibility(llvm::GlobalValue::HiddenVisibility);
- GV->setSection(SelSection);
+ GV->setSection(sectionName<SelectorSection>());
auto *SelVal = EnforceType(GV, SelectorTy);
return SelVal;
}
+ llvm::StructType *emptyStruct = nullptr;
+
+ /// Return pointers to the start and end of a section. On ELF platforms, we
+ /// use the __start_ and __stop_ symbols that GNU-compatible linkers will set
+ /// to the start and end of section names, as long as those section names are
+ /// valid identifiers and the symbols are referenced but not defined. On
+ /// Windows, we use the fact that MSVC-compatible linkers will lexically sort
+ /// by subsections and place everything that we want to reference in a middle
+ /// subsection and then insert zero-sized symbols in subsections a and z.
std::pair<llvm::Constant*,llvm::Constant*>
GetSectionBounds(StringRef Section) {
+ if (CGM.getTriple().isOSBinFormatCOFF()) {
+ if (emptyStruct == nullptr) {
+ emptyStruct = llvm::StructType::create(VMContext, ".objc_section_sentinel");
+ emptyStruct->setBody({}, /*isPacked*/true);
+ }
+ auto ZeroInit = llvm::Constant::getNullValue(emptyStruct);
+ auto Sym = [&](StringRef Prefix, StringRef SecSuffix) {
+ auto *Sym = new llvm::GlobalVariable(TheModule, emptyStruct,
+ /*isConstant*/false,
+ llvm::GlobalValue::LinkOnceODRLinkage, ZeroInit, Prefix +
+ Section);
+ Sym->setVisibility(llvm::GlobalValue::HiddenVisibility);
+ Sym->setSection((Section + SecSuffix).str());
+ Sym->setComdat(TheModule.getOrInsertComdat((Prefix +
+ Section).str()));
+ Sym->setAlignment(1);
+ return Sym;
+ };
+ return { Sym("__start_", "$a"), Sym("__stop", "$z") };
+ }
auto *Start = new llvm::GlobalVariable(TheModule, PtrTy,
/*isConstant*/false,
llvm::GlobalValue::ExternalLinkage, nullptr, StringRef("__start_") +
@@ -1405,6 +1470,9 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
Stop->setVisibility(llvm::GlobalValue::HiddenVisibility);
return { Start, Stop };
}
+ CatchTypeInfo getCatchAllTypeInfo() override {
+ return CGM.getCXXABI().getCatchAllTypeInfo();
+ }
llvm::Function *ModuleInitFunction() override {
llvm::Function *LoadFunction = llvm::Function::Create(
llvm::FunctionType::get(llvm::Type::getVoidTy(VMContext), false),
@@ -1420,19 +1488,11 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
ConstantInitBuilder builder(CGM);
auto InitStructBuilder = builder.beginStruct();
InitStructBuilder.addInt(Int64Ty, 0);
- auto addSection = [&](const char *section) {
- auto bounds = GetSectionBounds(section);
+ for (auto *s : SectionsBaseNames) {
+ auto bounds = GetSectionBounds(s);
InitStructBuilder.add(bounds.first);
InitStructBuilder.add(bounds.second);
};
- addSection(SelSection);
- addSection(ClsSection);
- addSection(ClsRefSection);
- addSection(CatSection);
- addSection(ProtocolSection);
- addSection(ProtocolRefSection);
- addSection(ClassAliasSection);
- addSection(ConstantStringSection);
auto *InitStruct = InitStructBuilder.finishAndCreateGlobal(".objc_init",
CGM.getPointerAlign(), false, llvm::GlobalValue::LinkOnceODRLinkage);
InitStruct->setVisibility(llvm::GlobalValue::HiddenVisibility);
@@ -1451,18 +1511,23 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
// Check that this hasn't been renamed. This shouldn't happen, because
// this function should be called precisely once.
assert(InitVar->getName() == ".objc_ctor");
- InitVar->setSection(".ctors");
+ // In Windows, initialisers are sorted by the suffix. XCL is for library
+ // initialisers, which run before user initialisers. We are running
+ // Objective-C loads at the end of library load. This means +load methods
+ // will run before any other static constructors, but that static
+ // constructors can see a fully initialised Objective-C state.
+ if (CGM.getTriple().isOSBinFormatCOFF())
+ InitVar->setSection(".CRT$XCLz");
+ else
+ InitVar->setSection(".ctors");
InitVar->setVisibility(llvm::GlobalValue::HiddenVisibility);
InitVar->setComdat(TheModule.getOrInsertComdat(".objc_ctor"));
- CGM.addCompilerUsedGlobal(InitVar);
+ CGM.addUsedGlobal(InitVar);
for (auto *C : Categories) {
auto *Cat = cast<llvm::GlobalVariable>(C->stripPointerCasts());
- Cat->setSection(CatSection);
+ Cat->setSection(sectionName<CategorySection>());
CGM.addUsedGlobal(Cat);
}
- // Add a null value fore each special section so that we can always
- // guarantee that the _start and _stop symbols will exist and be
- // meaningful.
auto createNullGlobal = [&](StringRef Name, ArrayRef<llvm::Constant*> Init,
StringRef Section) {
auto nullBuilder = builder.beginStruct();
@@ -1476,38 +1541,48 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
CGM.addUsedGlobal(GV);
return GV;
};
- createNullGlobal(".objc_null_selector", {NULLPtr, NULLPtr}, SelSection);
- if (Categories.empty())
- createNullGlobal(".objc_null_category", {NULLPtr, NULLPtr,
- NULLPtr, NULLPtr, NULLPtr, NULLPtr, NULLPtr}, CatSection);
- if (!EmittedClass) {
- createNullGlobal(".objc_null_cls_init_ref", NULLPtr, ClsSection);
- createNullGlobal(".objc_null_class_ref", { NULLPtr, NULLPtr },
- ClsRefSection);
- }
- if (!EmittedProtocol)
- createNullGlobal(".objc_null_protocol", {NULLPtr, NULLPtr, NULLPtr,
- NULLPtr, NULLPtr, NULLPtr, NULLPtr, NULLPtr, NULLPtr, NULLPtr,
- NULLPtr}, ProtocolSection);
- if (!EmittedProtocolRef)
- createNullGlobal(".objc_null_protocol_ref", {NULLPtr}, ProtocolRefSection);
- if (!ClassAliases.empty())
- for (auto clsAlias : ClassAliases)
- createNullGlobal(std::string(".objc_class_alias") +
- clsAlias.second, { MakeConstantString(clsAlias.second),
- GetClassVar(clsAlias.first) }, ClassAliasSection);
- else
- createNullGlobal(".objc_null_class_alias", { NULLPtr, NULLPtr },
- ClassAliasSection);
- if (ConstantStrings.empty()) {
- auto i32Zero = llvm::ConstantInt::get(Int32Ty, 0);
- createNullGlobal(".objc_null_constant_string", { NULLPtr, i32Zero,
- i32Zero, i32Zero, i32Zero, NULLPtr }, ConstantStringSection);
+ for (auto clsAlias : ClassAliases)
+ createNullGlobal(std::string(".objc_class_alias") +
+ clsAlias.second, { MakeConstantString(clsAlias.second),
+ GetClassVar(clsAlias.first) }, sectionName<ClassAliasSection>());
+ // On ELF platforms, add a null value for each special section so that we
+ // can always guarantee that the _start and _stop symbols will exist and be
+ // meaningful. This is not required on COFF platforms, where our start and
+ // stop symbols will create the section.
+ if (!CGM.getTriple().isOSBinFormatCOFF()) {
+ createNullGlobal(".objc_null_selector", {NULLPtr, NULLPtr},
+ sectionName<SelectorSection>());
+ if (Categories.empty())
+ createNullGlobal(".objc_null_category", {NULLPtr, NULLPtr,
+ NULLPtr, NULLPtr, NULLPtr, NULLPtr, NULLPtr},
+ sectionName<CategorySection>());
+ if (!EmittedClass) {
+ createNullGlobal(".objc_null_cls_init_ref", NULLPtr,
+ sectionName<ClassSection>());
+ createNullGlobal(".objc_null_class_ref", { NULLPtr, NULLPtr },
+ sectionName<ClassReferenceSection>());
+ }
+ if (!EmittedProtocol)
+ createNullGlobal(".objc_null_protocol", {NULLPtr, NULLPtr, NULLPtr,
+ NULLPtr, NULLPtr, NULLPtr, NULLPtr, NULLPtr, NULLPtr, NULLPtr,
+ NULLPtr}, sectionName<ProtocolSection>());
+ if (!EmittedProtocolRef)
+ createNullGlobal(".objc_null_protocol_ref", {NULLPtr},
+ sectionName<ProtocolReferenceSection>());
+ if (ClassAliases.empty())
+ createNullGlobal(".objc_null_class_alias", { NULLPtr, NULLPtr },
+ sectionName<ClassAliasSection>());
+ if (ConstantStrings.empty()) {
+ auto i32Zero = llvm::ConstantInt::get(Int32Ty, 0);
+ createNullGlobal(".objc_null_constant_string", { NULLPtr, i32Zero,
+ i32Zero, i32Zero, i32Zero, NULLPtr },
+ sectionName<ConstantStringSection>());
+ }
}
ConstantStrings.clear();
Categories.clear();
Classes.clear();
- return nullptr;//CGObjCGNU::ModuleInitFunction();
+ return nullptr;
}
/// In the v2 ABI, ivar offset variables use the type encoding in their name
/// to trigger linker failures if the types don't match.
@@ -1774,7 +1849,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
}
auto *classRefSymbol = GetClassVar(className);
- classRefSymbol->setSection(ClsRefSection);
+ classRefSymbol->setSection(sectionName<ClassReferenceSection>());
classRefSymbol->setInitializer(llvm::ConstantExpr::getBitCast(classStruct, IdTy));
@@ -1805,7 +1880,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
auto classInitRef = new llvm::GlobalVariable(TheModule,
classStruct->getType(), false, llvm::GlobalValue::ExternalLinkage,
classStruct, "._OBJC_INIT_CLASS_" + className);
- classInitRef->setSection(ClsSection);
+ classInitRef->setSection(sectionName<ClassSection>());
CGM.addUsedGlobal(classInitRef);
EmittedClass = true;
@@ -1829,6 +1904,18 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
};
+const char *const CGObjCGNUstep2::SectionsBaseNames[8] =
+{
+"__objc_selectors",
+"__objc_classes",
+"__objc_class_refs",
+"__objc_cats",
+"__objc_protocols",
+"__objc_protocol_refs",
+"__objc_class_aliases",
+"__objc_constant_string"
+};
+
/// Support for the ObjFW runtime.
class CGObjCObjFW: public CGObjCGNU {
protected:
@@ -1931,6 +2018,8 @@ CGObjCGNU::CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion,
ProtocolVersion(protocolClassVersion), ClassABIVersion(classABI) {
msgSendMDKind = VMContext.getMDKindID("GNUObjCMessageSend");
+ usesSEHExceptions =
+ cgm.getContext().getTargetInfo().getTriple().isWindowsMSVCEnvironment();
CodeGenTypes &Types = CGM.getTypes();
IntTy = cast<llvm::IntegerType>(
@@ -2121,8 +2210,8 @@ llvm::Value *CGObjCGNU::EmitNSAutoreleasePoolClassRef(CodeGenFunction &CGF) {
return Value;
}
-llvm::Value *CGObjCGNU::GetSelector(CodeGenFunction &CGF, Selector Sel,
- const std::string &TypeEncoding) {
+llvm::Value *CGObjCGNU::GetTypedSelector(CodeGenFunction &CGF, Selector Sel,
+ const std::string &TypeEncoding) {
SmallVectorImpl<TypedSelector> &Types = SelectorTable[Sel];
llvm::GlobalAlias *SelValue = nullptr;
@@ -2155,13 +2244,13 @@ Address CGObjCGNU::GetAddrOfSelector(CodeGenFunction &CGF, Selector Sel) {
}
llvm::Value *CGObjCGNU::GetSelector(CodeGenFunction &CGF, Selector Sel) {
- return GetSelector(CGF, Sel, std::string());
+ return GetTypedSelector(CGF, Sel, std::string());
}
llvm::Value *CGObjCGNU::GetSelector(CodeGenFunction &CGF,
const ObjCMethodDecl *Method) {
std::string SelTypes = CGM.getContext().getObjCEncodingForMethodDecl(Method);
- return GetSelector(CGF, Method->getSelector(), SelTypes);
+ return GetTypedSelector(CGF, Method->getSelector(), SelTypes);
}
llvm::Constant *CGObjCGNU::GetEHType(QualType T) {
@@ -2186,6 +2275,9 @@ llvm::Constant *CGObjCGNU::GetEHType(QualType T) {
}
llvm::Constant *CGObjCGNUstep::GetEHType(QualType T) {
+ if (usesSEHExceptions)
+ return CGM.getCXXABI().getAddrOfRTTIDescriptor(T);
+
if (!CGM.getLangOpts().CPlusPlus)
return CGObjCGNU::GetEHType(T);
@@ -3018,18 +3110,21 @@ llvm::Constant *CGObjCGNU::MakeBitField(ArrayRef<bool> bits) {
return ptr;
}
+llvm::Constant *CGObjCGNU::GenerateCategoryProtocolList(const
+ ObjCCategoryDecl *OCD) {
+ SmallVector<std::string, 16> Protocols;
+ for (const auto *PD : OCD->getReferencedProtocols())
+ Protocols.push_back(PD->getNameAsString());
+ return GenerateProtocolList(Protocols);
+}
+
void CGObjCGNU::GenerateCategory(const ObjCCategoryImplDecl *OCD) {
const ObjCInterfaceDecl *Class = OCD->getClassInterface();
std::string ClassName = Class->getNameAsString();
std::string CategoryName = OCD->getNameAsString();
// Collect the names of referenced protocols
- SmallVector<std::string, 16> Protocols;
const ObjCCategoryDecl *CatDecl = OCD->getCategoryDecl();
- const ObjCList<ObjCProtocolDecl> &Protos = CatDecl->getReferencedProtocols();
- for (ObjCList<ObjCProtocolDecl>::iterator I = Protos.begin(),
- E = Protos.end(); I != E; ++I)
- Protocols.push_back((*I)->getNameAsString());
ConstantInitBuilder Builder(CGM);
auto Elements = Builder.beginStruct();
@@ -3051,7 +3146,7 @@ void CGObjCGNU::GenerateCategory(const ObjCCategoryImplDecl *OCD) {
GenerateMethodList(ClassName, CategoryName, ClassMethods, true),
PtrTy);
// Protocol list
- Elements.addBitCast(GenerateProtocolList(Protocols), PtrTy);
+ Elements.addBitCast(GenerateCategoryProtocolList(CatDecl), PtrTy);
if (isRuntime(ObjCRuntime::GNUstep, 2)) {
const ObjCCategoryDecl *Category =
Class->FindCategoryDeclaration(OCD->getIdentifier());
@@ -3460,12 +3555,16 @@ llvm::Function *CGObjCGNU::ModuleInitFunction() {
ConstantInitBuilder builder(CGM);
auto selectors = builder.beginArray(selStructTy);
auto &table = SelectorTable; // MSVC workaround
- for (auto &entry : table) {
+ std::vector<Selector> allSelectors;
+ for (auto &entry : table)
+ allSelectors.push_back(entry.first);
+ llvm::sort(allSelectors);
- std::string selNameStr = entry.first.getAsString();
+ for (auto &untypedSel : allSelectors) {
+ std::string selNameStr = untypedSel.getAsString();
llvm::Constant *selName = ExportUniqueString(selNameStr, ".objc_sel_name");
- for (TypedSelector &sel : entry.second) {
+ for (TypedSelector &sel : table[untypedSel]) {
llvm::Constant *selectorTypeEncoding = NULLPtr;
if (!sel.first.empty())
selectorTypeEncoding =
@@ -3726,6 +3825,7 @@ void CGObjCGNU::EmitThrowStmt(CodeGenFunction &CGF,
const ObjCAtThrowStmt &S,
bool ClearInsertionPoint) {
llvm::Value *ExceptionAsObject;
+ bool isRethrow = false;
if (const Expr *ThrowExpr = S.getThrowExpr()) {
llvm::Value *Exception = CGF.EmitObjCThrowOperand(ThrowExpr);
@@ -3734,11 +3834,24 @@ void CGObjCGNU::EmitThrowStmt(CodeGenFunction &CGF,
assert((!CGF.ObjCEHValueStack.empty() && CGF.ObjCEHValueStack.back()) &&
"Unexpected rethrow outside @catch block.");
ExceptionAsObject = CGF.ObjCEHValueStack.back();
+ isRethrow = true;
+ }
+ if (isRethrow && usesSEHExceptions) {
+ // For SEH, ExceptionAsObject may be undef, because the catch handler is
+ // not passed it for catchalls and so it is not visible to the catch
+ // funclet. The real thrown object will still be live on the stack at this
+ // point and will be rethrown. If we are explicitly rethrowing the object
+ // that was passed into the `@catch` block, then this code path is not
+ // reached and we will instead call `objc_exception_throw` with an explicit
+ // argument.
+ CGF.EmitRuntimeCallOrInvoke(ExceptionReThrowFn).setDoesNotReturn();
+ }
+ else {
+ ExceptionAsObject = CGF.Builder.CreateBitCast(ExceptionAsObject, IdTy);
+ llvm::CallSite Throw =
+ CGF.EmitRuntimeCallOrInvoke(ExceptionThrowFn, ExceptionAsObject);
+ Throw.setDoesNotReturn();
}
- ExceptionAsObject = CGF.Builder.CreateBitCast(ExceptionAsObject, IdTy);
- llvm::CallSite Throw =
- CGF.EmitRuntimeCallOrInvoke(ExceptionThrowFn, ExceptionAsObject);
- Throw.setDoesNotReturn();
CGF.Builder.CreateUnreachable();
if (ClearInsertionPoint)
CGF.Builder.ClearInsertionPoint();
@@ -3812,40 +3925,10 @@ llvm::GlobalVariable *CGObjCGNU::ObjCIvarOffsetVariable(
// is. This allows code compiled with non-fragile ivars to work correctly
// when linked against code which isn't (most of the time).
llvm::GlobalVariable *IvarOffsetPointer = TheModule.getNamedGlobal(Name);
- if (!IvarOffsetPointer) {
- // This will cause a run-time crash if we accidentally use it. A value of
- // 0 would seem more sensible, but will silently overwrite the isa pointer
- // causing a great deal of confusion.
- uint64_t Offset = -1;
- // We can't call ComputeIvarBaseOffset() here if we have the
- // implementation, because it will create an invalid ASTRecordLayout object
- // that we are then stuck with forever, so we only initialize the ivar
- // offset variable with a guess if we only have the interface. The
- // initializer will be reset later anyway, when we are generating the class
- // description.
- if (!CGM.getContext().getObjCImplementation(
- const_cast<ObjCInterfaceDecl *>(ID)))
- Offset = ComputeIvarBaseOffset(CGM, ID, Ivar);
-
- llvm::ConstantInt *OffsetGuess = llvm::ConstantInt::get(Int32Ty, Offset,
- /*isSigned*/true);
- // Don't emit the guess in non-PIC code because the linker will not be able
- // to replace it with the real version for a library. In non-PIC code you
- // must compile with the fragile ABI if you want to use ivars from a
- // GCC-compiled class.
- if (CGM.getLangOpts().PICLevel) {
- llvm::GlobalVariable *IvarOffsetGV = new llvm::GlobalVariable(TheModule,
- Int32Ty, false,
- llvm::GlobalValue::PrivateLinkage, OffsetGuess, Name+".guess");
- IvarOffsetPointer = new llvm::GlobalVariable(TheModule,
- IvarOffsetGV->getType(), false, llvm::GlobalValue::LinkOnceAnyLinkage,
- IvarOffsetGV, Name);
- } else {
- IvarOffsetPointer = new llvm::GlobalVariable(TheModule,
- llvm::Type::getInt32PtrTy(VMContext), false,
- llvm::GlobalValue::ExternalLinkage, nullptr, Name);
- }
- }
+ if (!IvarOffsetPointer)
+ IvarOffsetPointer = new llvm::GlobalVariable(TheModule,
+ llvm::Type::getInt32PtrTy(VMContext), false,
+ llvm::GlobalValue::ExternalLinkage, nullptr, Name);
return IvarOffsetPointer;
}
diff --git a/lib/CodeGen/CGObjCMac.cpp b/lib/CodeGen/CGObjCMac.cpp
index 2b54e7bd67af..d91eb43ca322 100644
--- a/lib/CodeGen/CGObjCMac.cpp
+++ b/lib/CodeGen/CGObjCMac.cpp
@@ -23,9 +23,9 @@
#include "clang/AST/DeclObjC.h"
#include "clang/AST/RecordLayout.h"
#include "clang/AST/StmtObjC.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/LangOptions.h"
#include "clang/CodeGen/CGFunctionInfo.h"
-#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/ADT/CachedHashString.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SetVector.h"
@@ -37,6 +37,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdio>
@@ -1085,9 +1086,14 @@ public:
const CGBlockInfo &blockInfo) override;
llvm::Constant *BuildRCBlockLayout(CodeGen::CodeGenModule &CGM,
const CGBlockInfo &blockInfo) override;
+ std::string getRCBlockLayoutStr(CodeGen::CodeGenModule &CGM,
+ const CGBlockInfo &blockInfo) override;
llvm::Constant *BuildByrefLayout(CodeGen::CodeGenModule &CGM,
QualType T) override;
+
+private:
+ void fillRunSkipBlockVars(CodeGenModule &CGM, const CGBlockInfo &blockInfo);
};
namespace {
@@ -2795,8 +2801,44 @@ llvm::Constant *CGObjCCommonMac::getBitmapBlockLayout(bool ComputeByrefLayout) {
return getConstantGEP(VMContext, Entry, 0, 0);
}
-llvm::Constant *CGObjCCommonMac::BuildRCBlockLayout(CodeGenModule &CGM,
- const CGBlockInfo &blockInfo) {
+static std::string getBlockLayoutInfoString(
+ const SmallVectorImpl<CGObjCCommonMac::RUN_SKIP> &RunSkipBlockVars,
+ bool HasCopyDisposeHelpers) {
+ std::string Str;
+ for (const CGObjCCommonMac::RUN_SKIP &R : RunSkipBlockVars) {
+ if (R.opcode == CGObjCCommonMac::BLOCK_LAYOUT_UNRETAINED) {
+ // Copy/dispose helpers don't have any information about
+ // __unsafe_unretained captures, so unconditionally concatenate a string.
+ Str += "u";
+ } else if (HasCopyDisposeHelpers) {
+ // Information about __strong, __weak, or byref captures has already been
+ // encoded into the names of the copy/dispose helpers. We have to add a
+ // string here only when the copy/dispose helpers aren't generated (which
+ // happens when the block is non-escaping).
+ continue;
+ } else {
+ switch (R.opcode) {
+ case CGObjCCommonMac::BLOCK_LAYOUT_STRONG:
+ Str += "s";
+ break;
+ case CGObjCCommonMac::BLOCK_LAYOUT_BYREF:
+ Str += "r";
+ break;
+ case CGObjCCommonMac::BLOCK_LAYOUT_WEAK:
+ Str += "w";
+ break;
+ default:
+ continue;
+ }
+ }
+ Str += llvm::to_string(R.block_var_bytepos.getQuantity());
+ Str += "l" + llvm::to_string(R.block_var_size.getQuantity());
+ }
+ return Str;
+}
+
+void CGObjCCommonMac::fillRunSkipBlockVars(CodeGenModule &CGM,
+ const CGBlockInfo &blockInfo) {
assert(CGM.getLangOpts().getGC() == LangOptions::NonGC);
RunSkipBlockVars.clear();
@@ -2845,9 +2887,22 @@ llvm::Constant *CGObjCCommonMac::BuildRCBlockLayout(CodeGenModule &CGM,
UpdateRunSkipBlockVars(CI.isByRef(), getBlockCaptureLifetime(type, false),
fieldOffset, fieldSize);
}
+}
+
+llvm::Constant *
+CGObjCCommonMac::BuildRCBlockLayout(CodeGenModule &CGM,
+ const CGBlockInfo &blockInfo) {
+ fillRunSkipBlockVars(CGM, blockInfo);
return getBitmapBlockLayout(false);
}
+std::string CGObjCCommonMac::getRCBlockLayoutStr(CodeGenModule &CGM,
+ const CGBlockInfo &blockInfo) {
+ fillRunSkipBlockVars(CGM, blockInfo);
+ return getBlockLayoutInfoString(RunSkipBlockVars,
+ blockInfo.needsCopyDisposeHelpers());
+}
+
llvm::Constant *CGObjCCommonMac::BuildByrefLayout(CodeGen::CodeGenModule &CGM,
QualType T) {
assert(CGM.getLangOpts().getGC() == LangOptions::NonGC);
@@ -6783,8 +6838,9 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocol(
return Entry;
// Use the protocol definition, if there is one.
- if (const ObjCProtocolDecl *Def = PD->getDefinition())
- PD = Def;
+ assert(PD->hasDefinition() &&
+ "emitting protocol metadata without definition");
+ PD = PD->getDefinition();
auto methodLists = ProtocolMethodLists::get(PD);
@@ -7132,15 +7188,21 @@ CGObjCNonFragileABIMac::GetClassGlobal(StringRef Name,
Weak ? llvm::GlobalValue::ExternalWeakLinkage
: llvm::GlobalValue::ExternalLinkage;
-
-
llvm::GlobalVariable *GV = CGM.getModule().getGlobalVariable(Name);
- if (!GV) {
- GV = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ClassnfABITy,
- false, L, nullptr, Name);
+ if (!GV || GV->getType() != ObjCTypes.ClassnfABITy->getPointerTo()) {
+ auto *NewGV = new llvm::GlobalVariable(ObjCTypes.ClassnfABITy, false, L,
+ nullptr, Name);
if (DLLImport)
- GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
+ NewGV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
+
+ if (GV) {
+ GV->replaceAllUsesWith(
+ llvm::ConstantExpr::getBitCast(NewGV, GV->getType()));
+ GV->eraseFromParent();
+ }
+ GV = NewGV;
+ CGM.getModule().getGlobalList().push_back(GV);
}
assert(GV->getLinkage() == L);
diff --git a/lib/CodeGen/CGObjCRuntime.cpp b/lib/CodeGen/CGObjCRuntime.cpp
index a43885c0f9a2..4b6f24a03f27 100644
--- a/lib/CodeGen/CGObjCRuntime.cpp
+++ b/lib/CodeGen/CGObjCRuntime.cpp
@@ -15,6 +15,7 @@
#include "CGObjCRuntime.h"
#include "CGCleanup.h"
+#include "CGCXXABI.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
@@ -22,6 +23,7 @@
#include "clang/AST/StmtObjC.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "llvm/IR/CallSite.h"
+#include "llvm/Support/SaveAndRestore.h"
using namespace clang;
using namespace CodeGen;
@@ -120,6 +122,8 @@ namespace {
const Stmt *Body;
llvm::BasicBlock *Block;
llvm::Constant *TypeInfo;
+ /// Flags used to differentiate cleanups and catchalls in Windows SEH
+ unsigned Flags;
};
struct CallObjCEndCatch final : EHScopeStack::Cleanup {
@@ -148,13 +152,17 @@ void CGObjCRuntime::EmitTryCatchStmt(CodeGenFunction &CGF,
if (S.getNumCatchStmts())
Cont = CGF.getJumpDestInCurrentScope("eh.cont");
+ bool useFunclets = EHPersonality::get(CGF).usesFuncletPads();
+
CodeGenFunction::FinallyInfo FinallyInfo;
- if (const ObjCAtFinallyStmt *Finally = S.getFinallyStmt())
- FinallyInfo.enter(CGF, Finally->getFinallyBody(),
- beginCatchFn, endCatchFn, exceptionRethrowFn);
+ if (!useFunclets)
+ if (const ObjCAtFinallyStmt *Finally = S.getFinallyStmt())
+ FinallyInfo.enter(CGF, Finally->getFinallyBody(),
+ beginCatchFn, endCatchFn, exceptionRethrowFn);
SmallVector<CatchHandler, 8> Handlers;
+
// Enter the catch, if there is one.
if (S.getNumCatchStmts()) {
for (unsigned I = 0, N = S.getNumCatchStmts(); I != N; ++I) {
@@ -166,10 +174,13 @@ void CGObjCRuntime::EmitTryCatchStmt(CodeGenFunction &CGF,
Handler.Variable = CatchDecl;
Handler.Body = CatchStmt->getCatchBody();
Handler.Block = CGF.createBasicBlock("catch");
+ Handler.Flags = 0;
// @catch(...) always matches.
if (!CatchDecl) {
- Handler.TypeInfo = nullptr; // catch-all
+ auto catchAll = getCatchAllTypeInfo();
+ Handler.TypeInfo = catchAll.RTTI;
+ Handler.Flags = catchAll.Flags;
// Don't consider any other catches.
break;
}
@@ -179,9 +190,31 @@ void CGObjCRuntime::EmitTryCatchStmt(CodeGenFunction &CGF,
EHCatchScope *Catch = CGF.EHStack.pushCatch(Handlers.size());
for (unsigned I = 0, E = Handlers.size(); I != E; ++I)
- Catch->setHandler(I, Handlers[I].TypeInfo, Handlers[I].Block);
+ Catch->setHandler(I, { Handlers[I].TypeInfo, Handlers[I].Flags }, Handlers[I].Block);
}
+ if (useFunclets)
+ if (const ObjCAtFinallyStmt *Finally = S.getFinallyStmt()) {
+ CodeGenFunction HelperCGF(CGM, /*suppressNewContext=*/true);
+ if (!CGF.CurSEHParent)
+ CGF.CurSEHParent = cast<NamedDecl>(CGF.CurFuncDecl);
+ // Outline the finally block.
+ const Stmt *FinallyBlock = Finally->getFinallyBody();
+ HelperCGF.startOutlinedSEHHelper(CGF, /*isFilter*/false, FinallyBlock);
+
+ // Emit the body of the @finally block into the outlined helper function.
+ HelperCGF.EmitStmt(FinallyBlock);
+
+ HelperCGF.FinishFunction(FinallyBlock->getEndLoc());
+
+ llvm::Function *FinallyFunc = HelperCGF.CurFn;
+
+
+ // Push a normal-and-EH cleanup for the outlined @finally helper.
+ CGF.pushSEHCleanup(NormalAndEHCleanup, FinallyFunc);
+ }
+
+
// Emit the try body.
CGF.EmitStmt(S.getTryBody());
@@ -197,6 +230,13 @@ void CGObjCRuntime::EmitTryCatchStmt(CodeGenFunction &CGF,
CatchHandler &Handler = Handlers[I];
CGF.EmitBlock(Handler.Block);
+ llvm::CatchPadInst *CPI = nullptr;
+ SaveAndRestore<llvm::Instruction *> RestoreCurrentFuncletPad(CGF.CurrentFuncletPad);
+ if (useFunclets)
+ if ((CPI = dyn_cast_or_null<llvm::CatchPadInst>(Handler.Block->getFirstNonPHI()))) {
+ CGF.CurrentFuncletPad = CPI;
+ CPI->setOperand(2, CGF.getExceptionSlot().getPointer());
+ }
llvm::Value *RawExn = CGF.getExceptionFromSlot();
// Enter the catch.
@@ -223,6 +263,8 @@ void CGObjCRuntime::EmitTryCatchStmt(CodeGenFunction &CGF,
CGF.EmitAutoVarDecl(*CatchParam);
EmitInitOfCatchParam(CGF, CastExn, CatchParam);
}
+ if (CPI)
+ CGF.EHStack.pushCleanup<CatchRetScope>(NormalCleanup, CPI);
CGF.ObjCEHValueStack.push_back(Exn);
CGF.EmitStmt(Handler.Body);
@@ -232,13 +274,13 @@ void CGObjCRuntime::EmitTryCatchStmt(CodeGenFunction &CGF,
cleanups.ForceCleanup();
CGF.EmitBranchThroughCleanup(Cont);
- }
+ }
// Go back to the try-statement fallthrough.
CGF.Builder.restoreIP(SavedIP);
// Pop out of the finally.
- if (S.getFinallyStmt())
+ if (!useFunclets && S.getFinallyStmt())
FinallyInfo.exit(CGF);
if (Cont.isValid())
@@ -254,7 +296,7 @@ void CGObjCRuntime::EmitInitOfCatchParam(CodeGenFunction &CGF,
switch (paramDecl->getType().getQualifiers().getObjCLifetime()) {
case Qualifiers::OCL_Strong:
exn = CGF.EmitARCRetainNonBlock(exn);
- // fallthrough
+ LLVM_FALLTHROUGH;
case Qualifiers::OCL_None:
case Qualifiers::OCL_ExplicitNone:
@@ -277,7 +319,7 @@ namespace {
: SyncExitFn(SyncExitFn), SyncArg(SyncArg) {}
void Emit(CodeGenFunction &CGF, Flags flags) override {
- CGF.Builder.CreateCall(SyncExitFn, SyncArg)->setDoesNotThrow();
+ CGF.EmitNounwindRuntimeCall(SyncExitFn, SyncArg);
}
};
}
diff --git a/lib/CodeGen/CGObjCRuntime.h b/lib/CodeGen/CGObjCRuntime.h
index ce082a61eb5e..fa16c198adbc 100644
--- a/lib/CodeGen/CGObjCRuntime.h
+++ b/lib/CodeGen/CGObjCRuntime.h
@@ -17,6 +17,7 @@
#define LLVM_CLANG_LIB_CODEGEN_CGOBJCRUNTIME_H
#include "CGBuilder.h"
#include "CGCall.h"
+#include "CGCleanup.h"
#include "CGValue.h"
#include "clang/AST/DeclObjC.h"
#include "clang/Basic/IdentifierTable.h" // Selector
@@ -141,6 +142,8 @@ public:
/// error to Sema.
virtual llvm::Constant *GetEHType(QualType T) = 0;
+ virtual CatchTypeInfo getCatchAllTypeInfo() { return { nullptr, 0 }; }
+
/// Generate a constant string object.
virtual ConstantAddress GenerateConstantString(const StringLiteral *) = 0;
@@ -275,6 +278,10 @@ public:
const CodeGen::CGBlockInfo &blockInfo) = 0;
virtual llvm::Constant *BuildRCBlockLayout(CodeGen::CodeGenModule &CGM,
const CodeGen::CGBlockInfo &blockInfo) = 0;
+ virtual std::string getRCBlockLayoutStr(CodeGen::CodeGenModule &CGM,
+ const CGBlockInfo &blockInfo) {
+ return {};
+ }
/// Returns an i8* which points to the byref layout information.
virtual llvm::Constant *BuildByrefLayout(CodeGen::CodeGenModule &CGM,
diff --git a/lib/CodeGen/CGOpenCLRuntime.cpp b/lib/CodeGen/CGOpenCLRuntime.cpp
index 1da19a90c387..7f6f595dd5d1 100644
--- a/lib/CodeGen/CGOpenCLRuntime.cpp
+++ b/lib/CodeGen/CGOpenCLRuntime.cpp
@@ -62,6 +62,11 @@ llvm::Type *CGOpenCLRuntime::convertOpenCLSpecificType(const Type *T) {
case BuiltinType::OCLReserveID:
return llvm::PointerType::get(
llvm::StructType::create(Ctx, "opencl.reserve_id_t"), AddrSpc);
+#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \
+ case BuiltinType::Id: \
+ return llvm::PointerType::get( \
+ llvm::StructType::create(Ctx, "opencl." #ExtType), AddrSpc);
+#include "clang/Basic/OpenCLExtensionTypes.def"
}
}
@@ -118,25 +123,6 @@ llvm::PointerType *CGOpenCLRuntime::getGenericVoidPointerType() {
CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
}
-// Get the block literal from an expression derived from the block expression.
-// OpenCL v2.0 s6.12.5:
-// Block variable declarations are implicitly qualified with const. Therefore
-// all block variables must be initialized at declaration time and may not be
-// reassigned.
-static const BlockExpr *getBlockExpr(const Expr *E) {
- if (auto Cast = dyn_cast<CastExpr>(E)) {
- E = Cast->getSubExpr();
- }
- if (auto DR = dyn_cast<DeclRefExpr>(E)) {
- E = cast<VarDecl>(DR->getDecl())->getInit();
- }
- E = E->IgnoreImplicit();
- if (auto Cast = dyn_cast<CastExpr>(E)) {
- E = Cast->getSubExpr();
- }
- return cast<BlockExpr>(E);
-}
-
/// Record emitted llvm invoke function and llvm block literal for the
/// corresponding block expression.
void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E,
@@ -151,15 +137,21 @@ void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E,
EnqueuedBlockMap[E].Kernel = nullptr;
}
-llvm::Function *CGOpenCLRuntime::getInvokeFunction(const Expr *E) {
- return EnqueuedBlockMap[getBlockExpr(E)].InvokeFunc;
-}
-
CGOpenCLRuntime::EnqueuedBlockInfo
CGOpenCLRuntime::emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E) {
CGF.EmitScalarExpr(E);
- const BlockExpr *Block = getBlockExpr(E);
+ // The block literal may have been assigned to a const variable; chase down
+ // that variable's initializer to get the block literal.
+ if (auto DR = dyn_cast<DeclRefExpr>(E)) {
+ E = cast<VarDecl>(DR->getDecl())->getInit();
+ }
+ E = E->IgnoreImplicit();
+ if (auto Cast = dyn_cast<CastExpr>(E)) {
+ E = Cast->getSubExpr();
+ }
+ auto *Block = cast<BlockExpr>(E);
+
assert(EnqueuedBlockMap.find(Block) != EnqueuedBlockMap.end() &&
"Block expression not emitted");
diff --git a/lib/CodeGen/CGOpenCLRuntime.h b/lib/CodeGen/CGOpenCLRuntime.h
index a513340827a8..750721f1b80f 100644
--- a/lib/CodeGen/CGOpenCLRuntime.h
+++ b/lib/CodeGen/CGOpenCLRuntime.h
@@ -16,6 +16,7 @@
#ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENCLRUNTIME_H
#define LLVM_CLANG_LIB_CODEGEN_CGOPENCLRUNTIME_H
+#include "clang/AST/Expr.h"
#include "clang/AST/Type.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/Type.h"
@@ -91,10 +92,6 @@ public:
/// \param Block block literal emitted for the block expression.
void recordBlockInfo(const BlockExpr *E, llvm::Function *InvokeF,
llvm::Value *Block);
-
- /// \return LLVM block invoke function emitted for an expression derived from
- /// the block expression.
- llvm::Function *getInvokeFunction(const Expr *E);
};
}
diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp
index fa850155df4f..20eb0b29f427 100644
--- a/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -353,7 +353,7 @@ public:
if (VD->isLocalVarDeclOrParm())
continue;
- DeclRefExpr DRE(const_cast<VarDecl *>(VD),
+ DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
/*RefersToEnclosingVariableOrCapture=*/false,
VD->getType().getNonReferenceType(), VK_LValue,
C.getLocation());
@@ -673,6 +673,9 @@ enum OpenMPRTLFunction {
//
// Offloading related calls
//
+ // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
+ // size);
+ OMPRTL__kmpc_push_target_tripcount,
// Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
// arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
// *arg_types);
@@ -897,25 +900,6 @@ static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
-static llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy>
-isDeclareTargetDeclaration(const ValueDecl *VD) {
- for (const Decl *D : VD->redecls()) {
- if (!D->hasAttrs())
- continue;
- if (const auto *Attr = D->getAttr<OMPDeclareTargetDeclAttr>())
- return Attr->getMapType();
- }
- if (const auto *V = dyn_cast<VarDecl>(VD)) {
- if (const VarDecl *TD = V->getTemplateInstantiationPattern())
- return isDeclareTargetDeclaration(TD);
- } else if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
- if (const auto *TD = FD->getTemplateInstantiationPattern())
- return isDeclareTargetDeclaration(TD);
- }
-
- return llvm::None;
-}
-
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
return CGF.EmitOMPSharedLValue(E);
}
@@ -1242,6 +1226,17 @@ CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
void CGOpenMPRuntime::clear() {
InternalVars.clear();
+ // Clean non-target variable declarations possibly used only in debug info.
+ for (const auto &Data : EmittedNonTargetVariables) {
+ if (!Data.getValue().pointsToAliveValue())
+ continue;
+ auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
+ if (!GV)
+ continue;
+ if (!GV->isDeclaration() || GV->getNumUses() > 0)
+ continue;
+ GV->eraseFromParent();
+ }
}
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
@@ -1314,27 +1309,19 @@ void CGOpenMPRuntime::emitUserDefinedReduction(
CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
if (UDRMap.count(D) > 0)
return;
- ASTContext &C = CGM.getContext();
- if (!In || !Out) {
- In = &C.Idents.get("omp_in");
- Out = &C.Idents.get("omp_out");
- }
llvm::Function *Combiner = emitCombinerOrInitializer(
- CGM, D->getType(), D->getCombiner(), cast<VarDecl>(D->lookup(In).front()),
- cast<VarDecl>(D->lookup(Out).front()),
+ CGM, D->getType(), D->getCombiner(),
+ cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
+ cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
/*IsCombiner=*/true);
llvm::Function *Initializer = nullptr;
if (const Expr *Init = D->getInitializer()) {
- if (!Priv || !Orig) {
- Priv = &C.Idents.get("omp_priv");
- Orig = &C.Idents.get("omp_orig");
- }
Initializer = emitCombinerOrInitializer(
CGM, D->getType(),
D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
: nullptr,
- cast<VarDecl>(D->lookup(Orig).front()),
- cast<VarDecl>(D->lookup(Priv).front()),
+ cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
+ cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
/*IsCombiner=*/false);
}
UDRMap.try_emplace(D, Combiner, Initializer);
@@ -1406,8 +1393,8 @@ llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
bool Tied, unsigned &NumberOfParts) {
auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
PrePostActionTy &) {
- llvm::Value *ThreadID = getThreadID(CGF, D.getLocStart());
- llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getLocStart());
+ llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
+ llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
llvm::Value *TaskArgs[] = {
UpLoc, ThreadID,
CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
@@ -1456,17 +1443,17 @@ static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
template <class... As>
static llvm::GlobalVariable *
-createConstantGlobalStruct(CodeGenModule &CGM, QualType Ty,
- ArrayRef<llvm::Constant *> Data, const Twine &Name,
- As &&... Args) {
+createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
+ ArrayRef<llvm::Constant *> Data, const Twine &Name,
+ As &&... Args) {
const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
ConstantInitBuilder CIBuilder(CGM);
ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
buildStructValue(Fields, CGM, RD, RL, Data);
return Fields.finishAndCreateGlobal(
- Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty),
- /*isConstant=*/true, std::forward<As>(Args)...);
+ Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
+ std::forward<As>(Args)...);
}
template <typename T>
@@ -1483,7 +1470,9 @@ createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
- llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
+ unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
+ FlagsTy FlagsKey(Flags, Reserved2Flags);
+ llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
if (!Entry) {
if (!DefaultOpenMPPSource) {
// Initialize default location for psource field of ident_t structure of
@@ -1496,21 +1485,47 @@ Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
}
- llvm::Constant *Data[] = {llvm::ConstantInt::getNullValue(CGM.Int32Ty),
- llvm::ConstantInt::get(CGM.Int32Ty, Flags),
- llvm::ConstantInt::getNullValue(CGM.Int32Ty),
- llvm::ConstantInt::getNullValue(CGM.Int32Ty),
- DefaultOpenMPPSource};
- llvm::GlobalValue *DefaultOpenMPLocation = createConstantGlobalStruct(
- CGM, IdentQTy, Data, "", llvm::GlobalValue::PrivateLinkage);
+ llvm::Constant *Data[] = {
+ llvm::ConstantInt::getNullValue(CGM.Int32Ty),
+ llvm::ConstantInt::get(CGM.Int32Ty, Flags),
+ llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
+ llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
+ llvm::GlobalValue *DefaultOpenMPLocation =
+ createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
+ llvm::GlobalValue::PrivateLinkage);
DefaultOpenMPLocation->setUnnamedAddr(
llvm::GlobalValue::UnnamedAddr::Global);
- OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
+ OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
}
return Address(Entry, Align);
}
+void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, // Creates the "svcpt" marker instruction for CGF.CurFn and stores it as that function's ServiceInsertPt.
+ bool AtCurrentPoint) { // true: marker goes into the builder's current block; false: marker goes right after CGF.AllocaInsertPt.
+ auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); // Per-function record; created here if it does not exist yet.
+ assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
+
+ llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty); // Operand of the i32 -> i32 bitcast below; the cast only marks a position.
+ if (AtCurrentPoint) {
+ Elem.second.ServiceInsertPt = new llvm::BitCastInst(
+ Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
+ } else {
+ Elem.second.ServiceInsertPt =
+ new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); // Created detached, then linked in on the next line.
+ Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
+ }
+}
+
+void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { // Forgets and erases the "svcpt" marker of CGF.CurFn; no-op when none is set.
+ auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
+ if (Elem.second.ServiceInsertPt) {
+ llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
+ Elem.second.ServiceInsertPt = nullptr; // Clear the cached pointer before the instruction itself is erased.
+ Ptr->eraseFromParent();
+ }
+}
+
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
SourceLocation Loc,
unsigned Flags) {
@@ -1537,8 +1552,10 @@ llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
Elem.second.DebugLoc = AI.getPointer();
LocValue = AI;
+ if (!Elem.second.ServiceInsertPt)
+ setLocThreadIdInsertPt(CGF);
CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
- CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
+ CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
CGF.getTypeSize(IdentQTy));
}
@@ -1608,21 +1625,25 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
// kmpc_global_thread_num(ident_t *loc).
// Generate thread id value and cache this value for use across the
// function.
+ auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
+ if (!Elem.second.ServiceInsertPt)
+ setLocThreadIdInsertPt(CGF);
CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
- CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
+ CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
llvm::CallInst *Call = CGF.Builder.CreateCall(
createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
emitUpdateLocation(CGF, Loc));
Call->setCallingConv(CGF.getRuntimeCC());
- auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
Elem.second.ThreadID = Call;
return Call;
}
void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
assert(CGF.CurFn && "No function in current CodeGenFunction.");
- if (OpenMPLocThreadIDMap.count(CGF.CurFn))
+ if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
+ clearLocThreadIdInsertPt(CGF);
OpenMPLocThreadIDMap.erase(CGF.CurFn);
+ }
if (FunctionUDRMap.count(CGF.CurFn) > 0) {
for(auto *D : FunctionUDRMap[CGF.CurFn])
UDRMap.erase(D);
@@ -2145,6 +2166,15 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
break;
}
+ case OMPRTL__kmpc_push_target_tripcount: {
+ // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
+ // size);
+ llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
+ break;
+ }
case OMPRTL__tgt_target: {
// Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
// arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
@@ -2417,7 +2447,7 @@ Address CGOpenMPRuntime::getAddrOfDeclareTargetLink(const VarDecl *VD) {
if (CGM.getLangOpts().OpenMPSimd)
return Address::invalid();
llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
- isDeclareTargetDeclaration(VD);
+ OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
if (Res && *Res == OMPDeclareTargetDeclAttr::MT_Link) {
SmallString<64> PtrName;
{
@@ -2496,8 +2526,7 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
return nullptr;
VD = VD->getDefinition(CGM.getContext());
- if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
- ThreadPrivateWithDefinition.insert(VD);
+ if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
QualType ASTTy = VD->getType();
llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
@@ -2639,16 +2668,16 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
llvm::GlobalVariable *Addr,
bool PerformInit) {
Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
- isDeclareTargetDeclaration(VD);
+ OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link)
- return false;
+ return CGM.getLangOpts().OpenMPIsDevice;
VD = VD->getDefinition(CGM.getContext());
- if (VD && !DeclareTargetWithDefinition.insert(VD).second)
+ if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
return CGM.getLangOpts().OpenMPIsDevice;
QualType ASTTy = VD->getType();
- SourceLocation Loc = VD->getCanonicalDecl()->getLocStart();
+ SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
// Produce the unique prefix to identify the new target regions. We use
// the source location of the variable declaration which we know to not
// conflict with any target region.
@@ -3197,13 +3226,7 @@ void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}
-void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
- OpenMPDirectiveKind Kind, bool EmitChecks,
- bool ForceSimpleCall) {
- if (!CGF.HaveInsertPoint())
- return;
- // Build call __kmpc_cancel_barrier(loc, thread_id);
- // Build call __kmpc_barrier(loc, thread_id);
+unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
unsigned Flags;
if (Kind == OMPD_for)
Flags = OMP_IDENT_BARRIER_IMPL_FOR;
@@ -3215,6 +3238,17 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
Flags = OMP_IDENT_BARRIER_EXPL;
else
Flags = OMP_IDENT_BARRIER_IMPL;
+ return Flags;
+}
+
+void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
+ OpenMPDirectiveKind Kind, bool EmitChecks,
+ bool ForceSimpleCall) {
+ if (!CGF.HaveInsertPoint())
+ return;
+ // Build call __kmpc_cancel_barrier(loc, thread_id);
+ // Build call __kmpc_barrier(loc, thread_id);
+ unsigned Flags = getDefaultFlagsForBarriers(Kind);
// Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
// thread_id);
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
@@ -3287,6 +3321,18 @@ bool CGOpenMPRuntime::isStaticNonchunked(
return Schedule == OMP_dist_sch_static;
}
+bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, // Returns true when the schedule clause resolves to OMP_sch_static_chunked.
+ bool Chunked) const { // Chunked is forwarded unchanged to getRuntimeSchedule.
+ OpenMPSchedType Schedule =
+ getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); // Always queried as a non-ordered schedule.
+ return Schedule == OMP_sch_static_chunked;
+}
+
+bool CGOpenMPRuntime::isStaticChunked( // Overload for OpenMPDistScheduleClauseKind: true when the clause resolves to OMP_dist_sch_static_chunked.
+ OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
+ OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
+ return Schedule == OMP_dist_sch_static_chunked;
+}
bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
OpenMPSchedType Schedule =
@@ -3784,8 +3830,8 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
DeviceImages, Index),
HostEntriesBegin, HostEntriesEnd};
std::string Descriptor = getName({"omp_offloading", "descriptor"});
- llvm::GlobalVariable *Desc = createConstantGlobalStruct(
- CGM, getTgtBinaryDescriptorQTy(), Data, Descriptor);
+ llvm::GlobalVariable *Desc = createGlobalStruct(
+ CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true, Data, Descriptor);
// Emit code to register or unregister the descriptor at execution
// startup or closing, respectively.
@@ -3818,7 +3864,19 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
CGF.disableDebugInfo();
const auto &FI = CGM.getTypes().arrangeNullaryFunction();
llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
- std::string Descriptor = getName({"omp_offloading", "descriptor_reg"});
+
+ // Encode offload target triples into the registration function name. It
+ // will serve as a comdat key for the registration/unregistration code for
+ // this particular combination of offloading targets.
+ SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U);
+ RegFnNameParts[0] = "omp_offloading";
+ RegFnNameParts[1] = "descriptor_reg";
+ llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2),
+ [](const llvm::Triple &T) -> const std::string& {
+ return T.getTriple();
+ });
+ llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end());
+ std::string Descriptor = getName(RegFnNameParts);
RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI);
CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList());
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc);
@@ -3868,9 +3926,9 @@ void CGOpenMPRuntime::createOffloadEntry(
llvm::ConstantInt::get(CGM.Int32Ty, Flags),
llvm::ConstantInt::get(CGM.Int32Ty, 0)};
std::string EntryName = getName({"omp_offloading", "entry", ""});
- llvm::GlobalVariable *Entry = createConstantGlobalStruct(
- CGM, getTgtOffloadEntryQTy(), Data, Twine(EntryName).concat(Name),
- llvm::GlobalValue::WeakAnyLinkage);
+ llvm::GlobalVariable *Entry = createGlobalStruct(
+ CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
+ Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
// The entry has to be created in the section the linker expects it to be.
std::string Section = getName({"omp_offloading", "entries"});
@@ -3895,6 +3953,8 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
llvm::LLVMContext &C = M.getContext();
SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
OrderedEntries(OffloadEntriesInfoManager.size());
+ llvm::SmallVector<StringRef, 16> ParentFunctions(
+ OffloadEntriesInfoManager.size());
// Auxiliary methods to create metadata values and strings.
auto &&GetMDInt = [this](unsigned V) {
@@ -3909,7 +3969,7 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
// Create function that emits metadata for each target region entry;
auto &&TargetRegionMetadataEmitter =
- [&C, MD, &OrderedEntries, &GetMDInt, &GetMDString](
+ [&C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, &GetMDString](
unsigned DeviceID, unsigned FileID, StringRef ParentName,
unsigned Line,
const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
@@ -3929,6 +3989,7 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
// Save this entry in the right position of the ordered entries array.
OrderedEntries[E.getOrder()] = &E;
+ ParentFunctions[E.getOrder()] = ParentName;
// Add metadata to the named metadata node.
MD->addOperand(llvm::MDNode::get(C, Ops));
@@ -3970,6 +4031,10 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
E)) {
if (!CE->getID() || !CE->getAddress()) {
+ // Do not blame the entry if the parent function is not emitted.
+ StringRef FnName = ParentFunctions[CE->getOrder()];
+ if (!CGM.GetGlobalValue(FnName))
+ continue;
unsigned DiagID = CGM.getDiags().getCustomDiagID(
DiagnosticsEngine::Error,
"Offloading entry for target region is incorrect: either the "
@@ -3995,6 +4060,9 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
CGM.getDiags().Report(DiagID);
continue;
}
+ // The variable has no definition - no need to add the entry.
+ if (CE->getVarSize().isZero())
+ continue;
break;
}
case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
@@ -5226,8 +5294,8 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
LBLVal.getPointer(),
UBLVal.getPointer(),
CGF.EmitLoadOfScalar(StLVal, Loc),
- llvm::ConstantInt::getNullValue(
- CGF.IntTy), // Always 0 because taskgroup emitted by the compiler
+ llvm::ConstantInt::getSigned(
+ CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
llvm::ConstantInt::getSigned(
CGF.IntTy, Data.Schedule.getPointer()
? Data.Schedule.getInt() ? NumTasks : Grainsize
@@ -5776,7 +5844,7 @@ static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
std::string Name = CGM.getOpenMPRuntime().getName(
{D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
Out << Prefix << Name << "_"
- << D->getCanonicalDecl()->getLocStart().getRawEncoding();
+ << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
return Out.str();
}
@@ -6274,7 +6342,7 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
unsigned DeviceID;
unsigned FileID;
unsigned Line;
- getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID,
+ getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
Line);
SmallString<64> EntryFnName;
{
@@ -6589,17 +6657,17 @@ private:
struct MapInfo {
OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
- OpenMPMapClauseKind MapTypeModifier = OMPC_MAP_unknown;
+ ArrayRef<OpenMPMapModifierKind> MapModifiers;
bool ReturnDevicePointer = false;
bool IsImplicit = false;
MapInfo() = default;
MapInfo(
OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
- OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
+ OpenMPMapClauseKind MapType,
+ ArrayRef<OpenMPMapModifierKind> MapModifiers,
bool ReturnDevicePointer, bool IsImplicit)
- : Components(Components), MapType(MapType),
- MapTypeModifier(MapTypeModifier),
+ : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
};
@@ -6676,10 +6744,9 @@ private:
/// a flag marking the map as a pointer if requested. Add a flag marking the
/// map as the first one of a series of maps that relate to the same map
/// expression.
- OpenMPOffloadMappingFlags getMapTypeBits(OpenMPMapClauseKind MapType,
- OpenMPMapClauseKind MapTypeModifier,
- bool IsImplicit, bool AddPtrFlag,
- bool AddIsTargetParamFlag) const {
+ OpenMPOffloadMappingFlags getMapTypeBits(
+ OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
+ bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
OpenMPOffloadMappingFlags Bits =
IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
switch (MapType) {
@@ -6702,7 +6769,6 @@ private:
case OMPC_MAP_delete:
Bits |= OMP_MAP_DELETE;
break;
- case OMPC_MAP_always:
case OMPC_MAP_unknown:
llvm_unreachable("Unexpected map type!");
}
@@ -6710,7 +6776,8 @@ private:
Bits |= OMP_MAP_PTR_AND_OBJ;
if (AddIsTargetParamFlag)
Bits |= OMP_MAP_TARGET_PARAM;
- if (MapTypeModifier == OMPC_MAP_always)
+ if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
+ != MapModifiers.end())
Bits |= OMP_MAP_ALWAYS;
return Bits;
}
@@ -6746,10 +6813,11 @@ private:
}
// Check if the length evaluates to 1.
- llvm::APSInt ConstLength;
- if (!Length->EvaluateAsInt(ConstLength, CGF.getContext()))
+ Expr::EvalResult Result;
+ if (!Length->EvaluateAsInt(Result, CGF.getContext()))
return true; // Can have more that size 1.
+ llvm::APSInt ConstLength = Result.Val.getInt();
return ConstLength.getSExtValue() != 1;
}
@@ -6758,12 +6826,15 @@ private:
/// \a IsFirstComponent should be set to true if the provided set of
/// components is the first associated with a capture.
void generateInfoForComponentList(
- OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
+ OpenMPMapClauseKind MapType,
+ ArrayRef<OpenMPMapModifierKind> MapModifiers,
OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
- bool IsImplicit) const {
+ bool IsImplicit,
+ ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
+ OverlappedElements = llvm::None) const {
// The following summarizes what has to be generated for each map and the
// types below. The generated information is expressed in this order:
// base pointer, section pointer, size, flags
@@ -6933,19 +7004,26 @@ private:
// components.
bool IsExpressionFirstInfo = true;
Address BP = Address::invalid();
+ const Expr *AssocExpr = I->getAssociatedExpression();
+ const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
+ const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
- if (isa<MemberExpr>(I->getAssociatedExpression())) {
+ if (isa<MemberExpr>(AssocExpr)) {
// The base is the 'this' pointer. The content of the pointer is going
// to be the base of the field being mapped.
BP = CGF.LoadCXXThisAddress();
+ } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
+ (OASE &&
+ isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
+ BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
} else {
// The base is the reference to the variable.
// BP = &Var.
- BP = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress();
+ BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
if (const auto *VD =
dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
- isDeclareTargetDeclaration(VD))
+ OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD))
if (*Res == OMPDeclareTargetDeclAttr::MT_Link) {
IsLink = true;
BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD);
@@ -7034,7 +7112,6 @@ private:
Address LB =
CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress();
- llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
// If this component is a pointer inside the base struct then we don't
// need to create any entry for it - it will be combined with the object
@@ -7043,6 +7120,70 @@ private:
IsPointer && EncounteredME &&
(dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
EncounteredME);
+ if (!OverlappedElements.empty()) {
+ // Handle base element with the info for overlapped elements.
+ assert(!PartialStruct.Base.isValid() && "The base element is set.");
+ assert(Next == CE &&
+ "Expected last element for the overlapped elements.");
+ assert(!IsPointer &&
+ "Unexpected base element with the pointer type.");
+ // Mark the whole struct as the struct that requires allocation on the
+ // device.
+ PartialStruct.LowestElem = {0, LB};
+ CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
+ I->getAssociatedExpression()->getType());
+ Address HB = CGF.Builder.CreateConstGEP(
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
+ CGF.VoidPtrTy),
+ TypeSize.getQuantity() - 1, CharUnits::One());
+ PartialStruct.HighestElem = {
+ std::numeric_limits<decltype(
+ PartialStruct.HighestElem.first)>::max(),
+ HB};
+ PartialStruct.Base = BP;
+ // Emit data for non-overlapped data.
+ OpenMPOffloadMappingFlags Flags =
+ OMP_MAP_MEMBER_OF |
+ getMapTypeBits(MapType, MapModifiers, IsImplicit,
+ /*AddPtrFlag=*/false,
+ /*AddIsTargetParamFlag=*/false);
+ LB = BP;
+ llvm::Value *Size = nullptr;
+ // Do bitcopy of all non-overlapped structure elements.
+ for (OMPClauseMappableExprCommon::MappableExprComponentListRef
+ Component : OverlappedElements) {
+ Address ComponentLB = Address::invalid();
+ for (const OMPClauseMappableExprCommon::MappableComponent &MC :
+ Component) {
+ if (MC.getAssociatedDeclaration()) {
+ ComponentLB =
+ CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
+ .getAddress();
+ Size = CGF.Builder.CreatePtrDiff(
+ CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
+ CGF.EmitCastToVoidPtr(LB.getPointer()));
+ break;
+ }
+ }
+ BasePointers.push_back(BP.getPointer());
+ Pointers.push_back(LB.getPointer());
+ Sizes.push_back(Size);
+ Types.push_back(Flags);
+ LB = CGF.Builder.CreateConstGEP(ComponentLB, 1,
+ CGF.getPointerSize());
+ }
+ BasePointers.push_back(BP.getPointer());
+ Pointers.push_back(LB.getPointer());
+ Size = CGF.Builder.CreatePtrDiff(
+ CGF.EmitCastToVoidPtr(
+ CGF.Builder.CreateConstGEP(HB, 1, CharUnits::One())
+ .getPointer()),
+ CGF.EmitCastToVoidPtr(LB.getPointer()));
+ Sizes.push_back(Size);
+ Types.push_back(Flags);
+ break;
+ }
+ llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
if (!IsMemberPointer) {
BasePointers.push_back(BP.getPointer());
Pointers.push_back(LB.getPointer());
@@ -7053,7 +7194,7 @@ private:
// this map is the first one that relates with the current capture
// (there is a set of entries for each capture).
OpenMPOffloadMappingFlags Flags = getMapTypeBits(
- MapType, MapTypeModifier, IsImplicit,
+ MapType, MapModifiers, IsImplicit,
!IsExpressionFirstInfo || IsLink, IsCaptureFirstInfo && !IsLink);
if (!IsExpressionFirstInfo) {
@@ -7147,6 +7288,66 @@ private:
Flags |= MemberOfFlag;
}
+ void getPlainLayout(const CXXRecordDecl *RD, // Appends the non-bit-field fields of RD, including those of its non-empty bases, to Layout in LLVM struct element order.
+ llvm::SmallVectorImpl<const FieldDecl *> &Layout,
+ bool AsBase) const { // AsBase selects the base-subobject LLVM type of RD instead of its complete-object type.
+ const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
+
+ llvm::StructType *St =
+ AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
+
+ unsigned NumElements = St->getNumElements();
+ llvm::SmallVector<
+ llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
+ RecordLayout(NumElements); // One slot per LLVM struct element; slots that stay null are skipped in the final loop.
+
+ // Record each non-virtual, non-empty base at its LLVM field index.
+ for (const auto &I : RD->bases()) {
+ if (I.isVirtual())
+ continue;
+ const auto *Base = I.getType()->getAsCXXRecordDecl();
+ // Skip bases that are empty or have zero non-virtual size.
+ if (Base->isEmpty() || CGF.getContext()
+ .getASTRecordLayout(Base)
+ .getNonVirtualSize()
+ .isZero())
+ continue;
+
+ unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
+ RecordLayout[FieldIndex] = Base;
+ }
+ // Record non-empty virtual bases, keeping any slot that is already taken.
+ for (const auto &I : RD->vbases()) {
+ const auto *Base = I.getType()->getAsCXXRecordDecl();
+ // Ignore empty bases.
+ if (Base->isEmpty())
+ continue;
+ unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
+ if (RecordLayout[FieldIndex])
+ continue;
+ RecordLayout[FieldIndex] = Base;
+ }
+ // Fill in all the fields.
+ assert(!RD->isUnion() && "Unexpected union.");
+ for (const auto *Field : RD->fields()) {
+ // Only non-bit-field members are recorded. NOTE(review): nothing in this
+ // function handles bit-fields afterwards, so they are simply left out.
+ if (!Field->isBitField()) {
+ unsigned FieldIndex = RL.getLLVMFieldNo(Field);
+ RecordLayout[FieldIndex] = Field;
+ }
+ }
+ for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
+ &Data : RecordLayout) { // Walk the slots in element order to produce the flattened field list.
+ if (Data.isNull()) // Element with no recorded base or field.
+ continue;
+ if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
+ getPlainLayout(Base, Layout, /*AsBase=*/true); // Recurse so the base's own fields are appended at this position.
+ else
+ Layout.push_back(Data.get<const FieldDecl *>());
+ }
+ }
+
public:
MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
: CurDir(Dir), CGF(CGF) {
@@ -7213,28 +7414,29 @@ public:
auto &&InfoGen = [&Info](
const ValueDecl *D,
OMPClauseMappableExprCommon::MappableExprComponentListRef L,
- OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier,
+ OpenMPMapClauseKind MapType,
+ ArrayRef<OpenMPMapModifierKind> MapModifiers,
bool ReturnDevicePointer, bool IsImplicit) {
const ValueDecl *VD =
D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
- Info[VD].emplace_back(L, MapType, MapModifier, ReturnDevicePointer,
+ Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
IsImplicit);
};
// FIXME: MSVC 2013 seems to require this-> to find member CurDir.
for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
for (const auto &L : C->component_lists()) {
- InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifier(),
+ InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
/*ReturnDevicePointer=*/false, C->isImplicit());
}
for (const auto *C : this->CurDir.getClausesOfKind<OMPToClause>())
for (const auto &L : C->component_lists()) {
- InfoGen(L.first, L.second, OMPC_MAP_to, OMPC_MAP_unknown,
+ InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
/*ReturnDevicePointer=*/false, C->isImplicit());
}
for (const auto *C : this->CurDir.getClausesOfKind<OMPFromClause>())
for (const auto &L : C->component_lists()) {
- InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown,
+ InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
/*ReturnDevicePointer=*/false, C->isImplicit());
}
@@ -7287,7 +7489,7 @@ public:
// Nonetheless, generateInfoForComponentList must be called to take
// the pointer into account for the calculation of the range of the
// partial struct.
- InfoGen(nullptr, L.second, OMPC_MAP_unknown, OMPC_MAP_unknown,
+ InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
/*ReturnDevicePointer=*/false, C->isImplicit());
DeferredInfo[nullptr].emplace_back(IE, VD);
} else {
@@ -7321,7 +7523,7 @@ public:
unsigned CurrentBasePointersIdx = CurBasePointers.size();
// FIXME: MSVC 2013 seems to require this-> to find the member method.
this->generateInfoForComponentList(
- L.MapType, L.MapTypeModifier, L.Components, CurBasePointers,
+ L.MapType, L.MapModifiers, L.Components, CurBasePointers,
CurPointers, CurSizes, CurTypes, PartialStruct,
IsFirstComponentList, L.IsImplicit);
@@ -7375,6 +7577,82 @@ public:
}
}
+ /// Emit map entries for the captures of a lambda object: the captured `this` (if any) and every by-reference capture. Does nothing when the type of \p VD is not a lambda closure class.
+ void generateInfoForLambdaCaptures(
+ const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
+ MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
+ MapFlagsArrayTy &Types,
+ llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const { // LambdaPointers receives capture-field pointer -> lambda object pointer pairs.
+ const auto *RD = VD->getType()
+ .getCanonicalType()
+ .getNonReferenceType()
+ ->getAsCXXRecordDecl();
+ if (!RD || !RD->isLambda())
+ return;
+ Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD)); // Arg is used as the address of the lambda object.
+ LValue VDLVal = CGF.MakeAddrLValue(
+ VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
+ llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
+ FieldDecl *ThisCapture = nullptr;
+ RD->getCaptureFields(Captures, ThisCapture); // Populates Captures and ThisCapture, both consulted below.
+ if (ThisCapture) {
+ LValue ThisLVal =
+ CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
+ LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
+ LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer()); // Remember which lambda object this entry belongs to.
+ BasePointers.push_back(ThisLVal.getPointer());
+ Pointers.push_back(ThisLValVal.getPointer());
+ Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy)); // The `this` capture entry is sized as a void pointer.
+ Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
+ OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); // Exact flag combination that adjustMemberOfForLambdaCaptures matches on.
+ }
+ for (const LambdaCapture &LC : RD->captures()) {
+ if (LC.getCaptureKind() != LCK_ByRef) // Only by-reference captures get an entry here.
+ continue;
+ const VarDecl *VD = LC.getCapturedVar(); // NOTE(review): shadows the VD parameter for the rest of this loop body.
+ auto It = Captures.find(VD);
+ assert(It != Captures.end() && "Found lambda capture without field.");
+ LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
+ LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
+ LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
+ BasePointers.push_back(VarLVal.getPointer());
+ Pointers.push_back(VarLValVal.getPointer());
+ Sizes.push_back(CGF.getTypeSize(
+ VD->getType().getCanonicalType().getNonReferenceType())); // Size of the captured variable's non-reference type.
+ Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
+ OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
+ }
+ }
+
+ /// Fix up the MEMBER_OF index of every implicit lambda-capture entry so that it refers to the earlier entry whose pointer is the enclosing lambda object.
+ void adjustMemberOfForLambdaCaptures(
+ const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
+ MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
+ MapFlagsArrayTy &Types) const {
+ for (unsigned I = 0, E = Types.size(); I < E; ++I) {
+ // Set correct member_of idx for all implicit lambda captures.
+ if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
+ OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT)) // Exact match: only entries carrying precisely this flag set are adjusted.
+ continue;
+ llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]); // Lambda object pointer recorded for this entry's base pointer.
+ assert(BasePtr && "Unable to find base lambda address.");
+ int TgtIdx = -1;
+ for (unsigned J = I; J > 0; --J) { // Scan the entries before I, nearest first.
+ unsigned Idx = J - 1;
+ if (Pointers[Idx] != BasePtr)
+ continue;
+ TgtIdx = Idx;
+ break;
+ }
+ assert(TgtIdx != -1 && "Unable to find parent lambda.");
+ // All other current entries will be MEMBER_OF the combined entry
+ // (except for PTR_AND_OBJ entries which do not have a placeholder value
+ // 0xFFFF in the MEMBER_OF field).
+ OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
+ setCorrectMemberOfFlag(Types[I], MemberOfFlag);
+ }
+ }
+
/// Generate the base pointers, section pointers, sizes and map types
/// associated to a given capture.
void generateInfoForCapture(const CapturedStmt::Capture *Cap,
@@ -7387,9 +7665,6 @@ public:
"Not expecting to generate map info for a variable array type!");
// We need to know when we generating information for the first component
- // associated with a capture, because the mapping flags depend on it.
- bool IsFirstComponentList = true;
-
const ValueDecl *VD = Cap->capturesThis()
? nullptr
: Cap->getCapturedVar()->getCanonicalDecl();
@@ -7405,19 +7680,151 @@ public:
return;
}
+ using MapData =
+ std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
+ OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
+ SmallVector<MapData, 4> DeclComponentLists;
// FIXME: MSVC 2013 seems to require this-> to find member CurDir.
- for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
+ for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
for (const auto &L : C->decl_component_lists(VD)) {
assert(L.first == VD &&
"We got information for the wrong declaration??");
assert(!L.second.empty() &&
"Not expecting declaration with no component lists.");
- generateInfoForComponentList(C->getMapType(), C->getMapTypeModifier(),
- L.second, BasePointers, Pointers, Sizes,
- Types, PartialStruct, IsFirstComponentList,
- C->isImplicit());
- IsFirstComponentList = false;
+ DeclComponentLists.emplace_back(L.second, C->getMapType(),
+ C->getMapTypeModifiers(),
+ C->isImplicit());
+ }
+ }
+
+ // Find overlapping elements (including the offset from the base element).
+ llvm::SmallDenseMap<
+ const MapData *,
+ llvm::SmallVector<
+ OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
+ 4>
+ OverlappedData;
+ size_t Count = 0;
+ for (const MapData &L : DeclComponentLists) {
+ OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
+ OpenMPMapClauseKind MapType;
+ ArrayRef<OpenMPMapModifierKind> MapModifiers;
+ bool IsImplicit;
+ std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
+ ++Count;
+ for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
+ OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
+ std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
+ auto CI = Components.rbegin();
+ auto CE = Components.rend();
+ auto SI = Components1.rbegin();
+ auto SE = Components1.rend();
+ for (; CI != CE && SI != SE; ++CI, ++SI) {
+ if (CI->getAssociatedExpression()->getStmtClass() !=
+ SI->getAssociatedExpression()->getStmtClass())
+ break;
+ // Are we dealing with different variables/fields?
+ if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
+ break;
+ }
+ // The lists overlap if at least one of them was traversed completely, i.e.
+ // we reached the head of its components list.
+ if (CI == CE || SI == SE) {
+ assert((CI != CE || SI != SE) &&
+ "Unexpected full match of the mapping components.");
+ const MapData &BaseData = CI == CE ? L : L1;
+ OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
+ SI == SE ? Components : Components1;
+ auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
+ OverlappedElements.getSecond().push_back(SubData);
+ }
}
+ }
+ // Sort the overlapped elements for each item.
+ llvm::SmallVector<const FieldDecl *, 4> Layout;
+ if (!OverlappedData.empty()) {
+ if (const auto *CRD =
+ VD->getType().getCanonicalType()->getAsCXXRecordDecl())
+ getPlainLayout(CRD, Layout, /*AsBase=*/false);
+ else {
+ const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
+ Layout.append(RD->field_begin(), RD->field_end());
+ }
+ }
+ for (auto &Pair : OverlappedData) {
+ llvm::sort(
+ Pair.getSecond(),
+ [&Layout](
+ OMPClauseMappableExprCommon::MappableExprComponentListRef First,
+ OMPClauseMappableExprCommon::MappableExprComponentListRef
+ Second) {
+ auto CI = First.rbegin();
+ auto CE = First.rend();
+ auto SI = Second.rbegin();
+ auto SE = Second.rend();
+ for (; CI != CE && SI != SE; ++CI, ++SI) {
+ if (CI->getAssociatedExpression()->getStmtClass() !=
+ SI->getAssociatedExpression()->getStmtClass())
+ break;
+ // Are we dealing with different variables/fields?
+ if (CI->getAssociatedDeclaration() !=
+ SI->getAssociatedDeclaration())
+ break;
+ }
+
+ // Lists contain the same elements.
+ if (CI == CE && SI == SE)
+ return false;
+
+ // A list with fewer elements orders before a list with more elements.
+ if (CI == CE || SI == SE)
+ return CI == CE;
+
+ const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
+ const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
+ if (FD1->getParent() == FD2->getParent())
+ return FD1->getFieldIndex() < FD2->getFieldIndex();
+ const auto It =
+ llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
+ return FD == FD1 || FD == FD2;
+ });
+ return *It == FD1;
+ });
+ }
+
+ // The mapping flags depend on whether a component list is the first one
+ // for the capture. Go through all elements that have overlapped elements.
+ for (const auto &Pair : OverlappedData) {
+ const MapData &L = *Pair.getFirst();
+ OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
+ OpenMPMapClauseKind MapType;
+ ArrayRef<OpenMPMapModifierKind> MapModifiers;
+ bool IsImplicit;
+ std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
+ ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
+ OverlappedComponents = Pair.getSecond();
+ bool IsFirstComponentList = true;
+ generateInfoForComponentList(MapType, MapModifiers, Components,
+ BasePointers, Pointers, Sizes, Types,
+ PartialStruct, IsFirstComponentList,
+ IsImplicit, OverlappedComponents);
+ }
+ // Go through other elements without overlapped elements.
+ bool IsFirstComponentList = OverlappedData.empty();
+ for (const MapData &L : DeclComponentLists) {
+ OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
+ OpenMPMapClauseKind MapType;
+ ArrayRef<OpenMPMapModifierKind> MapModifiers;
+ bool IsImplicit;
+ std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
+ auto It = OverlappedData.find(&L);
+ if (It == OverlappedData.end())
+ generateInfoForComponentList(MapType, MapModifiers, Components,
+ BasePointers, Pointers, Sizes, Types,
+ PartialStruct, IsFirstComponentList,
+ IsImplicit);
+ IsFirstComponentList = false;
+ }
}
/// Generate the base pointers, section pointers, sizes and map types
@@ -7436,12 +7843,12 @@ public:
if (!VD)
continue;
llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
- isDeclareTargetDeclaration(VD);
+ OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
if (!Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
continue;
StructRangeInfoTy PartialStruct;
generateInfoForComponentList(
- C->getMapType(), C->getMapTypeModifier(), L.second, BasePointers,
+ C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
Pointers, Sizes, Types, PartialStruct,
/*IsFirstComponentList=*/true, C->isImplicit());
assert(!PartialStruct.Base.isValid() &&
@@ -7658,6 +8065,183 @@ static void emitOffloadingArraysArgument(
}
}
+/// Checks if the expression has no side effects and is either constant or free
+/// of non-trivial function calls.
+static bool isTrivial(ASTContext &Ctx, const Expr * E) {
+ // We can skip constant expressions.
+ // We can skip expressions with trivial calls or simple expressions.
+ return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
+ !E->hasNonTrivialCall(Ctx)) &&
+ !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
+}
+
+/// If \p Body is a \a CompoundStmt with exactly one child statement that cannot
+/// be ignored as trivial, returns that child; otherwise returns \p Body.
+static const Stmt *getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body) {
+ if (const auto *C = dyn_cast<CompoundStmt>(Body)) {
+ const Stmt *Child = nullptr;
+ for (const Stmt *S : C->body()) {
+ if (const auto *E = dyn_cast<Expr>(S)) {
+ if (isTrivial(Ctx, E))
+ continue;
+ }
+ // Some of the statements can be ignored.
+ if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
+ isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
+ continue;
+ // Analyze declarations.
+ if (const auto *DS = dyn_cast<DeclStmt>(S)) {
+ if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
+ if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
+ isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
+ isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
+ isa<UsingDirectiveDecl>(D) ||
+ isa<OMPDeclareReductionDecl>(D) ||
+ isa<OMPThreadPrivateDecl>(D))
+ return true;
+ const auto *VD = dyn_cast<VarDecl>(D);
+ if (!VD)
+ return false;
+ return VD->isConstexpr() ||
+ ((VD->getType().isTrivialType(Ctx) ||
+ VD->getType()->isReferenceType()) &&
+ (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
+ }))
+ continue;
+ }
+ // Found multiple children - cannot get the one child only.
+ if (Child)
+ return Body;
+ Child = S;
+ }
+ if (Child)
+ return Child;
+ }
+ return Body;
+}
+
+/// Check for inner distribute directive.
+static const OMPExecutableDirective *
+getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
+ const auto *CS = D.getInnermostCapturedStmt();
+ const auto *Body =
+ CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
+ const Stmt *ChildStmt = getSingleCompoundChild(Ctx, Body);
+
+ if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
+ OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
+ switch (D.getDirectiveKind()) {
+ case OMPD_target:
+ if (isOpenMPDistributeDirective(DKind))
+ return NestedDir;
+ if (DKind == OMPD_teams) {
+ Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
+ /*IgnoreCaptured=*/true);
+ if (!Body)
+ return nullptr;
+ ChildStmt = getSingleCompoundChild(Ctx, Body);
+ if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
+ DKind = NND->getDirectiveKind();
+ if (isOpenMPDistributeDirective(DKind))
+ return NND;
+ }
+ }
+ return nullptr;
+ case OMPD_target_teams:
+ if (isOpenMPDistributeDirective(DKind))
+ return NestedDir;
+ return nullptr;
+ case OMPD_target_parallel:
+ case OMPD_target_simd:
+ case OMPD_target_parallel_for:
+ case OMPD_target_parallel_for_simd:
+ return nullptr;
+ case OMPD_target_teams_distribute:
+ case OMPD_target_teams_distribute_simd:
+ case OMPD_target_teams_distribute_parallel_for:
+ case OMPD_target_teams_distribute_parallel_for_simd:
+ case OMPD_parallel:
+ case OMPD_for:
+ case OMPD_parallel_for:
+ case OMPD_parallel_sections:
+ case OMPD_for_simd:
+ case OMPD_parallel_for_simd:
+ case OMPD_cancel:
+ case OMPD_cancellation_point:
+ case OMPD_ordered:
+ case OMPD_threadprivate:
+ case OMPD_task:
+ case OMPD_simd:
+ case OMPD_sections:
+ case OMPD_section:
+ case OMPD_single:
+ case OMPD_master:
+ case OMPD_critical:
+ case OMPD_taskyield:
+ case OMPD_barrier:
+ case OMPD_taskwait:
+ case OMPD_taskgroup:
+ case OMPD_atomic:
+ case OMPD_flush:
+ case OMPD_teams:
+ case OMPD_target_data:
+ case OMPD_target_exit_data:
+ case OMPD_target_enter_data:
+ case OMPD_distribute:
+ case OMPD_distribute_simd:
+ case OMPD_distribute_parallel_for:
+ case OMPD_distribute_parallel_for_simd:
+ case OMPD_teams_distribute:
+ case OMPD_teams_distribute_simd:
+ case OMPD_teams_distribute_parallel_for:
+ case OMPD_teams_distribute_parallel_for_simd:
+ case OMPD_target_update:
+ case OMPD_declare_simd:
+ case OMPD_declare_target:
+ case OMPD_end_declare_target:
+ case OMPD_declare_reduction:
+ case OMPD_taskloop:
+ case OMPD_taskloop_simd:
+ case OMPD_requires:
+ case OMPD_unknown:
+ llvm_unreachable("Unexpected directive.");
+ }
+ }
+
+ return nullptr;
+}
+
+void CGOpenMPRuntime::emitTargetNumIterationsCall(
+ CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device,
+ const llvm::function_ref<llvm::Value *(
+ CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter) {
+ OpenMPDirectiveKind Kind = D.getDirectiveKind();
+ const OMPExecutableDirective *TD = &D;
+ // Get nested teams distribute kind directive, if any.
+ if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
+ TD = getNestedDistributeDirective(CGM.getContext(), D);
+ if (!TD)
+ return;
+ const auto *LD = cast<OMPLoopDirective>(TD);
+ auto &&CodeGen = [LD, &Device, &SizeEmitter, this](CodeGenFunction &CGF,
+ PrePostActionTy &) {
+ llvm::Value *NumIterations = SizeEmitter(CGF, *LD);
+
+ // Emit device ID if any.
+ llvm::Value *DeviceID;
+ if (Device)
+ DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
+ CGF.Int64Ty, /*isSigned=*/true);
+ else
+ DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
+
+ llvm::Value *Args[] = {DeviceID, NumIterations};
+ CGF.EmitRuntimeCall(
+ createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
+ };
+ emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
+}
+
void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
const OMPExecutableDirective &D,
llvm::Value *OutlinedFn,
@@ -7790,7 +8374,7 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
CapturedVars.clear();
CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
}
- emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, CapturedVars);
+ emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
CGF.EmitBranch(OffloadContBlock);
CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
@@ -7804,7 +8388,7 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
CapturedVars.clear();
CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
}
- emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, CapturedVars);
+ emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
};
auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
@@ -7818,6 +8402,7 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
// Get mappable expression information.
MappableExprsHandler MEHandler(D, CGF);
+ llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
auto RI = CS.getCapturedRecordDecl()->field_begin();
auto CV = CapturedVars.begin();
@@ -7847,6 +8432,12 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
if (CurBasePointers.empty())
MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
CurPointers, CurSizes, CurMapTypes);
+ // Generate correct mapping for variables captured by reference in
+ // lambdas.
+ if (CI->capturesVariable())
+ MEHandler.generateInfoForLambdaCaptures(
+ CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
+ CurMapTypes, LambdaPointers);
}
// We expect to have at least an element of information for this capture.
assert(!CurBasePointers.empty() &&
@@ -7868,6 +8459,9 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
Sizes.append(CurSizes.begin(), CurSizes.end());
MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
}
+ // Adjust MEMBER_OF flags for the lambda captures.
+ MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
+ Pointers, MapTypes);
// Map other list items in the map clause which are not captured variables
// but "declare target link" global variables.
MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
@@ -7935,7 +8529,7 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
unsigned DeviceID;
unsigned FileID;
unsigned Line;
- getTargetEntryUniqueInfo(CGM.getContext(), E.getLocStart(), DeviceID,
+ getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
FileID, Line);
// Is this a target region that should not be emitted as an entry point? If
@@ -8030,6 +8624,7 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
case OMPD_declare_reduction:
case OMPD_taskloop:
case OMPD_taskloop_simd:
+ case OMPD_requires:
case OMPD_unknown:
llvm_unreachable("Unknown target directive for OpenMP device codegen.");
}
@@ -8055,19 +8650,20 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
}
bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
- const auto *FD = cast<FunctionDecl>(GD.getDecl());
-
// If emitting code for the host, we do not process FD here. Instead we do
// the normal code generation.
if (!CGM.getLangOpts().OpenMPIsDevice)
return false;
+ const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
+ StringRef Name = CGM.getMangledName(GD);
// Try to detect target regions in the function.
- scanForTargetRegionsFunctions(FD->getBody(), CGM.getMangledName(GD));
+ if (const auto *FD = dyn_cast<FunctionDecl>(VD))
+ scanForTargetRegionsFunctions(FD->getBody(), Name);
// Do not to emit function if it is not marked as declare target.
- return !isDeclareTargetDeclaration(FD) &&
- AlreadyEmittedTargetFunctions.count(FD->getCanonicalDecl()) == 0;
+ return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
+ AlreadyEmittedTargetFunctions.count(Name) == 0;
}
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
@@ -8093,64 +8689,105 @@ bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
// Do not to emit variable if it is not marked as declare target.
llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
- isDeclareTargetDeclaration(cast<VarDecl>(GD.getDecl()));
- return !Res || *Res == OMPDeclareTargetDeclAttr::MT_Link;
+ OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
+ cast<VarDecl>(GD.getDecl()));
+ if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link) {
+ DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
+ return true;
+ }
+ return false;
}
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
llvm::Constant *Addr) {
- if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
- isDeclareTargetDeclaration(VD)) {
- OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
- StringRef VarName;
- CharUnits VarSize;
- llvm::GlobalValue::LinkageTypes Linkage;
- switch (*Res) {
- case OMPDeclareTargetDeclAttr::MT_To:
- Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
- VarName = CGM.getMangledName(VD);
+ llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
+ OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
+ if (!Res) {
+ if (CGM.getLangOpts().OpenMPIsDevice) {
+ // Register non-target variables being emitted in device code (debug info
+ // may cause this).
+ StringRef VarName = CGM.getMangledName(VD);
+ EmittedNonTargetVariables.try_emplace(VarName, Addr);
+ }
+ return;
+ }
+ // Register declare target variables.
+ OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
+ StringRef VarName;
+ CharUnits VarSize;
+ llvm::GlobalValue::LinkageTypes Linkage;
+ switch (*Res) {
+ case OMPDeclareTargetDeclAttr::MT_To:
+ Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
+ VarName = CGM.getMangledName(VD);
+ if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
- Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
- // Temp solution to prevent optimizations of the internal variables.
- if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
- std::string RefName = getName({VarName, "ref"});
- if (!CGM.GetGlobalValue(RefName)) {
- llvm::Constant *AddrRef =
- getOrCreateInternalVariable(Addr->getType(), RefName);
- auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
- GVAddrRef->setConstant(/*Val=*/true);
- GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
- GVAddrRef->setInitializer(Addr);
- CGM.addCompilerUsedGlobal(GVAddrRef);
- }
- }
- break;
- case OMPDeclareTargetDeclAttr::MT_Link:
- Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
- if (CGM.getLangOpts().OpenMPIsDevice) {
- VarName = Addr->getName();
- Addr = nullptr;
- } else {
- VarName = getAddrOfDeclareTargetLink(VD).getName();
- Addr =
- cast<llvm::Constant>(getAddrOfDeclareTargetLink(VD).getPointer());
+ assert(!VarSize.isZero() && "Expected non-zero size of the variable");
+ } else {
+ VarSize = CharUnits::Zero();
+ }
+ Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
+ // Temp solution to prevent optimizations of the internal variables.
+ if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
+ std::string RefName = getName({VarName, "ref"});
+ if (!CGM.GetGlobalValue(RefName)) {
+ llvm::Constant *AddrRef =
+ getOrCreateInternalVariable(Addr->getType(), RefName);
+ auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
+ GVAddrRef->setConstant(/*Val=*/true);
+ GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
+ GVAddrRef->setInitializer(Addr);
+ CGM.addCompilerUsedGlobal(GVAddrRef);
}
- VarSize = CGM.getPointerSize();
- Linkage = llvm::GlobalValue::WeakAnyLinkage;
- break;
}
- OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
- VarName, Addr, VarSize, Flags, Linkage);
+ break;
+ case OMPDeclareTargetDeclAttr::MT_Link:
+ Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
+ if (CGM.getLangOpts().OpenMPIsDevice) {
+ VarName = Addr->getName();
+ Addr = nullptr;
+ } else {
+ VarName = getAddrOfDeclareTargetLink(VD).getName();
+ Addr = cast<llvm::Constant>(getAddrOfDeclareTargetLink(VD).getPointer());
+ }
+ VarSize = CGM.getPointerSize();
+ Linkage = llvm::GlobalValue::WeakAnyLinkage;
+ break;
}
+ OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
+ VarName, Addr, VarSize, Flags, Linkage);
}
bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
- if (isa<FunctionDecl>(GD.getDecl()))
+ if (isa<FunctionDecl>(GD.getDecl()) ||
+ isa<OMPDeclareReductionDecl>(GD.getDecl()))
return emitTargetFunctions(GD);
return emitTargetGlobalVariable(GD);
}
+void CGOpenMPRuntime::emitDeferredTargetDecls() const {
+ for (const VarDecl *VD : DeferredGlobalVariables) {
+ llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
+ OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
+ if (!Res)
+ continue;
+ if (*Res == OMPDeclareTargetDeclAttr::MT_To) {
+ CGM.EmitGlobal(VD);
+ } else {
+ assert(*Res == OMPDeclareTargetDeclAttr::MT_Link &&
+ "Expected to or link clauses.");
+ (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD);
+ }
+ }
+}
+
+void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
+ CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
+ assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
+ " Expected target-based directive.");
+}
+
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
CodeGenModule &CGM)
: CGM(CGM) {
@@ -8169,21 +8806,20 @@ bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
return true;
+ StringRef Name = CGM.getMangledName(GD);
const auto *D = cast<FunctionDecl>(GD.getDecl());
- const FunctionDecl *FD = D->getCanonicalDecl();
// Do not to emit function if it is marked as declare target as it was already
// emitted.
- if (isDeclareTargetDeclaration(D)) {
- if (D->hasBody() && AlreadyEmittedTargetFunctions.count(FD) == 0) {
- if (auto *F = dyn_cast_or_null<llvm::Function>(
- CGM.GetGlobalValue(CGM.getMangledName(GD))))
+ if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
+ if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) {
+ if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name)))
return !F->isDeclaration();
return false;
}
return true;
}
- return !AlreadyEmittedTargetFunctions.insert(FD).second;
+ return !AlreadyEmittedTargetFunctions.insert(Name).second;
}
llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
@@ -8478,6 +9114,7 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
case OMPD_target_parallel:
case OMPD_target_parallel_for:
case OMPD_target_parallel_for_simd:
+ case OMPD_requires:
case OMPD_unknown:
llvm_unreachable("Unexpected standalone target data directive.");
break;
@@ -8730,8 +9367,8 @@ void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
ParamAttrTy &ParamAttr = ParamAttrs[Pos];
ParamAttr.Kind = Linear;
if (*SI) {
- if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C,
- Expr::SE_AllowSideEffects)) {
+ Expr::EvalResult Result;
+ if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
if (const auto *DRE =
cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
@@ -8740,6 +9377,8 @@ void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
ParamPositions[StridePVD->getCanonicalDecl()]);
}
}
+ } else {
+ ParamAttr.StrideOrArg = Result.Val.getInt();
}
}
++SI;
@@ -8782,7 +9421,8 @@ public:
} // namespace
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
- const OMPLoopDirective &D) {
+ const OMPLoopDirective &D,
+ ArrayRef<Expr *> NumIterations) {
if (!CGF.HaveInsertPoint())
return;
@@ -8805,37 +9445,50 @@ void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
} else {
RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
}
+ llvm::APInt Size(/*numBits=*/32, NumIterations.size());
+ QualType ArrayTy =
+ C.getConstantArrayType(KmpDimTy, Size, ArrayType::Normal, 0);
- Address DimsAddr = CGF.CreateMemTemp(KmpDimTy, "dims");
- CGF.EmitNullInitialization(DimsAddr, KmpDimTy);
+ Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
+ CGF.EmitNullInitialization(DimsAddr, ArrayTy);
enum { LowerFD = 0, UpperFD, StrideFD };
// Fill dims with data.
- LValue DimsLVal = CGF.MakeAddrLValue(DimsAddr, KmpDimTy);
- // dims.upper = num_iterations;
- LValue UpperLVal =
- CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), UpperFD));
- llvm::Value *NumIterVal = CGF.EmitScalarConversion(
- CGF.EmitScalarExpr(D.getNumIterations()), D.getNumIterations()->getType(),
- Int64Ty, D.getNumIterations()->getExprLoc());
- CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
- // dims.stride = 1;
- LValue StrideLVal =
- CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), StrideFD));
- CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
- StrideLVal);
+ for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
+ LValue DimsLVal =
+ CGF.MakeAddrLValue(CGF.Builder.CreateConstArrayGEP(
+ DimsAddr, I, C.getTypeSizeInChars(KmpDimTy)),
+ KmpDimTy);
+ // dims.upper = num_iterations;
+ LValue UpperLVal = CGF.EmitLValueForField(
+ DimsLVal, *std::next(RD->field_begin(), UpperFD));
+ llvm::Value *NumIterVal =
+ CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]),
+ D.getNumIterations()->getType(), Int64Ty,
+ D.getNumIterations()->getExprLoc());
+ CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
+ // dims.stride = 1;
+ LValue StrideLVal = CGF.EmitLValueForField(
+ DimsLVal, *std::next(RD->field_begin(), StrideFD));
+ CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
+ StrideLVal);
+ }
// Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
// kmp_int32 num_dims, struct kmp_dim * dims);
- llvm::Value *Args[] = {emitUpdateLocation(CGF, D.getLocStart()),
- getThreadID(CGF, D.getLocStart()),
- llvm::ConstantInt::getSigned(CGM.Int32Ty, 1),
- CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- DimsAddr.getPointer(), CGM.VoidPtrTy)};
+ llvm::Value *Args[] = {
+ emitUpdateLocation(CGF, D.getBeginLoc()),
+ getThreadID(CGF, D.getBeginLoc()),
+ llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ CGF.Builder
+ .CreateConstArrayGEP(DimsAddr, 0, C.getTypeSizeInChars(KmpDimTy))
+ .getPointer(),
+ CGM.VoidPtrTy)};
llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_init);
CGF.EmitRuntimeCall(RTLFn, Args);
llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
- emitUpdateLocation(CGF, D.getLocEnd()), getThreadID(CGF, D.getLocEnd())};
+ emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
llvm::Value *FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
llvm::makeArrayRef(FiniArgs));
@@ -8845,16 +9498,29 @@ void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
const OMPDependClause *C) {
QualType Int64Ty =
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
- const Expr *CounterVal = C->getCounterValue();
- assert(CounterVal);
- llvm::Value *CntVal = CGF.EmitScalarConversion(CGF.EmitScalarExpr(CounterVal),
- CounterVal->getType(), Int64Ty,
- CounterVal->getExprLoc());
- Address CntAddr = CGF.CreateMemTemp(Int64Ty, ".cnt.addr");
- CGF.EmitStoreOfScalar(CntVal, CntAddr, /*Volatile=*/false, Int64Ty);
- llvm::Value *Args[] = {emitUpdateLocation(CGF, C->getLocStart()),
- getThreadID(CGF, C->getLocStart()),
- CntAddr.getPointer()};
+ llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
+ QualType ArrayTy = CGM.getContext().getConstantArrayType(
+ Int64Ty, Size, ArrayType::Normal, 0);
+ Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
+ for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
+ const Expr *CounterVal = C->getLoopData(I);
+ assert(CounterVal);
+ llvm::Value *CntVal = CGF.EmitScalarConversion(
+ CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
+ CounterVal->getExprLoc());
+ CGF.EmitStoreOfScalar(
+ CntVal,
+ CGF.Builder.CreateConstArrayGEP(
+ CntAddr, I, CGM.getContext().getTypeSizeInChars(Int64Ty)),
+ /*Volatile=*/false, Int64Ty);
+ }
+ llvm::Value *Args[] = {
+ emitUpdateLocation(CGF, C->getBeginLoc()),
+ getThreadID(CGF, C->getBeginLoc()),
+ CGF.Builder
+ .CreateConstArrayGEP(CntAddr, 0,
+ CGM.getContext().getTypeSizeInChars(Int64Ty))
+ .getPointer()};
llvm::Value *RTLFn;
if (C->getDependencyKind() == OMPC_DEPEND_source) {
RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
@@ -9169,7 +9835,8 @@ void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
}
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
- const OMPLoopDirective &D) {
+ const OMPLoopDirective &D,
+ ArrayRef<Expr *> NumIterations) {
llvm_unreachable("Not supported in SIMD-only mode");
}
diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h
index 01ff0c20fd66..1822a6fd1974 100644
--- a/lib/CodeGen/CGOpenMPRuntime.h
+++ b/lib/CodeGen/CGOpenMPRuntime.h
@@ -15,12 +15,13 @@
#define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H
#include "CGValue.h"
+#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/Type.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceLocation.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringSet.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/ValueHandle.h"
@@ -278,12 +279,39 @@ protected:
/// stored.
virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc);
+ void setLocThreadIdInsertPt(CodeGenFunction &CGF,
+ bool AtCurrentPoint = false);
+ void clearLocThreadIdInsertPt(CodeGenFunction &CGF);
+
+ /// Check if the default location must be constant.
+ /// Default is false to support OMPT/OMPD.
+ virtual bool isDefaultLocationConstant() const { return false; }
+
+ /// Returns additional flags that can be stored in reserved_2 field of the
+ /// default location.
+ virtual unsigned getDefaultLocationReserved2Flags() const { return 0; }
+
+ /// Returns default flags for the barriers depending on the directive, for
+ /// which this barrier is going to be emitted.
+ static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind);
+
+ /// Get the LLVM type for the critical name.
+ llvm::ArrayType *getKmpCriticalNameTy() const {return KmpCriticalNameTy;}
+
+ /// Returns corresponding lock object for the specified critical region
+ /// name. If the lock object does not exist it is created, otherwise the
+ /// reference to the existing copy is returned.
+ /// \param CriticalName Name of the critical region.
+ ///
+ llvm::Value *getCriticalRegionLock(StringRef CriticalName);
+
private:
/// Default const ident_t object used for initialization of all other
/// ident_t objects.
llvm::Constant *DefaultOpenMPPSource = nullptr;
+ using FlagsTy = std::pair<unsigned, unsigned>;
/// Map of flags and corresponding default locations.
- typedef llvm::DenseMap<unsigned, llvm::Value *> OpenMPDefaultLocMapTy;
+ using OpenMPDefaultLocMapTy = llvm::DenseMap<FlagsTy, llvm::Value *>;
OpenMPDefaultLocMapTy OpenMPDefaultLocMap;
Address getOrCreateDefaultLocation(unsigned Flags);
@@ -300,6 +328,8 @@ private:
struct DebugLocThreadIdTy {
llvm::Value *DebugLoc;
llvm::Value *ThreadID;
+ /// Insert point for the service instructions.
+ llvm::AssertingVH<llvm::Instruction> ServiceInsertPt = nullptr;
};
/// Map of local debug location, ThreadId and functions.
typedef llvm::DenseMap<llvm::Function *, DebugLocThreadIdTy>
@@ -315,10 +345,6 @@ private:
SmallVector<const OMPDeclareReductionDecl *, 4>>
FunctionUDRMapTy;
FunctionUDRMapTy FunctionUDRMap;
- IdentifierInfo *In = nullptr;
- IdentifierInfo *Out = nullptr;
- IdentifierInfo *Priv = nullptr;
- IdentifierInfo *Orig = nullptr;
/// Type kmp_critical_name, originally defined as typedef kmp_int32
/// kmp_critical_name[8];
llvm::ArrayType *KmpCriticalNameTy;
@@ -600,7 +626,15 @@ private:
OffloadEntriesInfoManagerTy OffloadEntriesInfoManager;
bool ShouldMarkAsGlobal = true;
- llvm::SmallDenseSet<const FunctionDecl *> AlreadyEmittedTargetFunctions;
+ /// List of the emitted functions.
+ llvm::StringSet<> AlreadyEmittedTargetFunctions;
+ /// List of the global variables with their addresses that should not be
+ /// emitted for the target.
+ llvm::StringMap<llvm::WeakTrackingVH> EmittedNonTargetVariables;
+
+ /// List of variables that can become declare target implicitly and, thus,
+ /// must be emitted.
+ llvm::SmallDenseSet<const VarDecl *> DeferredGlobalVariables;
/// Creates and registers offloading binary descriptor for the current
/// compilation unit. The function that does the registration is returned.
@@ -673,10 +707,10 @@ private:
const llvm::Twine &Name);
/// Set of threadprivate variables with the generated initializer.
- llvm::SmallPtrSet<const VarDecl *, 4> ThreadPrivateWithDefinition;
+ llvm::StringSet<> ThreadPrivateWithDefinition;
/// Set of declare target variables with the generated initializer.
- llvm::SmallPtrSet<const VarDecl *, 4> DeclareTargetWithDefinition;
+ llvm::StringSet<> DeclareTargetWithDefinition;
/// Emits initialization code for the threadprivate variables.
/// \param VDAddr Address of the global variable \a VD.
@@ -688,13 +722,6 @@ private:
llvm::Value *Ctor, llvm::Value *CopyCtor,
llvm::Value *Dtor, SourceLocation Loc);
- /// Returns corresponding lock object for the specified critical region
- /// name. If the lock object does not exist it is created, otherwise the
- /// reference to the existing copy is returned.
- /// \param CriticalName Name of the critical region.
- ///
- llvm::Value *getCriticalRegionLock(StringRef CriticalName);
-
struct TaskResultTy {
llvm::Value *NewTask = nullptr;
llvm::Value *TaskEntry = nullptr;
@@ -884,6 +911,20 @@ public:
virtual bool isStaticNonchunked(OpenMPDistScheduleClauseKind ScheduleKind,
bool Chunked) const;
+ /// Check if the specified \a ScheduleKind is static chunked.
+ /// \param ScheduleKind Schedule kind specified in the 'schedule' clause.
+ /// \param Chunked True if chunk is specified in the clause.
+ ///
+ virtual bool isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
+ bool Chunked) const;
+
+ /// Check if the specified \a ScheduleKind is static chunked.
+ /// \param ScheduleKind Schedule kind specified in the 'dist_schedule' clause.
+ /// \param Chunked True if chunk is specified in the clause.
+ ///
+ virtual bool isStaticChunked(OpenMPDistScheduleClauseKind ScheduleKind,
+ bool Chunked) const;
+
/// Check if the specified \a ScheduleKind is dynamic.
/// This kind of worksharing directive is emitted without outer loop.
/// \param ScheduleKind Schedule Kind specified in the 'schedule' clause.
@@ -1327,6 +1368,15 @@ public:
bool IsOffloadEntry,
const RegionCodeGenTy &CodeGen);
+ /// Emit code that pushes the trip count of loops associated with constructs
+ /// 'target teams distribute' and 'teams distribute parallel for'.
+ /// \param SizeEmitter Emits the int64 value for the number of iterations of
+ /// the associated loop.
+ virtual void emitTargetNumIterationsCall(
+ CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device,
+ const llvm::function_ref<llvm::Value *(
+ CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter);
+
/// Emit the target offloading code associated with \a D. The emitted
/// code attempts offloading the execution to the device, an the event of
/// a failure it executes the host version outlined in \a OutlinedFn.
@@ -1465,8 +1515,8 @@ public:
/// Emit initialization for doacross loop nesting support.
/// \param D Loop-based construct used in doacross nesting construct.
- virtual void emitDoacrossInit(CodeGenFunction &CGF,
- const OMPLoopDirective &D);
+ virtual void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D,
+ ArrayRef<Expr *> NumIterations);
/// Emit code for doacross ordered directive with 'depend' clause.
/// \param C 'depend' clause with 'sink|source' dependency kind.
@@ -1490,6 +1540,18 @@ public:
const VarDecl *NativeParam,
const VarDecl *TargetParam) const;
+ /// Choose default schedule type and chunk value for the
+ /// dist_schedule clause.
+ virtual void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF,
+ const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind,
+ llvm::Value *&Chunk) const {}
+
+ /// Choose default schedule type and chunk value for the
+ /// schedule clause.
+ virtual void getDefaultScheduleAndChunk(CodeGenFunction &CGF,
+ const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind,
+ const Expr *&ChunkExpr) const {}
+
/// Emits call of the outlined function with the provided arguments,
/// translating these arguments to correct target-specific arguments.
virtual void
@@ -1505,10 +1567,23 @@ public:
virtual Address getAddressOfLocalVariable(CodeGenFunction &CGF,
const VarDecl *VD);
- /// Marks the declaration as alread emitted for the device code and returns
+ /// Marks the declaration as already emitted for the device code and returns
/// true, if it was marked already, and false, otherwise.
bool markAsGlobalTarget(GlobalDecl GD);
+ /// Emit deferred declare target variables marked for deferred emission.
+ void emitDeferredTargetDecls() const;
+
+ /// Adjust some parameters for the target-based directives, like addresses of
+ /// the variables captured by reference in lambdas.
+ virtual void
+ adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF,
+ const OMPExecutableDirective &D) const;
+
+ /// Perform check on requires decl to ensure that target architecture
+ /// supports unified addressing
+ virtual void checkArchForUnifiedAddressing(CodeGenModule &CGM,
+ const OMPRequiresDecl *D) const {}
};
/// Class supports emissionof SIMD-only code.
@@ -2051,8 +2126,8 @@ public:
/// Emit initialization for doacross loop nesting support.
/// \param D Loop-based construct used in doacross nesting construct.
- void emitDoacrossInit(CodeGenFunction &CGF,
- const OMPLoopDirective &D) override;
+ void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D,
+ ArrayRef<Expr *> NumIterations) override;
/// Emit code for doacross ordered directive with 'depend' clause.
/// \param C 'depend' clause with 'sink|source' dependency kind.
diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index 036b5371fe0b..7046ab3aa35c 100644
--- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -17,6 +17,7 @@
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
+#include "clang/Basic/Cuda.h"
#include "llvm/ADT/SmallPtrSet.h"
using namespace clang;
@@ -32,8 +33,8 @@ enum OpenMPRTLFunctionNVPTX {
/// Call to void __kmpc_spmd_kernel_init(kmp_int32 thread_limit,
/// int16_t RequiresOMPRuntime, int16_t RequiresDataSharing);
OMPRTL_NVPTX__kmpc_spmd_kernel_init,
- /// Call to void __kmpc_spmd_kernel_deinit();
- OMPRTL_NVPTX__kmpc_spmd_kernel_deinit,
+ /// Call to void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime);
+ OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2,
/// Call to void __kmpc_kernel_prepare_parallel(void
/// *outlined_function, int16_t
/// IsOMPRuntimeInitialized);
@@ -55,37 +56,27 @@ enum OpenMPRTLFunctionNVPTX {
/// Call to int64_t __kmpc_shuffle_int64(int64_t element,
/// int16_t lane_offset, int16_t warp_size);
OMPRTL_NVPTX__kmpc_shuffle_int64,
- /// Call to __kmpc_nvptx_parallel_reduce_nowait(kmp_int32
+ /// Call to __kmpc_nvptx_parallel_reduce_nowait_v2(ident_t *loc, kmp_int32
/// global_tid, kmp_int32 num_vars, size_t reduce_size, void* reduce_data,
/// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
/// lane_offset, int16_t shortCircuit),
/// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num));
- OMPRTL_NVPTX__kmpc_parallel_reduce_nowait,
- /// Call to __kmpc_nvptx_simd_reduce_nowait(kmp_int32
- /// global_tid, kmp_int32 num_vars, size_t reduce_size, void* reduce_data,
- /// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
- /// lane_offset, int16_t shortCircuit),
- /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num));
- OMPRTL_NVPTX__kmpc_simd_reduce_nowait,
- /// Call to __kmpc_nvptx_teams_reduce_nowait(int32_t global_tid,
- /// int32_t num_vars, size_t reduce_size, void *reduce_data,
- /// void (*kmp_ShuffleReductFctPtr)(void *rhs, int16_t lane_id, int16_t
- /// lane_offset, int16_t shortCircuit),
- /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num),
- /// void (*kmp_CopyToScratchpadFctPtr)(void *reduce_data, void * scratchpad,
- /// int32_t index, int32_t width),
- /// void (*kmp_LoadReduceFctPtr)(void *reduce_data, void * scratchpad, int32_t
- /// index, int32_t width, int32_t reduce))
- OMPRTL_NVPTX__kmpc_teams_reduce_nowait,
+ OMPRTL_NVPTX__kmpc_parallel_reduce_nowait_v2,
+ /// Call to __kmpc_nvptx_teams_reduce_nowait_simple(ident_t *loc, kmp_int32
+ /// global_tid, kmp_critical_name *lck)
+ OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_simple,
+ /// Call to __kmpc_nvptx_teams_end_reduce_nowait_simple(ident_t *loc,
+ /// kmp_int32 global_tid, kmp_critical_name *lck)
+ OMPRTL_NVPTX__kmpc_nvptx_teams_end_reduce_nowait_simple,
/// Call to __kmpc_nvptx_end_reduce_nowait(int32_t global_tid);
OMPRTL_NVPTX__kmpc_end_reduce_nowait,
/// Call to void __kmpc_data_sharing_init_stack();
OMPRTL_NVPTX__kmpc_data_sharing_init_stack,
/// Call to void __kmpc_data_sharing_init_stack_spmd();
OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd,
- /// Call to void* __kmpc_data_sharing_push_stack(size_t size,
+ /// Call to void* __kmpc_data_sharing_coalesced_push_stack(size_t size,
/// int16_t UseSharedMemory);
- OMPRTL_NVPTX__kmpc_data_sharing_push_stack,
+ OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack,
/// Call to void __kmpc_data_sharing_pop_stack(void *a);
OMPRTL_NVPTX__kmpc_data_sharing_pop_stack,
/// Call to void __kmpc_begin_sharing_variables(void ***args,
@@ -100,6 +91,17 @@ enum OpenMPRTLFunctionNVPTX {
OMPRTL_NVPTX__kmpc_parallel_level,
/// Call to int8_t __kmpc_is_spmd_exec_mode();
OMPRTL_NVPTX__kmpc_is_spmd_exec_mode,
+ /// Call to void __kmpc_get_team_static_memory(int16_t isSPMDExecutionMode,
+ /// const void *buf, size_t size, int16_t is_shared, const void **res);
+ OMPRTL_NVPTX__kmpc_get_team_static_memory,
+ /// Call to void __kmpc_restore_team_static_memory(int16_t
+ /// isSPMDExecutionMode, int16_t is_shared);
+ OMPRTL_NVPTX__kmpc_restore_team_static_memory,
+ /// Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
+ OMPRTL__kmpc_barrier,
+ /// Call to void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32
+ /// global_tid);
+ OMPRTL__kmpc_barrier_simple_spmd,
};
/// Pre(post)-action for different OpenMP constructs specialized for NVPTX.
@@ -142,19 +144,35 @@ public:
/// a target region. The appropriate mode (SPMD|NON-SPMD) is set on entry
/// to the target region and used by containing directives such as 'parallel'
/// to emit optimized code.
-class ExecutionModeRAII {
+class ExecutionRuntimeModesRAII {
private:
- CGOpenMPRuntimeNVPTX::ExecutionMode SavedMode;
- CGOpenMPRuntimeNVPTX::ExecutionMode &Mode;
+ CGOpenMPRuntimeNVPTX::ExecutionMode SavedExecMode =
+ CGOpenMPRuntimeNVPTX::EM_Unknown;
+ CGOpenMPRuntimeNVPTX::ExecutionMode &ExecMode;
+ bool SavedRuntimeMode = false; // Restored only when RuntimeMode is non-null.
+ bool *RuntimeMode = nullptr; // Stays null for the Non-SPMD constructor.
public:
- ExecutionModeRAII(CGOpenMPRuntimeNVPTX::ExecutionMode &Mode, bool IsSPMD)
- : Mode(Mode) {
- SavedMode = Mode;
- Mode = IsSPMD ? CGOpenMPRuntimeNVPTX::EM_SPMD
- : CGOpenMPRuntimeNVPTX::EM_NonSPMD;
+ /// Constructor for Non-SPMD mode.
+ ExecutionRuntimeModesRAII(CGOpenMPRuntimeNVPTX::ExecutionMode &ExecMode)
+ : ExecMode(ExecMode) {
+ SavedExecMode = ExecMode;
+ ExecMode = CGOpenMPRuntimeNVPTX::EM_NonSPMD;
+ }
+ /// Constructor for SPMD mode.
+ ExecutionRuntimeModesRAII(CGOpenMPRuntimeNVPTX::ExecutionMode &ExecMode,
+ bool &RuntimeMode, bool FullRuntimeMode)
+ : ExecMode(ExecMode), RuntimeMode(&RuntimeMode) {
+ SavedExecMode = ExecMode;
+ SavedRuntimeMode = RuntimeMode; // Parameter (bool &) shadows the member.
+ ExecMode = CGOpenMPRuntimeNVPTX::EM_SPMD;
+ RuntimeMode = FullRuntimeMode; // Writes through the reference parameter.
+ }
+ ~ExecutionRuntimeModesRAII() {
+ ExecMode = SavedExecMode;
+ if (RuntimeMode) // Member pointer; set only by the SPMD constructor.
+ *RuntimeMode = SavedRuntimeMode;
}
- ~ExecutionModeRAII() { Mode = SavedMode; }
};
/// GPU Configuration: This information can be derived from cuda registers,
@@ -169,16 +187,113 @@ enum MachineConfiguration : unsigned {
LaneIDMask = WarpSize - 1,
/// Global memory alignment for performance.
- GlobalMemoryAlignment = 256,
-};
+ GlobalMemoryAlignment = 128,
-enum NamedBarrier : unsigned {
- /// Synchronize on this barrier #ID using a named barrier primitive.
- /// Only the subset of active threads in a parallel region arrive at the
- /// barrier.
- NB_Parallel = 1,
+ /// Maximal size of the shared memory buffer.
+ SharedMemorySize = 128,
};
+static const ValueDecl *getPrivateItem(const Expr *RefExpr) {
+ RefExpr = RefExpr->IgnoreParens();
+ if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(RefExpr)) { // a[i][j]...
+ const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
+ while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
+ Base = TempASE->getBase()->IgnoreParenImpCasts();
+ RefExpr = Base; // Innermost base of the subscript chain.
+ } else if (auto *OASE = dyn_cast<OMPArraySectionExpr>(RefExpr)) {
+ const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
+ while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
+ Base = TempOASE->getBase()->IgnoreParenImpCasts();
+ while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
+ Base = TempASE->getBase()->IgnoreParenImpCasts();
+ RefExpr = Base; // Base after peeling sections, then subscripts.
+ }
+ RefExpr = RefExpr->IgnoreParenImpCasts();
+ if (const auto *DE = dyn_cast<DeclRefExpr>(RefExpr))
+ return cast<ValueDecl>(DE->getDecl()->getCanonicalDecl());
+ const auto *ME = cast<MemberExpr>(RefExpr); // Only other form expected here.
+ return cast<ValueDecl>(ME->getMemberDecl()->getCanonicalDecl());
+}
+
+typedef std::pair<CharUnits /*Align*/, const ValueDecl *> VarsDataTy;
+static bool stable_sort_comparator(const VarsDataTy P1, const VarsDataTy P2) {
+ return P1.first > P2.first; // Orders by alignment, largest first.
+}
+
+static RecordDecl *buildRecordForGlobalizedVars(
+ ASTContext &C, ArrayRef<const ValueDecl *> EscapedDecls,
+ ArrayRef<const ValueDecl *> EscapedDeclsForTeams,
+ llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
+ &MappedDeclsFields) {
+ if (EscapedDecls.empty() && EscapedDeclsForTeams.empty())
+ return nullptr; // Nothing to globalize, so no record is built.
+ SmallVector<VarsDataTy, 4> GlobalizedVars;
+ for (const ValueDecl *D : EscapedDecls) // At least GlobalMemoryAlignment.
+ GlobalizedVars.emplace_back(
+ CharUnits::fromQuantity(std::max(
+ C.getDeclAlign(D).getQuantity(),
+ static_cast<CharUnits::QuantityType>(GlobalMemoryAlignment))),
+ D);
+ for (const ValueDecl *D : EscapedDeclsForTeams) // Declared alignment only.
+ GlobalizedVars.emplace_back(C.getDeclAlign(D), D);
+ std::stable_sort(GlobalizedVars.begin(), GlobalizedVars.end(),
+ stable_sort_comparator);
+ // Build struct _globalized_locals_ty {
+ // /* globalized vars */[WarpSize] align (max(decl_align,
+ // GlobalMemoryAlignment))
+ // /* globalized vars */ for EscapedDeclsForTeams
+ // };
+ RecordDecl *GlobalizedRD = C.buildImplicitRecord("_globalized_locals_ty");
+ GlobalizedRD->startDefinition();
+ llvm::SmallPtrSet<const ValueDecl *, 16> SingleEscaped(
+ EscapedDeclsForTeams.begin(), EscapedDeclsForTeams.end());
+ for (const auto &Pair : GlobalizedVars) {
+ const ValueDecl *VD = Pair.second;
+ QualType Type = VD->getType();
+ if (Type->isLValueReferenceType()) // References become pointer fields.
+ Type = C.getPointerType(Type.getNonReferenceType());
+ else
+ Type = Type.getNonReferenceType();
+ SourceLocation Loc = VD->getLocation();
+ FieldDecl *Field;
+ if (SingleEscaped.count(VD)) { // From EscapedDeclsForTeams: single field.
+ Field = FieldDecl::Create(
+ C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type,
+ C.getTrivialTypeSourceInfo(Type, SourceLocation()),
+ /*BW=*/nullptr, /*Mutable=*/false,
+ /*InitStyle=*/ICIS_NoInit);
+ Field->setAccess(AS_public);
+ if (VD->hasAttrs()) { // Carry over the variable's AlignedAttr attributes.
+ for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
+ E(VD->getAttrs().end());
+ I != E; ++I)
+ Field->addAttr(*I);
+ }
+ } else { // From EscapedDecls: array field of WarpSize elements.
+ llvm::APInt ArraySize(32, WarpSize);
+ Type = C.getConstantArrayType(Type, ArraySize, ArrayType::Normal, 0);
+ Field = FieldDecl::Create(
+ C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type,
+ C.getTrivialTypeSourceInfo(Type, SourceLocation()),
+ /*BW=*/nullptr, /*Mutable=*/false,
+ /*InitStyle=*/ICIS_NoInit);
+ Field->setAccess(AS_public);
+ llvm::APInt Align(32, std::max(C.getDeclAlign(VD).getQuantity(),
+ static_cast<CharUnits::QuantityType>(
+ GlobalMemoryAlignment)));
+ Field->addAttr(AlignedAttr::CreateImplicit(
+ C, AlignedAttr::GNU_aligned, /*IsAlignmentExpr=*/true,
+ IntegerLiteral::Create(C, Align,
+ C.getIntTypeForBitwidth(32, /*Signed=*/0),
+ SourceLocation())));
+ }
+ GlobalizedRD->addDecl(Field);
+ MappedDeclsFields.try_emplace(VD, Field); // Lets callers map decl to field.
+ }
+ GlobalizedRD->completeDefinition();
+ return GlobalizedRD;
+}
+
/// Get the list of variables that can escape their declaration context.
class CheckVarsEscapingDeclContext final
: public ConstStmtVisitor<CheckVarsEscapingDeclContext> {
@@ -191,20 +306,10 @@ class CheckVarsEscapingDeclContext final
bool AllEscaped = false;
bool IsForCombinedParallelRegion = false;
- static llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy>
- isDeclareTargetDeclaration(const ValueDecl *VD) {
- for (const Decl *D : VD->redecls()) {
- if (!D->hasAttrs())
- continue;
- if (const auto *Attr = D->getAttr<OMPDeclareTargetDeclAttr>())
- return Attr->getMapType();
- }
- return llvm::None;
- }
-
void markAsEscaped(const ValueDecl *VD) {
// Do not globalize declare target variables.
- if (!isa<VarDecl>(VD) || isDeclareTargetDeclaration(VD))
+ if (!isa<VarDecl>(VD) ||
+ OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD))
return;
VD = cast<ValueDecl>(VD->getCanonicalDecl());
// Variables captured by value must be globalized.
@@ -218,9 +323,11 @@ class CheckVarsEscapingDeclContext final
const auto *Attr = FD->getAttr<OMPCaptureKindAttr>();
if (!Attr)
return;
- if (!isOpenMPPrivate(
- static_cast<OpenMPClauseKind>(Attr->getCaptureKind())) ||
- Attr->getCaptureKind() == OMPC_map)
+ if (((Attr->getCaptureKind() != OMPC_map) &&
+ !isOpenMPPrivate(
+ static_cast<OpenMPClauseKind>(Attr->getCaptureKind()))) ||
+ ((Attr->getCaptureKind() == OMPC_map) &&
+ !FD->getType()->isAnyPointerType()))
return;
}
if (!FD->getType()->isReferenceType()) {
@@ -302,55 +409,24 @@ class CheckVarsEscapingDeclContext final
}
}
- typedef std::pair<CharUnits /*Align*/, const ValueDecl *> VarsDataTy;
- static bool stable_sort_comparator(const VarsDataTy P1, const VarsDataTy P2) {
- return P1.first > P2.first;
- }
-
- void buildRecordForGlobalizedVars() {
+ void buildRecordForGlobalizedVars(bool IsInTTDRegion) {
assert(!GlobalizedRD &&
"Record for globalized variables is built already.");
- if (EscapedDecls.empty())
- return;
- ASTContext &C = CGF.getContext();
- SmallVector<VarsDataTy, 4> GlobalizedVars;
- for (const ValueDecl *D : EscapedDecls)
- GlobalizedVars.emplace_back(C.getDeclAlign(D), D);
- std::stable_sort(GlobalizedVars.begin(), GlobalizedVars.end(),
- stable_sort_comparator);
- // Build struct _globalized_locals_ty {
- // /* globalized vars */
- // };
- GlobalizedRD = C.buildImplicitRecord("_globalized_locals_ty");
- GlobalizedRD->startDefinition();
- for (const auto &Pair : GlobalizedVars) {
- const ValueDecl *VD = Pair.second;
- QualType Type = VD->getType();
- if (Type->isLValueReferenceType())
- Type = C.getPointerType(Type.getNonReferenceType());
- else
- Type = Type.getNonReferenceType();
- SourceLocation Loc = VD->getLocation();
- auto *Field = FieldDecl::Create(
- C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type,
- C.getTrivialTypeSourceInfo(Type, SourceLocation()),
- /*BW=*/nullptr, /*Mutable=*/false,
- /*InitStyle=*/ICIS_NoInit);
- Field->setAccess(AS_public);
- GlobalizedRD->addDecl(Field);
- if (VD->hasAttrs()) {
- for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
- E(VD->getAttrs().end());
- I != E; ++I)
- Field->addAttr(*I);
- }
- MappedDeclsFields.try_emplace(VD, Field);
- }
- GlobalizedRD->completeDefinition();
+ ArrayRef<const ValueDecl *> EscapedDeclsForParallel, EscapedDeclsForTeams;
+ if (IsInTTDRegion)
+ EscapedDeclsForTeams = EscapedDecls.getArrayRef();
+ else
+ EscapedDeclsForParallel = EscapedDecls.getArrayRef();
+ GlobalizedRD = ::buildRecordForGlobalizedVars(
+ CGF.getContext(), EscapedDeclsForParallel, EscapedDeclsForTeams,
+ MappedDeclsFields);
}
public:
- CheckVarsEscapingDeclContext(CodeGenFunction &CGF) : CGF(CGF) {}
+ CheckVarsEscapingDeclContext(CodeGenFunction &CGF,
+ ArrayRef<const ValueDecl *> TeamsReductions)
+ : CGF(CGF), EscapedDecls(TeamsReductions.begin(), TeamsReductions.end()) {
+ }
virtual ~CheckVarsEscapingDeclContext() = default;
void VisitDeclStmt(const DeclStmt *S) {
if (!S)
@@ -492,9 +568,9 @@ public:
/// Returns the record that handles all the escaped local variables and used
/// instead of their original storage.
- const RecordDecl *getGlobalizedRecord() {
+ const RecordDecl *getGlobalizedRecord(bool IsInTTDRegion) {
if (!GlobalizedRD)
- buildRecordForGlobalizedVars();
+ buildRecordForGlobalizedVars(IsInTTDRegion);
return GlobalizedRD;
}
@@ -568,31 +644,6 @@ static llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF) {
"nvptx_num_threads");
}
-/// Get barrier to synchronize all threads in a block.
-static void getNVPTXCTABarrier(CodeGenFunction &CGF) {
- CGF.EmitRuntimeCall(llvm::Intrinsic::getDeclaration(
- &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_barrier0));
-}
-
-/// Get barrier #ID to synchronize selected (multiple of warp size) threads in
-/// a CTA.
-static void getNVPTXBarrier(CodeGenFunction &CGF, int ID,
- llvm::Value *NumThreads) {
- CGBuilderTy &Bld = CGF.Builder;
- llvm::Value *Args[] = {Bld.getInt32(ID), NumThreads};
- CGF.EmitRuntimeCall(llvm::Intrinsic::getDeclaration(
- &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_barrier),
- Args);
-}
-
-/// Synchronize all GPU threads in a block.
-static void syncCTAThreads(CodeGenFunction &CGF) { getNVPTXCTABarrier(CGF); }
-
-/// Synchronize worker threads in a parallel region.
-static void syncParallelThreads(CodeGenFunction &CGF, llvm::Value *NumThreads) {
- return getNVPTXBarrier(CGF, NB_Parallel, NumThreads);
-}
-
/// Get the value of the thread_limit clause in the teams directive.
/// For the 'generic' execution mode, the runtime encodes thread_limit in
/// the launch parameters, always starting thread_limit+warpSize threads per
@@ -654,12 +705,58 @@ getDataSharingMode(CodeGenModule &CGM) {
: CGOpenMPRuntimeNVPTX::Generic;
}
+/// Checks if the expression is evaluatable or has no non-trivial function
+/// calls, and in addition has no (possible) side effects.
+static bool isTrivial(ASTContext &Ctx, const Expr * E) {
+ // We can skip constant expressions.
+ // We can skip expressions with trivial calls or simple expressions.
+ return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
+ !E->hasNonTrivialCall(Ctx)) &&
+ !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
+}
+
/// Checks if the \p Body is the \a CompoundStmt and returns its child statement
-/// iff there is only one.
-static const Stmt *getSingleCompoundChild(const Stmt *Body) {
- if (const auto *C = dyn_cast<CompoundStmt>(Body))
- if (C->size() == 1)
- return C->body_front();
+/// iff there is only one that is not evaluatable at compile time.
+static const Stmt *getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body) {
+ if (const auto *C = dyn_cast<CompoundStmt>(Body)) {
+ const Stmt *Child = nullptr;
+ for (const Stmt *S : C->body()) {
+ if (const auto *E = dyn_cast<Expr>(S)) {
+ if (isTrivial(Ctx, E)) // Trivial expressions are not counted.
+ continue;
+ }
+ // Some of the statements can be ignored.
+ if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
+ isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
+ continue;
+ // Analyze declarations.
+ if (const auto *DS = dyn_cast<DeclStmt>(S)) {
+ if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
+ if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
+ isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
+ isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
+ isa<UsingDirectiveDecl>(D) ||
+ isa<OMPDeclareReductionDecl>(D) ||
+ isa<OMPThreadPrivateDecl>(D))
+ return true;
+ const auto *VD = dyn_cast<VarDecl>(D);
+ if (!VD)
+ return false;
+ return VD->isConstexpr() ||
+ ((VD->getType().isTrivialType(Ctx) ||
+ VD->getType()->isReferenceType()) &&
+ (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
+ }))
+ continue; // Every declaration in this statement is ignorable.
+ }
+ // Found multiple children - cannot get the one child only.
+ if (Child)
+ return Body;
+ Child = S; // First statement that could not be ignored.
+ }
+ if (Child)
+ return Child;
+ }
return Body;
}
@@ -686,8 +783,9 @@ static bool hasParallelIfNumThreadsClause(ASTContext &Ctx,
static bool hasNestedSPMDDirective(ASTContext &Ctx,
const OMPExecutableDirective &D) {
const auto *CS = D.getInnermostCapturedStmt();
- const auto *Body = CS->getCapturedStmt()->IgnoreContainers();
- const Stmt *ChildStmt = getSingleCompoundChild(Body);
+ const auto *Body =
+ CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
+ const Stmt *ChildStmt = getSingleCompoundChild(Ctx, Body);
if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
@@ -696,27 +794,215 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
if (isOpenMPParallelDirective(DKind) &&
!hasParallelIfNumThreadsClause(Ctx, *NestedDir))
return true;
- if (DKind == OMPD_teams || DKind == OMPD_teams_distribute) {
- Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers();
+ if (DKind == OMPD_teams) {
+ Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
+ /*IgnoreCaptured=*/true);
if (!Body)
return false;
- ChildStmt = getSingleCompoundChild(Body);
+ ChildStmt = getSingleCompoundChild(Ctx, Body);
if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
DKind = NND->getDirectiveKind();
if (isOpenMPParallelDirective(DKind) &&
!hasParallelIfNumThreadsClause(Ctx, *NND))
return true;
- if (DKind == OMPD_distribute) {
- Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers();
+ }
+ }
+ return false;
+ case OMPD_target_teams:
+ return isOpenMPParallelDirective(DKind) &&
+ !hasParallelIfNumThreadsClause(Ctx, *NestedDir);
+ case OMPD_target_simd:
+ case OMPD_target_parallel:
+ case OMPD_target_parallel_for:
+ case OMPD_target_parallel_for_simd:
+ case OMPD_target_teams_distribute:
+ case OMPD_target_teams_distribute_simd:
+ case OMPD_target_teams_distribute_parallel_for:
+ case OMPD_target_teams_distribute_parallel_for_simd:
+ case OMPD_parallel:
+ case OMPD_for:
+ case OMPD_parallel_for:
+ case OMPD_parallel_sections:
+ case OMPD_for_simd:
+ case OMPD_parallel_for_simd:
+ case OMPD_cancel:
+ case OMPD_cancellation_point:
+ case OMPD_ordered:
+ case OMPD_threadprivate:
+ case OMPD_task:
+ case OMPD_simd:
+ case OMPD_sections:
+ case OMPD_section:
+ case OMPD_single:
+ case OMPD_master:
+ case OMPD_critical:
+ case OMPD_taskyield:
+ case OMPD_barrier:
+ case OMPD_taskwait:
+ case OMPD_taskgroup:
+ case OMPD_atomic:
+ case OMPD_flush:
+ case OMPD_teams:
+ case OMPD_target_data:
+ case OMPD_target_exit_data:
+ case OMPD_target_enter_data:
+ case OMPD_distribute:
+ case OMPD_distribute_simd:
+ case OMPD_distribute_parallel_for:
+ case OMPD_distribute_parallel_for_simd:
+ case OMPD_teams_distribute:
+ case OMPD_teams_distribute_simd:
+ case OMPD_teams_distribute_parallel_for:
+ case OMPD_teams_distribute_parallel_for_simd:
+ case OMPD_target_update:
+ case OMPD_declare_simd:
+ case OMPD_declare_target:
+ case OMPD_end_declare_target:
+ case OMPD_declare_reduction:
+ case OMPD_taskloop:
+ case OMPD_taskloop_simd:
+ case OMPD_requires:
+ case OMPD_unknown:
+ llvm_unreachable("Unexpected directive.");
+ }
+ }
+
+ return false;
+}
+
+static bool supportsSPMDExecutionMode(ASTContext &Ctx,
+ const OMPExecutableDirective &D) {
+ OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
+ switch (DirectiveKind) {
+ case OMPD_target:
+ case OMPD_target_teams:
+ return hasNestedSPMDDirective(Ctx, D); // Decided by the nested directive.
+ case OMPD_target_parallel:
+ case OMPD_target_parallel_for:
+ case OMPD_target_parallel_for_simd:
+ case OMPD_target_teams_distribute_parallel_for:
+ case OMPD_target_teams_distribute_parallel_for_simd:
+ return !hasParallelIfNumThreadsClause(Ctx, D); // Decided by D's own clauses.
+ case OMPD_target_simd:
+ case OMPD_target_teams_distribute:
+ case OMPD_target_teams_distribute_simd:
+ return false; // These three kinds never report SPMD support.
+ case OMPD_parallel:
+ case OMPD_for:
+ case OMPD_parallel_for:
+ case OMPD_parallel_sections:
+ case OMPD_for_simd:
+ case OMPD_parallel_for_simd:
+ case OMPD_cancel:
+ case OMPD_cancellation_point:
+ case OMPD_ordered:
+ case OMPD_threadprivate:
+ case OMPD_task:
+ case OMPD_simd:
+ case OMPD_sections:
+ case OMPD_section:
+ case OMPD_single:
+ case OMPD_master:
+ case OMPD_critical:
+ case OMPD_taskyield:
+ case OMPD_barrier:
+ case OMPD_taskwait:
+ case OMPD_taskgroup:
+ case OMPD_atomic:
+ case OMPD_flush:
+ case OMPD_teams:
+ case OMPD_target_data:
+ case OMPD_target_exit_data:
+ case OMPD_target_enter_data:
+ case OMPD_distribute:
+ case OMPD_distribute_simd:
+ case OMPD_distribute_parallel_for:
+ case OMPD_distribute_parallel_for_simd:
+ case OMPD_teams_distribute:
+ case OMPD_teams_distribute_simd:
+ case OMPD_teams_distribute_parallel_for:
+ case OMPD_teams_distribute_parallel_for_simd:
+ case OMPD_target_update:
+ case OMPD_declare_simd:
+ case OMPD_declare_target:
+ case OMPD_end_declare_target:
+ case OMPD_declare_reduction:
+ case OMPD_taskloop:
+ case OMPD_taskloop_simd:
+ case OMPD_requires:
+ case OMPD_unknown:
+ break; // All remaining kinds end up in the llvm_unreachable below.
+ }
+ llvm_unreachable(
+ "Unknown programming model for OpenMP directive on NVPTX target.");
+}
+
+/// Check if the directive is loop-based, has no 'ordered' clause, and either
+/// has no 'schedule' clause at all or has a 'schedule(static)' clause.
+static bool hasStaticScheduling(const OMPExecutableDirective &D) {
+ assert(isOpenMPWorksharingDirective(D.getDirectiveKind()) &&
+ isOpenMPLoopDirective(D.getDirectiveKind()) &&
+ "Expected loop-based directive.");
+ return !D.hasClausesOfKind<OMPOrderedClause>() &&
+ (!D.hasClausesOfKind<OMPScheduleClause>() ||
+ llvm::any_of(D.getClausesOfKind<OMPScheduleClause>(),
+ [](const OMPScheduleClause *C) {
+ return C->getScheduleKind() == OMPC_SCHEDULE_static;
+ }));
+}
+
+/// Check for inner (nested) lightweight runtime construct, if any.
+static bool hasNestedLightweightDirective(ASTContext &Ctx,
+ const OMPExecutableDirective &D) {
+ assert(supportsSPMDExecutionMode(Ctx, D) && "Expected SPMD mode directive.");
+ const auto *CS = D.getInnermostCapturedStmt();
+ const auto *Body =
+ CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
+ const Stmt *ChildStmt = getSingleCompoundChild(Ctx, Body);
+
+ if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
+ OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
+ switch (D.getDirectiveKind()) {
+ case OMPD_target:
+ if (isOpenMPParallelDirective(DKind) &&
+ isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) &&
+ hasStaticScheduling(*NestedDir))
+ return true;
+ if (DKind == OMPD_parallel) {
+ Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
+ /*IgnoreCaptured=*/true);
+ if (!Body)
+ return false;
+ ChildStmt = getSingleCompoundChild(Ctx, Body);
+ if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
+ DKind = NND->getDirectiveKind();
+ if (isOpenMPWorksharingDirective(DKind) &&
+ isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND))
+ return true;
+ }
+ } else if (DKind == OMPD_teams) {
+ Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
+ /*IgnoreCaptured=*/true);
+ if (!Body)
+ return false;
+ ChildStmt = getSingleCompoundChild(Ctx, Body);
+ if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
+ DKind = NND->getDirectiveKind();
+ if (isOpenMPParallelDirective(DKind) &&
+ isOpenMPWorksharingDirective(DKind) &&
+ isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND))
+ return true;
+ if (DKind == OMPD_parallel) {
+ Body = NND->getInnermostCapturedStmt()->IgnoreContainers(
+ /*IgnoreCaptured=*/true);
if (!Body)
return false;
- ChildStmt = getSingleCompoundChild(Body);
- if (!ChildStmt)
- return false;
+ ChildStmt = getSingleCompoundChild(Ctx, Body);
if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
DKind = NND->getDirectiveKind();
- return isOpenMPParallelDirective(DKind) &&
- !hasParallelIfNumThreadsClause(Ctx, *NND);
+ if (isOpenMPWorksharingDirective(DKind) &&
+ isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND))
+ return true;
}
}
}
@@ -724,25 +1010,28 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
return false;
case OMPD_target_teams:
if (isOpenMPParallelDirective(DKind) &&
- !hasParallelIfNumThreadsClause(Ctx, *NestedDir))
+ isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) &&
+ hasStaticScheduling(*NestedDir))
return true;
- if (DKind == OMPD_distribute) {
- Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers();
+ if (DKind == OMPD_parallel) {
+ Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
+ /*IgnoreCaptured=*/true);
if (!Body)
return false;
- ChildStmt = getSingleCompoundChild(Body);
+ ChildStmt = getSingleCompoundChild(Ctx, Body);
if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
DKind = NND->getDirectiveKind();
- return isOpenMPParallelDirective(DKind) &&
- !hasParallelIfNumThreadsClause(Ctx, *NND);
+ if (isOpenMPWorksharingDirective(DKind) &&
+ isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND))
+ return true;
}
}
return false;
+ case OMPD_target_parallel:
+ return isOpenMPWorksharingDirective(DKind) &&
+ isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NestedDir);
case OMPD_target_teams_distribute:
- return isOpenMPParallelDirective(DKind) &&
- !hasParallelIfNumThreadsClause(Ctx, *NestedDir);
case OMPD_target_simd:
- case OMPD_target_parallel:
case OMPD_target_parallel_for:
case OMPD_target_parallel_for_simd:
case OMPD_target_teams_distribute_simd:
@@ -790,6 +1079,7 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
case OMPD_declare_reduction:
case OMPD_taskloop:
case OMPD_taskloop_simd:
+ case OMPD_requires:
case OMPD_unknown:
llvm_unreachable("Unexpected directive.");
}
@@ -798,21 +1088,26 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
return false;
}
-static bool supportsSPMDExecutionMode(ASTContext &Ctx,
- const OMPExecutableDirective &D) {
+/// Checks if the construct supports lightweight runtime. It must be SPMD
+/// construct + inner loop-based construct with static scheduling.
+static bool supportsLightweightRuntime(ASTContext &Ctx,
+ const OMPExecutableDirective &D) {
+ if (!supportsSPMDExecutionMode(Ctx, D))
+ return false;
OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
switch (DirectiveKind) {
case OMPD_target:
case OMPD_target_teams:
- case OMPD_target_teams_distribute:
- return hasNestedSPMDDirective(Ctx, D);
case OMPD_target_parallel:
+ return hasNestedLightweightDirective(Ctx, D);
case OMPD_target_parallel_for:
case OMPD_target_parallel_for_simd:
case OMPD_target_teams_distribute_parallel_for:
case OMPD_target_teams_distribute_parallel_for_simd:
- return !hasParallelIfNumThreadsClause(Ctx, D);
+ // (Last|First)-privates must be shared in parallel region.
+ return hasStaticScheduling(D);
case OMPD_target_simd:
+ case OMPD_target_teams_distribute:
case OMPD_target_teams_distribute_simd:
return false;
case OMPD_parallel:
@@ -857,6 +1152,7 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx,
case OMPD_declare_reduction:
case OMPD_taskloop:
case OMPD_taskloop_simd:
+ case OMPD_requires:
case OMPD_unknown:
break;
}
@@ -870,9 +1166,9 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDKernel(const OMPExecutableDirective &D,
llvm::Constant *&OutlinedFnID,
bool IsOffloadEntry,
const RegionCodeGenTy &CodeGen) {
- ExecutionModeRAII ModeRAII(CurrentExecutionMode, /*IsSPMD=*/false);
+ ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode);
EntryFunctionState EST;
- WorkerFunctionState WST(CGM, D.getLocStart());
+ WorkerFunctionState WST(CGM, D.getBeginLoc());
Work.clear();
WrapperFunctionsMap.clear();
@@ -886,17 +1182,35 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDKernel(const OMPExecutableDirective &D,
CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST)
: EST(EST), WST(WST) {}
void Enter(CodeGenFunction &CGF) override {
- static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
- .emitNonSPMDEntryHeader(CGF, EST, WST);
+ auto &RT =
+ static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime());
+ RT.emitNonSPMDEntryHeader(CGF, EST, WST);
+ // Skip target region initialization.
+ RT.setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true);
}
void Exit(CodeGenFunction &CGF) override {
- static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
- .emitNonSPMDEntryFooter(CGF, EST);
+ auto &RT =
+ static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime());
+ RT.clearLocThreadIdInsertPt(CGF);
+ RT.emitNonSPMDEntryFooter(CGF, EST);
}
} Action(EST, WST);
CodeGen.setAction(Action);
+ IsInTTDRegion = true;
+ // Reserve place for the globalized memory.
+ GlobalizedRecords.emplace_back();
+ if (!KernelStaticGlobalized) {
+ KernelStaticGlobalized = new llvm::GlobalVariable(
+ CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/false,
+ llvm::GlobalValue::InternalLinkage,
+ llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
+ "_openmp_kernel_static_glob_rd$ptr", /*InsertBefore=*/nullptr,
+ llvm::GlobalValue::NotThreadLocal,
+ CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared));
+ }
emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
IsOffloadEntry, CodeGen);
+ IsInTTDRegion = false;
// Now change the name of the worker function to correspond to this target
// region's entry function.
@@ -984,7 +1298,10 @@ void CGOpenMPRuntimeNVPTX::emitSPMDKernel(const OMPExecutableDirective &D,
llvm::Constant *&OutlinedFnID,
bool IsOffloadEntry,
const RegionCodeGenTy &CodeGen) {
- ExecutionModeRAII ModeRAII(CurrentExecutionMode, /*IsSPMD=*/true);
+ ExecutionRuntimeModesRAII ModeRAII(
+ CurrentExecutionMode, RequiresFullRuntime,
+ CGM.getLangOpts().OpenMPCUDAForceFullRuntime ||
+ !supportsLightweightRuntime(CGM.getContext(), D));
EntryFunctionState EST;
// Emit target region as a standalone region.
@@ -1000,14 +1317,30 @@ void CGOpenMPRuntimeNVPTX::emitSPMDKernel(const OMPExecutableDirective &D,
: RT(RT), EST(EST), D(D) {}
void Enter(CodeGenFunction &CGF) override {
RT.emitSPMDEntryHeader(CGF, EST, D);
+ // Skip target region initialization.
+ RT.setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true);
}
void Exit(CodeGenFunction &CGF) override {
+ RT.clearLocThreadIdInsertPt(CGF);
RT.emitSPMDEntryFooter(CGF, EST);
}
} Action(*this, EST, D);
CodeGen.setAction(Action);
+ IsInTTDRegion = true;
+ // Reserve place for the globalized memory.
+ GlobalizedRecords.emplace_back();
+ if (!KernelStaticGlobalized) {
+ KernelStaticGlobalized = new llvm::GlobalVariable(
+ CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/false,
+ llvm::GlobalValue::InternalLinkage,
+ llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
+ "_openmp_kernel_static_glob_rd$ptr", /*InsertBefore=*/nullptr,
+ llvm::GlobalValue::NotThreadLocal,
+ CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared));
+ }
emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
IsOffloadEntry, CodeGen);
+ IsInTTDRegion = false;
}
void CGOpenMPRuntimeNVPTX::emitSPMDEntryHeader(
@@ -1019,19 +1352,18 @@ void CGOpenMPRuntimeNVPTX::emitSPMDEntryHeader(
llvm::BasicBlock *ExecuteBB = CGF.createBasicBlock(".execute");
EST.ExitBB = CGF.createBasicBlock(".exit");
- // Initialize the OMP state in the runtime; called by all active threads.
- // TODO: Set RequiresOMPRuntime and RequiresDataSharing parameters
- // based on code analysis of the target region.
llvm::Value *Args[] = {getThreadLimit(CGF, /*IsInSPMDExecutionMode=*/true),
- /*RequiresOMPRuntime=*/Bld.getInt16(1),
- /*RequiresDataSharing=*/Bld.getInt16(1)};
+ /*RequiresOMPRuntime=*/
+ Bld.getInt16(RequiresFullRuntime ? 1 : 0),
+ /*RequiresDataSharing=*/Bld.getInt16(0)};
CGF.EmitRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_init), Args);
- // For data sharing, we need to initialize the stack.
- CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(
- OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd));
+ if (RequiresFullRuntime) {
+ // For data sharing, we need to initialize the stack.
+ CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
+ OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd));
+ }
CGF.EmitBranch(ExecuteBB);
@@ -1054,8 +1386,11 @@ void CGOpenMPRuntimeNVPTX::emitSPMDEntryFooter(CodeGenFunction &CGF,
CGF.EmitBlock(OMPDeInitBB);
// DeInitialize the OMP state in the runtime; called by all active threads.
+ llvm::Value *Args[] = {/*RequiresOMPRuntime=*/
+ CGF.Builder.getInt16(RequiresFullRuntime ? 1 : 0)};
CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_deinit), None);
+ createNVPTXRuntimeFunction(
+ OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2), Args);
CGF.EmitBranch(EST.ExitBB);
CGF.EmitBlock(EST.ExitBB);
@@ -1142,6 +1477,8 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF,
// Signal start of parallel region.
CGF.EmitBlock(ExecuteBB);
+ // Skip initialization.
+ setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true);
// Process work items: outlined parallel functions.
for (llvm::Function *W : Work) {
@@ -1202,6 +1539,8 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF,
// Exit target region.
CGF.EmitBlock(ExitBB);
+ // Skip initialization.
+ clearLocThreadIdInsertPt(CGF);
}
/// Returns specified OpenMP runtime function for the current OpenMP
@@ -1238,11 +1577,12 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_init");
break;
}
- case OMPRTL_NVPTX__kmpc_spmd_kernel_deinit: {
- // Build void __kmpc_spmd_kernel_deinit();
+ case OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2: {
+ // Build void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime);
+ llvm::Type *TypeParams[] = {CGM.Int16Ty};
auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_deinit");
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_deinit_v2");
break;
}
case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: {
@@ -1307,12 +1647,12 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int64");
break;
}
- case OMPRTL_NVPTX__kmpc_parallel_reduce_nowait: {
- // Build int32_t kmpc_nvptx_parallel_reduce_nowait(kmp_int32 global_tid,
- // kmp_int32 num_vars, size_t reduce_size, void* reduce_data,
- // void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
- // lane_offset, int16_t Algorithm Version),
- // void (*kmp_InterWarpCopyFctPtr)(void* src, int warp_num));
+ case OMPRTL_NVPTX__kmpc_parallel_reduce_nowait_v2: {
+ // Build int32_t kmpc_nvptx_parallel_reduce_nowait_v2(ident_t *loc,
+ // kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, void*
+ // reduce_data, void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t
+ // lane_id, int16_t lane_offset, int16_t Algorithm Version), void
+ // (*kmp_InterWarpCopyFctPtr)(void* src, int warp_num));
llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty,
CGM.Int16Ty, CGM.Int16Ty};
auto *ShuffleReduceFnTy =
@@ -1322,7 +1662,8 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
auto *InterWarpCopyFnTy =
llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams,
/*isVarArg=*/false);
- llvm::Type *TypeParams[] = {CGM.Int32Ty,
+ llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
+ CGM.Int32Ty,
CGM.Int32Ty,
CGM.SizeTy,
CGM.VoidPtrTy,
@@ -1331,86 +1672,40 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
auto *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(
- FnTy, /*Name=*/"__kmpc_nvptx_parallel_reduce_nowait");
+ FnTy, /*Name=*/"__kmpc_nvptx_parallel_reduce_nowait_v2");
break;
}
- case OMPRTL_NVPTX__kmpc_simd_reduce_nowait: {
- // Build int32_t kmpc_nvptx_simd_reduce_nowait(kmp_int32 global_tid,
- // kmp_int32 num_vars, size_t reduce_size, void* reduce_data,
- // void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
- // lane_offset, int16_t Algorithm Version),
- // void (*kmp_InterWarpCopyFctPtr)(void* src, int warp_num));
- llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty,
- CGM.Int16Ty, CGM.Int16Ty};
- auto *ShuffleReduceFnTy =
- llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams,
- /*isVarArg=*/false);
- llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty};
- auto *InterWarpCopyFnTy =
- llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams,
- /*isVarArg=*/false);
- llvm::Type *TypeParams[] = {CGM.Int32Ty,
- CGM.Int32Ty,
- CGM.SizeTy,
- CGM.VoidPtrTy,
- ShuffleReduceFnTy->getPointerTo(),
- InterWarpCopyFnTy->getPointerTo()};
+ case OMPRTL_NVPTX__kmpc_end_reduce_nowait: {
+ // Build __kmpc_end_reduce_nowait(kmp_int32 global_tid);
+ llvm::Type *TypeParams[] = {CGM.Int32Ty};
auto *FnTy =
- llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(
- FnTy, /*Name=*/"__kmpc_nvptx_simd_reduce_nowait");
+ FnTy, /*Name=*/"__kmpc_nvptx_end_reduce_nowait");
break;
}
- case OMPRTL_NVPTX__kmpc_teams_reduce_nowait: {
- // Build int32_t __kmpc_nvptx_teams_reduce_nowait(int32_t global_tid,
- // int32_t num_vars, size_t reduce_size, void *reduce_data,
- // void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
- // lane_offset, int16_t shortCircuit),
- // void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num),
- // void (*kmp_CopyToScratchpadFctPtr)(void *reduce_data, void * scratchpad,
- // int32_t index, int32_t width),
- // void (*kmp_LoadReduceFctPtr)(void *reduce_data, void * scratchpad,
- // int32_t index, int32_t width, int32_t reduce))
- llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty,
- CGM.Int16Ty, CGM.Int16Ty};
- auto *ShuffleReduceFnTy =
- llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams,
- /*isVarArg=*/false);
- llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty};
- auto *InterWarpCopyFnTy =
- llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams,
- /*isVarArg=*/false);
- llvm::Type *CopyToScratchpadTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy,
- CGM.Int32Ty, CGM.Int32Ty};
- auto *CopyToScratchpadFnTy =
- llvm::FunctionType::get(CGM.VoidTy, CopyToScratchpadTypeParams,
- /*isVarArg=*/false);
- llvm::Type *LoadReduceTypeParams[] = {
- CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.Int32Ty, CGM.Int32Ty, CGM.Int32Ty};
- auto *LoadReduceFnTy =
- llvm::FunctionType::get(CGM.VoidTy, LoadReduceTypeParams,
- /*isVarArg=*/false);
- llvm::Type *TypeParams[] = {CGM.Int32Ty,
- CGM.Int32Ty,
- CGM.SizeTy,
- CGM.VoidPtrTy,
- ShuffleReduceFnTy->getPointerTo(),
- InterWarpCopyFnTy->getPointerTo(),
- CopyToScratchpadFnTy->getPointerTo(),
- LoadReduceFnTy->getPointerTo()};
+ case OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_simple: {
+ // Build __kmpc_nvptx_teams_reduce_nowait_simple(ident_t *loc, kmp_int32
+ // global_tid, kmp_critical_name *lck)
+ llvm::Type *TypeParams[] = {
+ getIdentTyPointerTy(), CGM.Int32Ty,
+ llvm::PointerType::getUnqual(getKmpCriticalNameTy())};
auto *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(
- FnTy, /*Name=*/"__kmpc_nvptx_teams_reduce_nowait");
+ FnTy, /*Name=*/"__kmpc_nvptx_teams_reduce_nowait_simple");
break;
}
- case OMPRTL_NVPTX__kmpc_end_reduce_nowait: {
- // Build __kmpc_end_reduce_nowait(kmp_int32 global_tid);
- llvm::Type *TypeParams[] = {CGM.Int32Ty};
+ case OMPRTL_NVPTX__kmpc_nvptx_teams_end_reduce_nowait_simple: {
+ // Build __kmpc_nvptx_teams_end_reduce_nowait_simple(ident_t *loc, kmp_int32
+ // global_tid, kmp_critical_name *lck)
+ llvm::Type *TypeParams[] = {
+ getIdentTyPointerTy(), CGM.Int32Ty,
+ llvm::PointerType::getUnqual(getKmpCriticalNameTy())};
auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(
- FnTy, /*Name=*/"__kmpc_nvptx_end_reduce_nowait");
+ FnTy, /*Name=*/"__kmpc_nvptx_teams_end_reduce_nowait_simple");
break;
}
case OMPRTL_NVPTX__kmpc_data_sharing_init_stack: {
@@ -1424,17 +1719,18 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
/// Build void __kmpc_data_sharing_init_stack_spmd();
auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack_spmd");
+ RTLFn =
+ CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack_spmd");
break;
}
- case OMPRTL_NVPTX__kmpc_data_sharing_push_stack: {
- // Build void *__kmpc_data_sharing_push_stack(size_t size,
+ case OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack: {
+ // Build void *__kmpc_data_sharing_coalesced_push_stack(size_t size,
// int16_t UseSharedMemory);
llvm::Type *TypeParams[] = {CGM.SizeTy, CGM.Int16Ty};
auto *FnTy =
llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(
- FnTy, /*Name=*/"__kmpc_data_sharing_push_stack");
+ FnTy, /*Name=*/"__kmpc_data_sharing_coalesced_push_stack");
break;
}
case OMPRTL_NVPTX__kmpc_data_sharing_pop_stack: {
@@ -1484,6 +1780,46 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_is_spmd_exec_mode");
break;
}
+ case OMPRTL_NVPTX__kmpc_get_team_static_memory: {
+ // Build void __kmpc_get_team_static_memory(int16_t isSPMDExecutionMode,
+ // const void *buf, size_t size, int16_t is_shared, const void **res);
+ llvm::Type *TypeParams[] = {CGM.Int16Ty, CGM.VoidPtrTy, CGM.SizeTy,
+ CGM.Int16Ty, CGM.VoidPtrPtrTy};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_get_team_static_memory");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_restore_team_static_memory: {
+ // Build void __kmpc_restore_team_static_memory(int16_t isSPMDExecutionMode,
+ // int16_t is_shared);
+ llvm::Type *TypeParams[] = {CGM.Int16Ty, CGM.Int16Ty};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+ RTLFn =
+ CGM.CreateRuntimeFunction(FnTy, "__kmpc_restore_team_static_memory");
+ break;
+ }
+ case OMPRTL__kmpc_barrier: {
+ // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
+ llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
+ cast<llvm::Function>(RTLFn)->addFnAttr(llvm::Attribute::Convergent);
+ break;
+ }
+ case OMPRTL__kmpc_barrier_simple_spmd: {
+ // Build void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32
+ // global_tid);
+ llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn =
+ CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier_simple_spmd");
+ cast<llvm::Function>(RTLFn)->addFnAttr(llvm::Attribute::Convergent);
+ break;
+ }
}
return RTLFn;
}
@@ -1530,6 +1866,37 @@ void CGOpenMPRuntimeNVPTX::emitTargetOutlinedFunction(
setPropertyExecutionMode(CGM, OutlinedFn->getName(), Mode);
}
+namespace {
+LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
+/// Enum for accessing the reserved_2 field of the ident_t struct.
+enum ModeFlagsTy : unsigned {
+ /// Bit set to 1 when in SPMD mode.
+ KMP_IDENT_SPMD_MODE = 0x01,
+ /// Bit set to 1 when a simplified runtime is used.
+ KMP_IDENT_SIMPLE_RT_MODE = 0x02,
+ LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/KMP_IDENT_SIMPLE_RT_MODE)
+};
+
+/// Special mode Undefined: the combination of Non-SPMD mode + SimpleRuntime.
+static const ModeFlagsTy UndefinedMode =
+ (~KMP_IDENT_SPMD_MODE) & KMP_IDENT_SIMPLE_RT_MODE; // SPMD bit clear, simple-runtime bit set (0x02).
+} // anonymous namespace
+
+unsigned CGOpenMPRuntimeNVPTX::getDefaultLocationReserved2Flags() const { // Selects ModeFlagsTy bits from the current execution mode.
+ switch (getExecutionMode()) {
+ case EM_SPMD:
+ if (requiresFullRuntime())
+ return KMP_IDENT_SPMD_MODE & (~KMP_IDENT_SIMPLE_RT_MODE); // SPMD bit only.
+ return KMP_IDENT_SPMD_MODE | KMP_IDENT_SIMPLE_RT_MODE; // SPMD bit + simple-runtime bit.
+ case EM_NonSPMD:
+ assert(requiresFullRuntime() && "Expected full runtime.");
+ return (~KMP_IDENT_SPMD_MODE) & (~KMP_IDENT_SIMPLE_RT_MODE); // Neither the SPMD nor the simple-runtime bit.
+ case EM_Unknown:
+ return UndefinedMode; // Non-SPMD + simple runtime.
+ }
+ llvm_unreachable("Unknown flags are requested."); // Each case above returns, so control should not get here.
+}
+
CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM)
: CGOpenMPRuntime(CGM, "_", "$") {
if (!CGM.getLangOpts().OpenMPIsDevice)
@@ -1581,12 +1948,15 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction(
}
} Action(IsInParallelRegion);
CodeGen.setAction(Action);
+ bool PrevIsInTTDRegion = IsInTTDRegion;
+ IsInTTDRegion = false;
bool PrevIsInTargetMasterThreadRegion = IsInTargetMasterThreadRegion;
IsInTargetMasterThreadRegion = false;
auto *OutlinedFun =
cast<llvm::Function>(CGOpenMPRuntime::emitParallelOutlinedFunction(
D, ThreadIDVar, InnermostKind, CodeGen));
IsInTargetMasterThreadRegion = PrevIsInTargetMasterThreadRegion;
+ IsInTTDRegion = PrevIsInTTDRegion;
if (getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD &&
!IsInParallelRegion) {
llvm::Function *WrapperFun =
@@ -1597,26 +1967,106 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction(
return OutlinedFun;
}
+/// Appends to \p Vars the private item of every variable referenced by a lastprivate clause of the distribute
+/// directive, which is either \p D itself (teams distribute ...) or the directive nested in it (teams {distribute ...}).
+static void
+getDistributeLastprivateVars(ASTContext &Ctx, const OMPExecutableDirective &D,
+ llvm::SmallVectorImpl<const ValueDecl *> &Vars) {
+ assert(isOpenMPTeamsDirective(D.getDirectiveKind()) &&
+ "expected teams directive.");
+ const OMPExecutableDirective *Dir = &D; // Stays D unless a nested child statement is found below.
+ if (!isOpenMPDistributeDirective(D.getDirectiveKind())) { // D is not a distribute directive itself.
+ if (const Stmt *S = getSingleCompoundChild(
+ Ctx,
+ D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(
+ /*IgnoreCaptured=*/true))) {
+ Dir = dyn_cast<OMPExecutableDirective>(S); // Null when the child is not an executable directive.
+ if (Dir && !isOpenMPDistributeDirective(Dir->getDirectiveKind()))
+ Dir = nullptr; // Nested directive is not a distribute directive: nothing to collect.
+ }
+ }
+ if (!Dir)
+ return;
+ for (const auto *C : Dir->getClausesOfKind<OMPLastprivateClause>()) {
+ for (const Expr *E : C->getVarRefs())
+ Vars.push_back(getPrivateItem(E));
+ }
+}
+
+/// Appends to \p Vars the private item of each privates() expression of every reduction clause on the teams directive \p D.
+static void
+getTeamsReductionVars(ASTContext &Ctx, const OMPExecutableDirective &D,
+ llvm::SmallVectorImpl<const ValueDecl *> &Vars) {
+ assert(isOpenMPTeamsDirective(D.getDirectiveKind()) &&
+ "expected teams directive.");
+ for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { // Only clauses attached directly to D are visited.
+ for (const Expr *E : C->privates())
+ Vars.push_back(getPrivateItem(E));
+ }
+}
+
llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
- SourceLocation Loc = D.getLocStart();
+ SourceLocation Loc = D.getBeginLoc();
+
+ const RecordDecl *GlobalizedRD = nullptr;
+ llvm::SmallVector<const ValueDecl *, 4> LastPrivatesReductions;
+ llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;
+ // Globalize team reductions variable unconditionally in all modes.
+ getTeamsReductionVars(CGM.getContext(), D, LastPrivatesReductions);
+ if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD) {
+ getDistributeLastprivateVars(CGM.getContext(), D, LastPrivatesReductions);
+ if (!LastPrivatesReductions.empty()) {
+ GlobalizedRD = ::buildRecordForGlobalizedVars(
+ CGM.getContext(), llvm::None, LastPrivatesReductions,
+ MappedDeclsFields);
+ }
+ } else if (!LastPrivatesReductions.empty()) {
+ assert(!TeamAndReductions.first &&
+ "Previous team declaration is not expected.");
+ TeamAndReductions.first = D.getCapturedStmt(OMPD_teams)->getCapturedDecl();
+ std::swap(TeamAndReductions.second, LastPrivatesReductions);
+ }
// Emit target region as a standalone region.
class NVPTXPrePostActionTy : public PrePostActionTy {
SourceLocation &Loc;
+ const RecordDecl *GlobalizedRD;
+ llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
+ &MappedDeclsFields;
public:
- NVPTXPrePostActionTy(SourceLocation &Loc) : Loc(Loc) {}
+ NVPTXPrePostActionTy(
+ SourceLocation &Loc, const RecordDecl *GlobalizedRD,
+ llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
+ &MappedDeclsFields)
+ : Loc(Loc), GlobalizedRD(GlobalizedRD),
+ MappedDeclsFields(MappedDeclsFields) {}
void Enter(CodeGenFunction &CGF) override {
- static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
- .emitGenericVarsProlog(CGF, Loc);
+ auto &Rt =
+ static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime());
+ if (GlobalizedRD) {
+ auto I = Rt.FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first;
+ I->getSecond().GlobalRecord = GlobalizedRD;
+ I->getSecond().MappedParams =
+ llvm::make_unique<CodeGenFunction::OMPMapVars>();
+ DeclToAddrMapTy &Data = I->getSecond().LocalVarData;
+ for (const auto &Pair : MappedDeclsFields) {
+ assert(Pair.getFirst()->isCanonicalDecl() &&
+ "Expected canonical declaration");
+ Data.insert(std::make_pair(Pair.getFirst(),
+ MappedVarData(Pair.getSecond(),
+ /*IsOnePerTeam=*/true)));
+ }
+ }
+ Rt.emitGenericVarsProlog(CGF, Loc);
}
void Exit(CodeGenFunction &CGF) override {
static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
.emitGenericVarsEpilog(CGF);
}
- } Action(Loc);
+ } Action(Loc, GlobalizedRD, MappedDeclsFields);
CodeGen.setAction(Action);
llvm::Value *OutlinedFunVal = CGOpenMPRuntime::emitTeamsOutlinedFunction(
D, ThreadIDVar, InnermostKind, CodeGen);
@@ -1629,8 +2079,10 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction(
}
void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
- SourceLocation Loc) {
- if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic)
+ SourceLocation Loc,
+ bool WithSPMDCheck) {
+ if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic &&
+ getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD)
return;
CGBuilderTy &Bld = CGF.Builder;
@@ -1639,33 +2091,187 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
if (I == FunctionGlobalizedDecls.end())
return;
if (const RecordDecl *GlobalizedVarsRecord = I->getSecond().GlobalRecord) {
- QualType RecTy = CGM.getContext().getRecordType(GlobalizedVarsRecord);
+ QualType GlobalRecTy = CGM.getContext().getRecordType(GlobalizedVarsRecord);
+ QualType SecGlobalRecTy;
// Recover pointer to this function's global record. The runtime will
// handle the specifics of the allocation of the memory.
// Use actual memory size of the record including the padding
// for alignment purposes.
unsigned Alignment =
- CGM.getContext().getTypeAlignInChars(RecTy).getQuantity();
+ CGM.getContext().getTypeAlignInChars(GlobalRecTy).getQuantity();
unsigned GlobalRecordSize =
- CGM.getContext().getTypeSizeInChars(RecTy).getQuantity();
+ CGM.getContext().getTypeSizeInChars(GlobalRecTy).getQuantity();
GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment);
- // TODO: allow the usage of shared memory to be controlled by
- // the user, for now, default to global.
- llvm::Value *GlobalRecordSizeArg[] = {
- llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize),
- CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
- llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_push_stack),
- GlobalRecordSizeArg);
- llvm::Value *GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
- GlobalRecValue, CGF.ConvertTypeForMem(RecTy)->getPointerTo());
+
+ llvm::PointerType *GlobalRecPtrTy =
+ CGF.ConvertTypeForMem(GlobalRecTy)->getPointerTo();
+ llvm::Value *GlobalRecCastAddr;
+ llvm::Value *IsTTD = nullptr;
+ if (!IsInTTDRegion &&
+ (WithSPMDCheck ||
+ getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown)) {
+ llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit");
+ llvm::BasicBlock *SPMDBB = CGF.createBasicBlock(".spmd");
+ llvm::BasicBlock *NonSPMDBB = CGF.createBasicBlock(".non-spmd");
+ if (I->getSecond().SecondaryGlobalRecord.hasValue()) {
+ llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
+ llvm::Value *ThreadID = getThreadID(CGF, Loc);
+ llvm::Value *PL = CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level),
+ {RTLoc, ThreadID});
+ IsTTD = Bld.CreateIsNull(PL);
+ }
+ llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall(
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode)));
+ Bld.CreateCondBr(IsSPMD, SPMDBB, NonSPMDBB);
+ // There is no need to emit line number for unconditional branch.
+ (void)ApplyDebugLocation::CreateEmpty(CGF);
+ CGF.EmitBlock(SPMDBB);
+ Address RecPtr = Address(llvm::ConstantPointerNull::get(GlobalRecPtrTy),
+ CharUnits::fromQuantity(Alignment));
+ CGF.EmitBranch(ExitBB);
+ // There is no need to emit line number for unconditional branch.
+ (void)ApplyDebugLocation::CreateEmpty(CGF);
+ CGF.EmitBlock(NonSPMDBB);
+ llvm::Value *Size = llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize);
+ if (const RecordDecl *SecGlobalizedVarsRecord =
+ I->getSecond().SecondaryGlobalRecord.getValueOr(nullptr)) {
+ SecGlobalRecTy =
+ CGM.getContext().getRecordType(SecGlobalizedVarsRecord);
+
+ // Recover pointer to this function's global record. The runtime will
+ // handle the specifics of the allocation of the memory.
+ // Use actual memory size of the record including the padding
+ // for alignment purposes.
+ unsigned Alignment =
+ CGM.getContext().getTypeAlignInChars(SecGlobalRecTy).getQuantity();
+ unsigned GlobalRecordSize =
+ CGM.getContext().getTypeSizeInChars(SecGlobalRecTy).getQuantity();
+ GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment);
+ Size = Bld.CreateSelect(
+ IsTTD, llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize), Size);
+ }
+ // TODO: allow the usage of shared memory to be controlled by
+ // the user, for now, default to global.
+ llvm::Value *GlobalRecordSizeArg[] = {
+ Size, CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
+ llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(
+ OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack),
+ GlobalRecordSizeArg);
+ GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
+ GlobalRecValue, GlobalRecPtrTy);
+ CGF.EmitBlock(ExitBB);
+ auto *Phi = Bld.CreatePHI(GlobalRecPtrTy,
+ /*NumReservedValues=*/2, "_select_stack");
+ Phi->addIncoming(RecPtr.getPointer(), SPMDBB);
+ Phi->addIncoming(GlobalRecCastAddr, NonSPMDBB);
+ GlobalRecCastAddr = Phi;
+ I->getSecond().GlobalRecordAddr = Phi;
+ I->getSecond().IsInSPMDModeFlag = IsSPMD;
+ } else if (IsInTTDRegion) {
+ assert(GlobalizedRecords.back().Records.size() < 2 &&
+ "Expected less than 2 globalized records: one for target and one "
+ "for teams.");
+ unsigned Offset = 0;
+ for (const RecordDecl *RD : GlobalizedRecords.back().Records) {
+ QualType RDTy = CGM.getContext().getRecordType(RD);
+ unsigned Alignment =
+ CGM.getContext().getTypeAlignInChars(RDTy).getQuantity();
+ unsigned Size = CGM.getContext().getTypeSizeInChars(RDTy).getQuantity();
+ Offset =
+ llvm::alignTo(llvm::alignTo(Offset, Alignment) + Size, Alignment);
+ }
+ unsigned Alignment =
+ CGM.getContext().getTypeAlignInChars(GlobalRecTy).getQuantity();
+ Offset = llvm::alignTo(Offset, Alignment);
+ GlobalizedRecords.back().Records.push_back(GlobalizedVarsRecord);
+ ++GlobalizedRecords.back().RegionCounter;
+ if (GlobalizedRecords.back().Records.size() == 1) {
+ assert(KernelStaticGlobalized &&
+ "Kernel static pointer must be initialized already.");
+ auto *UseSharedMemory = new llvm::GlobalVariable(
+ CGM.getModule(), CGM.Int16Ty, /*isConstant=*/true,
+ llvm::GlobalValue::InternalLinkage, nullptr,
+ "_openmp_static_kernel$is_shared");
+ UseSharedMemory->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+ QualType Int16Ty = CGM.getContext().getIntTypeForBitwidth(
+ /*DestWidth=*/16, /*Signed=*/0);
+ llvm::Value *IsInSharedMemory = CGF.EmitLoadOfScalar(
+ Address(UseSharedMemory,
+ CGM.getContext().getTypeAlignInChars(Int16Ty)),
+ /*Volatile=*/false, Int16Ty, Loc);
+ auto *StaticGlobalized = new llvm::GlobalVariable(
+ CGM.getModule(), CGM.Int8Ty, /*isConstant=*/false,
+ llvm::GlobalValue::CommonLinkage, nullptr);
+ auto *RecSize = new llvm::GlobalVariable(
+ CGM.getModule(), CGM.SizeTy, /*isConstant=*/true,
+ llvm::GlobalValue::InternalLinkage, nullptr,
+ "_openmp_static_kernel$size");
+ RecSize->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+ llvm::Value *Ld = CGF.EmitLoadOfScalar(
+ Address(RecSize, CGM.getSizeAlign()), /*Volatile=*/false,
+ CGM.getContext().getSizeType(), Loc);
+ llvm::Value *ResAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
+ KernelStaticGlobalized, CGM.VoidPtrPtrTy);
+ llvm::Value *GlobalRecordSizeArg[] = {
+ llvm::ConstantInt::get(
+ CGM.Int16Ty,
+ getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD ? 1 : 0),
+ StaticGlobalized, Ld, IsInSharedMemory, ResAddr};
+ CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
+ OMPRTL_NVPTX__kmpc_get_team_static_memory),
+ GlobalRecordSizeArg);
+ GlobalizedRecords.back().Buffer = StaticGlobalized;
+ GlobalizedRecords.back().RecSize = RecSize;
+ GlobalizedRecords.back().UseSharedMemory = UseSharedMemory;
+ GlobalizedRecords.back().Loc = Loc;
+ }
+ assert(KernelStaticGlobalized && "Global address must be set already.");
+ Address FrameAddr = CGF.EmitLoadOfPointer(
+ Address(KernelStaticGlobalized, CGM.getPointerAlign()),
+ CGM.getContext()
+ .getPointerType(CGM.getContext().VoidPtrTy)
+ .castAs<PointerType>());
+ llvm::Value *GlobalRecValue =
+ Bld.CreateConstInBoundsGEP(FrameAddr, Offset, CharUnits::One())
+ .getPointer();
+ I->getSecond().GlobalRecordAddr = GlobalRecValue;
+ I->getSecond().IsInSPMDModeFlag = nullptr;
+ GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
+ GlobalRecValue, CGF.ConvertTypeForMem(GlobalRecTy)->getPointerTo());
+ } else {
+ // TODO: allow the usage of shared memory to be controlled by
+ // the user, for now, default to global.
+ llvm::Value *GlobalRecordSizeArg[] = {
+ llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize),
+ CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
+ llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(
+ OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack),
+ GlobalRecordSizeArg);
+ GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
+ GlobalRecValue, GlobalRecPtrTy);
+ I->getSecond().GlobalRecordAddr = GlobalRecValue;
+ I->getSecond().IsInSPMDModeFlag = nullptr;
+ }
LValue Base =
- CGF.MakeNaturalAlignPointeeAddrLValue(GlobalRecCastAddr, RecTy);
- I->getSecond().GlobalRecordAddr = GlobalRecValue;
+ CGF.MakeNaturalAlignPointeeAddrLValue(GlobalRecCastAddr, GlobalRecTy);
// Emit the "global alloca" which is a GEP from the global declaration
// record using the pointer returned by the runtime.
+ LValue SecBase;
+ decltype(I->getSecond().LocalVarData)::const_iterator SecIt;
+ if (IsTTD) {
+ SecIt = I->getSecond().SecondaryLocalVarData->begin();
+ llvm::PointerType *SecGlobalRecPtrTy =
+ CGF.ConvertTypeForMem(SecGlobalRecTy)->getPointerTo();
+ SecBase = CGF.MakeNaturalAlignPointeeAddrLValue(
+ Bld.CreatePointerBitCastOrAddrSpaceCast(
+ I->getSecond().GlobalRecordAddr, SecGlobalRecPtrTy),
+ SecGlobalRecTy);
+ }
for (auto &Rec : I->getSecond().LocalVarData) {
bool EscapedParam = I->getSecond().EscapedParameters.count(Rec.first);
llvm::Value *ParValue;
@@ -1675,14 +2281,51 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
ParValue = CGF.EmitLoadOfScalar(ParLVal, Loc);
}
- const FieldDecl *FD = Rec.second.first;
- LValue VarAddr = CGF.EmitLValueForField(Base, FD);
- Rec.second.second = VarAddr.getAddress();
+ LValue VarAddr = CGF.EmitLValueForField(Base, Rec.second.FD);
+ // Emit VarAddr basing on lane-id if required.
+ QualType VarTy;
+ if (Rec.second.IsOnePerTeam) {
+ VarTy = Rec.second.FD->getType();
+ } else {
+ llvm::Value *Ptr = CGF.Builder.CreateInBoundsGEP(
+ VarAddr.getAddress().getPointer(),
+ {Bld.getInt32(0), getNVPTXLaneID(CGF)});
+ VarTy =
+ Rec.second.FD->getType()->castAsArrayTypeUnsafe()->getElementType();
+ VarAddr = CGF.MakeAddrLValue(
+ Address(Ptr, CGM.getContext().getDeclAlign(Rec.first)), VarTy,
+ AlignmentSource::Decl);
+ }
+ Rec.second.PrivateAddr = VarAddr.getAddress();
+ if (!IsInTTDRegion &&
+ (WithSPMDCheck ||
+ getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown)) {
+ assert(I->getSecond().IsInSPMDModeFlag &&
+ "Expected unknown execution mode or required SPMD check.");
+ if (IsTTD) {
+ assert(SecIt->second.IsOnePerTeam &&
+ "Secondary glob data must be one per team.");
+ LValue SecVarAddr = CGF.EmitLValueForField(SecBase, SecIt->second.FD);
+ VarAddr.setAddress(
+ Address(Bld.CreateSelect(IsTTD, SecVarAddr.getPointer(),
+ VarAddr.getPointer()),
+ VarAddr.getAlignment()));
+ Rec.second.PrivateAddr = VarAddr.getAddress();
+ }
+ Address GlobalPtr = Rec.second.PrivateAddr;
+ Address LocalAddr = CGF.CreateMemTemp(VarTy, Rec.second.FD->getName());
+ Rec.second.PrivateAddr = Address(
+ Bld.CreateSelect(I->getSecond().IsInSPMDModeFlag,
+ LocalAddr.getPointer(), GlobalPtr.getPointer()),
+ LocalAddr.getAlignment());
+ }
if (EscapedParam) {
const auto *VD = cast<VarDecl>(Rec.first);
CGF.EmitStoreOfScalar(ParValue, VarAddr);
I->getSecond().MappedParams->setVarAddr(CGF, VD, VarAddr.getAddress());
}
+ if (IsTTD)
+ ++SecIt;
}
}
for (const ValueDecl *VD : I->getSecond().EscapedVariableLengthDecls) {
@@ -1704,7 +2347,8 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
llvm::Value *GlobalRecordSizeArg[] = {
Size, CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_push_stack),
+ createNVPTXRuntimeFunction(
+ OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack),
GlobalRecordSizeArg);
llvm::Value *GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
GlobalRecValue, CGF.ConvertTypeForMem(VD->getType())->getPointerTo());
@@ -1718,8 +2362,10 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
I->getSecond().MappedParams->apply(CGF);
}
-void CGOpenMPRuntimeNVPTX::emitGenericVarsEpilog(CodeGenFunction &CGF) {
- if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic)
+void CGOpenMPRuntimeNVPTX::emitGenericVarsEpilog(CodeGenFunction &CGF,
+ bool WithSPMDCheck) {
+ if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic &&
+ getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD)
return;
const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
@@ -1734,9 +2380,48 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsEpilog(CodeGenFunction &CGF) {
Addr);
}
if (I->getSecond().GlobalRecordAddr) {
- CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
- I->getSecond().GlobalRecordAddr);
+ if (!IsInTTDRegion &&
+ (WithSPMDCheck ||
+ getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown)) {
+ CGBuilderTy &Bld = CGF.Builder;
+ llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit");
+ llvm::BasicBlock *NonSPMDBB = CGF.createBasicBlock(".non-spmd");
+ Bld.CreateCondBr(I->getSecond().IsInSPMDModeFlag, ExitBB, NonSPMDBB);
+ // There is no need to emit line number for unconditional branch.
+ (void)ApplyDebugLocation::CreateEmpty(CGF);
+ CGF.EmitBlock(NonSPMDBB);
+ CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(
+ OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
+ CGF.EmitCastToVoidPtr(I->getSecond().GlobalRecordAddr));
+ CGF.EmitBlock(ExitBB);
+ } else if (IsInTTDRegion) {
+ assert(GlobalizedRecords.back().RegionCounter > 0 &&
+ "region counter must be > 0.");
+ --GlobalizedRecords.back().RegionCounter;
+ // Emit the restore function only in the target region.
+ if (GlobalizedRecords.back().RegionCounter == 0) {
+ QualType Int16Ty = CGM.getContext().getIntTypeForBitwidth(
+ /*DestWidth=*/16, /*Signed=*/0);
+ llvm::Value *IsInSharedMemory = CGF.EmitLoadOfScalar(
+ Address(GlobalizedRecords.back().UseSharedMemory,
+ CGM.getContext().getTypeAlignInChars(Int16Ty)),
+ /*Volatile=*/false, Int16Ty, GlobalizedRecords.back().Loc);
+ llvm::Value *Args[] = {
+ llvm::ConstantInt::get(
+ CGM.Int16Ty,
+ getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD ? 1 : 0),
+ IsInSharedMemory};
+ CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(
+ OMPRTL_NVPTX__kmpc_restore_team_static_memory),
+ Args);
+ }
+ } else {
+ CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
+ OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
+ I->getSecond().GlobalRecordAddr);
+ }
}
}
}
@@ -1830,7 +2515,7 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall(
// passed from the out