author    Dimitry Andric <dim@FreeBSD.org>  2018-07-28 11:06:01 +0000
committer Dimitry Andric <dim@FreeBSD.org>  2018-07-28 11:06:01 +0000
commit 486754660bb926339aefcf012a3f848592babb8b (patch)
tree   ecdbc446c9876f4f120f701c243373cd3cb43db3 /lib/CodeGen
parent 55e6d896ad333f07bb3b1ba487df214fc268a4ab (diff)
Vendor import of clang trunk r338150 (tag: vendor/clang/clang-trunk-r338150)
Notes:
    svn path=/vendor/clang/dist/; revision=336815
    svn path=/vendor/clang/clang-trunk-r338150/; revision=336816; tag=vendor/clang/clang-trunk-r338150
Diffstat (limited to 'lib/CodeGen')
-rw-r--r--  lib/CodeGen/ABIInfo.h | 15
-rw-r--r--  lib/CodeGen/BackendUtil.cpp | 282
-rw-r--r--  lib/CodeGen/CGAtomic.cpp | 67
-rw-r--r--  lib/CodeGen/CGBlocks.cpp | 493
-rw-r--r--  lib/CodeGen/CGBlocks.h | 9
-rw-r--r--  lib/CodeGen/CGBuilder.h | 37
-rw-r--r--  lib/CodeGen/CGBuiltin.cpp | 3055
-rw-r--r--  lib/CodeGen/CGCUDANV.cpp | 394
-rw-r--r--  lib/CodeGen/CGCXX.cpp | 29
-rw-r--r--  lib/CodeGen/CGCXXABI.cpp | 14
-rw-r--r--  lib/CodeGen/CGCXXABI.h | 29
-rw-r--r--  lib/CodeGen/CGCall.cpp | 510
-rw-r--r--  lib/CodeGen/CGCall.h | 114
-rw-r--r--  lib/CodeGen/CGClass.cpp | 111
-rw-r--r--  lib/CodeGen/CGCleanup.cpp | 48
-rw-r--r--  lib/CodeGen/CGCleanup.h | 9
-rw-r--r--  lib/CodeGen/CGCoroutine.cpp | 73
-rw-r--r--  lib/CodeGen/CGDebugInfo.cpp | 508
-rw-r--r--  lib/CodeGen/CGDebugInfo.h | 69
-rw-r--r--  lib/CodeGen/CGDecl.cpp | 568
-rw-r--r--  lib/CodeGen/CGDeclCXX.cpp | 25
-rw-r--r--  lib/CodeGen/CGException.cpp | 299
-rw-r--r--  lib/CodeGen/CGExpr.cpp | 201
-rw-r--r--  lib/CodeGen/CGExprAgg.cpp | 454
-rw-r--r--  lib/CodeGen/CGExprCXX.cpp | 80
-rw-r--r--  lib/CodeGen/CGExprComplex.cpp | 13
-rw-r--r--  lib/CodeGen/CGExprConstant.cpp | 174
-rw-r--r--  lib/CodeGen/CGExprScalar.cpp | 108
-rw-r--r--  lib/CodeGen/CGGPUBuiltin.cpp | 13
-rw-r--r--  lib/CodeGen/CGLoopInfo.h | 70
-rw-r--r--  lib/CodeGen/CGNonTrivialStruct.cpp | 885
-rw-r--r--  lib/CodeGen/CGObjC.cpp | 68
-rw-r--r--  lib/CodeGen/CGObjCGNU.cpp | 1808
-rw-r--r--  lib/CodeGen/CGObjCMac.cpp | 47
-rw-r--r--  lib/CodeGen/CGOpenCLRuntime.cpp | 81
-rw-r--r--  lib/CodeGen/CGOpenCLRuntime.h | 33
-rw-r--r--  lib/CodeGen/CGOpenMPRuntime.cpp | 4158
-rw-r--r--  lib/CodeGen/CGOpenMPRuntime.h | 998
-rw-r--r--  lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 2300
-rw-r--r--  lib/CodeGen/CGOpenMPRuntimeNVPTX.h | 165
-rw-r--r--  lib/CodeGen/CGRecordLayout.h | 18
-rw-r--r--  lib/CodeGen/CGRecordLayoutBuilder.cpp | 83
-rw-r--r--  lib/CodeGen/CGStmt.cpp | 51
-rw-r--r--  lib/CodeGen/CGStmtOpenMP.cpp | 1676
-rw-r--r--  lib/CodeGen/CGVTT.cpp | 2
-rw-r--r--  lib/CodeGen/CGVTables.cpp | 283
-rw-r--r--  lib/CodeGen/CGVTables.h | 10
-rw-r--r--  lib/CodeGen/CGValue.h | 44
-rw-r--r--  lib/CodeGen/CMakeLists.txt | 7
-rw-r--r--  lib/CodeGen/CodeGenAction.cpp | 54
-rw-r--r--  lib/CodeGen/CodeGenFunction.cpp | 255
-rw-r--r--  lib/CodeGen/CodeGenFunction.h | 928
-rw-r--r--  lib/CodeGen/CodeGenModule.cpp | 1020
-rw-r--r--  lib/CodeGen/CodeGenModule.h | 155
-rw-r--r--  lib/CodeGen/CodeGenPGO.cpp | 8
-rw-r--r--  lib/CodeGen/CodeGenTBAA.cpp | 31
-rw-r--r--  lib/CodeGen/CodeGenTBAA.h | 9
-rw-r--r--  lib/CodeGen/CodeGenTypeCache.h | 2
-rw-r--r--  lib/CodeGen/CodeGenTypes.cpp | 27
-rw-r--r--  lib/CodeGen/CodeGenTypes.h | 9
-rw-r--r--  lib/CodeGen/ConstantEmitter.h | 2
-rw-r--r--  lib/CodeGen/CoverageMappingGen.cpp | 153
-rw-r--r--  lib/CodeGen/CoverageMappingGen.h | 16
-rw-r--r--  lib/CodeGen/ItaniumCXXABI.cpp | 423
-rw-r--r--  lib/CodeGen/MacroPPCallbacks.cpp | 3
-rw-r--r--  lib/CodeGen/MacroPPCallbacks.h | 3
-rw-r--r--  lib/CodeGen/MicrosoftCXXABI.cpp | 264
-rw-r--r--  lib/CodeGen/ObjectFilePCHContainerOperations.cpp | 12
-rw-r--r--  lib/CodeGen/SanitizerMetadata.cpp | 9
-rw-r--r--  lib/CodeGen/SwiftCallingConv.cpp | 44
-rw-r--r--  lib/CodeGen/TargetInfo.cpp | 615
-rw-r--r--  lib/CodeGen/TargetInfo.h | 12
-rw-r--r--  lib/CodeGen/VarBypassDetector.cpp | 2
73 files changed, 17740 insertions, 6904 deletions
diff --git a/lib/CodeGen/ABIInfo.h b/lib/CodeGen/ABIInfo.h
index 575506da84d4..feed3833f24a 100644
--- a/lib/CodeGen/ABIInfo.h
+++ b/lib/CodeGen/ABIInfo.h
@@ -53,12 +53,9 @@ namespace swiftcall {
CodeGen::CodeGenTypes &CGT;
protected:
llvm::CallingConv::ID RuntimeCC;
- llvm::CallingConv::ID BuiltinCC;
public:
ABIInfo(CodeGen::CodeGenTypes &cgt)
- : CGT(cgt),
- RuntimeCC(llvm::CallingConv::C),
- BuiltinCC(llvm::CallingConv::C) {}
+ : CGT(cgt), RuntimeCC(llvm::CallingConv::C) {}
virtual ~ABIInfo();
@@ -77,11 +74,6 @@ namespace swiftcall {
return RuntimeCC;
}
- /// Return the calling convention to use for compiler builtins
- llvm::CallingConv::ID getBuiltinCC() const {
- return BuiltinCC;
- }
-
virtual void computeInfo(CodeGen::CGFunctionInfo &FI) const = 0;
/// EmitVAArg - Emit the target dependent code to load a value of
@@ -108,8 +100,6 @@ namespace swiftcall {
virtual bool isHomogeneousAggregateSmallEnough(const Type *Base,
uint64_t Members) const;
- virtual bool shouldSignExtUnsignedType(QualType Ty) const;
-
bool isHomogeneousAggregate(QualType Ty, const Type *&Base,
uint64_t &Members) const;
@@ -137,8 +127,7 @@ namespace swiftcall {
bool supportsSwift() const final override { return true; }
- virtual bool shouldPassIndirectlyForSwift(CharUnits totalSize,
- ArrayRef<llvm::Type*> types,
+ virtual bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> types,
bool asReturnValue) const = 0;
virtual bool isLegalVectorTypeForSwift(CharUnits totalSize,
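The ABIInfo.h hunks above drop the unused builtin calling-convention plumbing (BuiltinCC/getBuiltinCC), remove shouldSignExtUnsignedType, and shrink shouldPassIndirectlyForSwift so that targets decide from the lowered component types alone, without a CharUnits totalSize. A minimal sketch of a target override against the new signature; MyTargetSwiftABIInfo and its size heuristic are hypothetical, not from the patch, and the remaining SwiftABIInfo virtuals are omitted for brevity:

    // Hypothetical sketch, written as if inside clang's CodeGen sources.
    // Only the changed hook is shown; a real subclass must implement the
    // other SwiftABIInfo virtuals as well.
    class MyTargetSwiftABIInfo : public CodeGen::SwiftABIInfo {
    public:
      MyTargetSwiftABIInfo(CodeGen::CodeGenTypes &CGT)
          : CodeGen::SwiftABIInfo(CGT) {}

      bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type *> ComponentTys,
                                        bool AsReturnValue) const override {
        // Illustrative policy only: pass indirectly once a value lowers to
        // more than four scalar components, for arguments and returns alike.
        (void)AsReturnValue;
        return ComponentTys.size() > 4;
      }
    };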
diff --git a/lib/CodeGen/BackendUtil.cpp b/lib/CodeGen/BackendUtil.cpp
index e2349da5f0a4..415bd9626220 100644
--- a/lib/CodeGen/BackendUtil.cpp
+++ b/lib/CodeGen/BackendUtil.cpp
@@ -26,6 +26,7 @@
#include "llvm/Bitcode/BitcodeWriterPass.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/LegacyPassManager.h"
@@ -44,17 +45,19 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Transforms/Coroutines.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h"
+#include "llvm/Transforms/InstCombine/InstCombine.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Instrumentation/BoundsChecking.h"
+#include "llvm/Transforms/Instrumentation/GCOVProfiler.h"
#include "llvm/Transforms/ObjCARC.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/NameAnonGlobals.h"
#include "llvm/Transforms/Utils/SymbolRewriter.h"
#include <memory>
@@ -101,7 +104,18 @@ class EmitAssemblyHelper {
///
/// \return True on success.
bool AddEmitPasses(legacy::PassManager &CodeGenPasses, BackendAction Action,
- raw_pwrite_stream &OS);
+ raw_pwrite_stream &OS, raw_pwrite_stream *DwoOS);
+
+ std::unique_ptr<llvm::ToolOutputFile> openOutputFile(StringRef Path) {
+ std::error_code EC;
+ auto F = llvm::make_unique<llvm::ToolOutputFile>(Path, EC,
+ llvm::sys::fs::F_None);
+ if (EC) {
+ Diags.Report(diag::err_fe_unable_to_open_output) << Path << EC.message();
+ F.reset();
+ }
+ return F;
+ }
public:
EmitAssemblyHelper(DiagnosticsEngine &_Diags,
@@ -231,10 +245,9 @@ static void addAddressSanitizerPasses(const PassManagerBuilder &Builder,
static void addKernelAddressSanitizerPasses(const PassManagerBuilder &Builder,
legacy::PassManagerBase &PM) {
PM.add(createAddressSanitizerFunctionPass(
- /*CompileKernel*/ true,
- /*Recover*/ true, /*UseAfterScope*/ false));
- PM.add(createAddressSanitizerModulePass(/*CompileKernel*/true,
- /*Recover*/true));
+ /*CompileKernel*/ true, /*Recover*/ true, /*UseAfterScope*/ false));
+ PM.add(createAddressSanitizerModulePass(
+ /*CompileKernel*/ true, /*Recover*/ true));
}
static void addHWAddressSanitizerPasses(const PassManagerBuilder &Builder,
@@ -243,7 +256,13 @@ static void addHWAddressSanitizerPasses(const PassManagerBuilder &Builder,
static_cast<const PassManagerBuilderWrapper &>(Builder);
const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts();
bool Recover = CGOpts.SanitizeRecover.has(SanitizerKind::HWAddress);
- PM.add(createHWAddressSanitizerPass(Recover));
+ PM.add(createHWAddressSanitizerPass(/*CompileKernel*/ false, Recover));
+}
+
+static void addKernelHWAddressSanitizerPasses(const PassManagerBuilder &Builder,
+ legacy::PassManagerBase &PM) {
+ PM.add(createHWAddressSanitizerPass(
+ /*CompileKernel*/ true, /*Recover*/ true));
}
static void addMemorySanitizerPass(const PassManagerBuilder &Builder,
@@ -361,21 +380,6 @@ getCodeModel(const CodeGenOptions &CodeGenOpts) {
return static_cast<llvm::CodeModel::Model>(CodeModel);
}
-static llvm::Reloc::Model getRelocModel(const CodeGenOptions &CodeGenOpts) {
- // Keep this synced with the equivalent code in
- // lib/Frontend/CompilerInvocation.cpp
- llvm::Optional<llvm::Reloc::Model> RM;
- RM = llvm::StringSwitch<llvm::Reloc::Model>(CodeGenOpts.RelocationModel)
- .Case("static", llvm::Reloc::Static)
- .Case("pic", llvm::Reloc::PIC_)
- .Case("ropi", llvm::Reloc::ROPI)
- .Case("rwpi", llvm::Reloc::RWPI)
- .Case("ropi-rwpi", llvm::Reloc::ROPI_RWPI)
- .Case("dynamic-no-pic", llvm::Reloc::DynamicNoPIC);
- assert(RM.hasValue() && "invalid PIC model!");
- return *RM;
-}
-
static TargetMachine::CodeGenFileType getCodeGenFileType(BackendAction Action) {
if (Action == Backend_EmitObj)
return TargetMachine::CGFT_ObjectFile;
@@ -447,7 +451,10 @@ static void initTargetOptions(llvm::TargetOptions &Options,
Options.DataSections = CodeGenOpts.DataSections;
Options.UniqueSectionNames = CodeGenOpts.UniqueSectionNames;
Options.EmulatedTLS = CodeGenOpts.EmulatedTLS;
+ Options.ExplicitEmulatedTLS = CodeGenOpts.ExplicitEmulatedTLS;
Options.DebuggerTuning = CodeGenOpts.getDebuggerTuning();
+ Options.EmitStackSizeSection = CodeGenOpts.StackSizeSection;
+ Options.EmitAddrsig = CodeGenOpts.Addrsig;
if (CodeGenOpts.EnableSplitDwarf)
Options.MCOptions.SplitDwarfFile = CodeGenOpts.SplitDwarfFile;
@@ -470,6 +477,23 @@ static void initTargetOptions(llvm::TargetOptions &Options,
Options.MCOptions.IASSearchPaths.push_back(
Entry.IgnoreSysRoot ? Entry.Path : HSOpts.Sysroot + Entry.Path);
}
+static Optional<GCOVOptions> getGCOVOptions(const CodeGenOptions &CodeGenOpts) {
+ if (CodeGenOpts.DisableGCov)
+ return None;
+ if (!CodeGenOpts.EmitGcovArcs && !CodeGenOpts.EmitGcovNotes)
+ return None;
+ // Not using 'GCOVOptions::getDefault' allows us to avoid exiting if
+ // LLVM's -default-gcov-version flag is set to something invalid.
+ GCOVOptions Options;
+ Options.EmitNotes = CodeGenOpts.EmitGcovNotes;
+ Options.EmitData = CodeGenOpts.EmitGcovArcs;
+ llvm::copy(CodeGenOpts.CoverageVersion, std::begin(Options.Version));
+ Options.UseCfgChecksum = CodeGenOpts.CoverageExtraChecksum;
+ Options.NoRedZone = CodeGenOpts.DisableRedZone;
+ Options.FunctionNamesInData = !CodeGenOpts.CoverageNoFunctionNamesInData;
+ Options.ExitBlockBeforeBody = CodeGenOpts.CoverageExitBlockBeforeBody;
+ return Options;
+}
void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
legacy::FunctionPassManager &FPM) {
@@ -501,7 +525,7 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
PMBuilder.Inliner = createFunctionInliningPass(
CodeGenOpts.OptimizationLevel, CodeGenOpts.OptimizeSize,
(!CodeGenOpts.SampleProfileFile.empty() &&
- CodeGenOpts.EmitSummaryIndex));
+ CodeGenOpts.PrepareForThinLTO));
}
PMBuilder.OptLevel = CodeGenOpts.OptimizationLevel;
@@ -511,7 +535,7 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
PMBuilder.DisableUnrollLoops = !CodeGenOpts.UnrollLoops;
PMBuilder.MergeFunctions = CodeGenOpts.MergeFunctions;
- PMBuilder.PrepareForThinLTO = CodeGenOpts.EmitSummaryIndex;
+ PMBuilder.PrepareForThinLTO = CodeGenOpts.PrepareForThinLTO;
PMBuilder.PrepareForLTO = CodeGenOpts.PrepareForLTO;
PMBuilder.RerollLoops = CodeGenOpts.RerollLoops;
@@ -535,6 +559,9 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
addObjCARCOptPass);
}
+ if (LangOpts.CoroutinesTS)
+ addCoroutinePassesToExtensionPoints(PMBuilder);
+
if (LangOpts.Sanitize.has(SanitizerKind::LocalBounds)) {
PMBuilder.addExtension(PassManagerBuilder::EP_ScalarOptimizerLate,
addBoundsCheckingPass);
@@ -572,6 +599,13 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
addHWAddressSanitizerPasses);
}
+ if (LangOpts.Sanitize.has(SanitizerKind::KernelHWAddress)) {
+ PMBuilder.addExtension(PassManagerBuilder::EP_OptimizerLast,
+ addKernelHWAddressSanitizerPasses);
+ PMBuilder.addExtension(PassManagerBuilder::EP_EnabledOnOptLevel0,
+ addKernelHWAddressSanitizerPasses);
+ }
+
if (LangOpts.Sanitize.has(SanitizerKind::Memory)) {
PMBuilder.addExtension(PassManagerBuilder::EP_OptimizerLast,
addMemorySanitizerPass);
@@ -593,9 +627,6 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
addDataFlowSanitizerPass);
}
- if (LangOpts.CoroutinesTS)
- addCoroutinePassesToExtensionPoints(PMBuilder);
-
if (LangOpts.Sanitize.hasOneOf(SanitizerKind::Efficiency)) {
PMBuilder.addExtension(PassManagerBuilder::EP_OptimizerLast,
addEfficiencySanitizerPass);
@@ -612,20 +643,8 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
if (!CodeGenOpts.RewriteMapFiles.empty())
addSymbolRewriterPass(CodeGenOpts, &MPM);
- if (!CodeGenOpts.DisableGCov &&
- (CodeGenOpts.EmitGcovArcs || CodeGenOpts.EmitGcovNotes)) {
- // Not using 'GCOVOptions::getDefault' allows us to avoid exiting if
- // LLVM's -default-gcov-version flag is set to something invalid.
- GCOVOptions Options;
- Options.EmitNotes = CodeGenOpts.EmitGcovNotes;
- Options.EmitData = CodeGenOpts.EmitGcovArcs;
- memcpy(Options.Version, CodeGenOpts.CoverageVersion, 4);
- Options.UseCfgChecksum = CodeGenOpts.CoverageExtraChecksum;
- Options.NoRedZone = CodeGenOpts.DisableRedZone;
- Options.FunctionNamesInData =
- !CodeGenOpts.CoverageNoFunctionNamesInData;
- Options.ExitBlockBeforeBody = CodeGenOpts.CoverageExitBlockBeforeBody;
- MPM.add(createGCOVProfilerPass(Options));
+ if (Optional<GCOVOptions> Options = getGCOVOptions(CodeGenOpts)) {
+ MPM.add(createGCOVProfilerPass(*Options));
if (CodeGenOpts.getDebugInfo() == codegenoptions::NoDebugInfo)
MPM.add(createStripSymbolsPass(true));
}
@@ -664,8 +683,6 @@ static void setCommandLineOpts(const CodeGenOptions &CodeGenOpts) {
BackendArgs.push_back("-limit-float-precision");
BackendArgs.push_back(CodeGenOpts.LimitFloatPrecision.c_str());
}
- for (const std::string &BackendOption : CodeGenOpts.BackendOptions)
- BackendArgs.push_back(BackendOption.c_str());
BackendArgs.push_back(nullptr);
llvm::cl::ParseCommandLineOptions(BackendArgs.size() - 1,
BackendArgs.data());
@@ -685,7 +702,7 @@ void EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) {
Optional<llvm::CodeModel::Model> CM = getCodeModel(CodeGenOpts);
std::string FeaturesStr =
llvm::join(TargetOpts.Features.begin(), TargetOpts.Features.end(), ",");
- llvm::Reloc::Model RM = getRelocModel(CodeGenOpts);
+ llvm::Reloc::Model RM = CodeGenOpts.RelocationModel;
CodeGenOpt::Level OptLevel = getCGOptLevel(CodeGenOpts);
llvm::TargetOptions Options;
@@ -696,7 +713,8 @@ void EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) {
bool EmitAssemblyHelper::AddEmitPasses(legacy::PassManager &CodeGenPasses,
BackendAction Action,
- raw_pwrite_stream &OS) {
+ raw_pwrite_stream &OS,
+ raw_pwrite_stream *DwoOS) {
// Add LibraryInfo.
llvm::Triple TargetTriple(TheModule->getTargetTriple());
std::unique_ptr<TargetLibraryInfoImpl> TLII(
@@ -713,7 +731,7 @@ bool EmitAssemblyHelper::AddEmitPasses(legacy::PassManager &CodeGenPasses,
if (CodeGenOpts.OptimizationLevel > 0)
CodeGenPasses.add(createObjCARCContractPass());
- if (TM->addPassesToEmitFile(CodeGenPasses, OS, CGFT,
+ if (TM->addPassesToEmitFile(CodeGenPasses, OS, DwoOS, CGFT,
/*DisableVerify=*/!CodeGenOpts.VerifyModule)) {
Diags.Report(diag::err_fe_unable_to_interface_with_target);
return false;
@@ -724,7 +742,7 @@ bool EmitAssemblyHelper::AddEmitPasses(legacy::PassManager &CodeGenPasses,
void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
std::unique_ptr<raw_pwrite_stream> OS) {
- TimeRegion Region(llvm::TimePassesIsEnabled ? &CodeGenerationTime : nullptr);
+ TimeRegion Region(FrontendTimesIsEnabled ? &CodeGenerationTime : nullptr);
setCommandLineOpts(CodeGenOpts);
@@ -752,31 +770,35 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
CodeGenPasses.add(
createTargetTransformInfoWrapperPass(getTargetIRAnalysis()));
- std::unique_ptr<raw_fd_ostream> ThinLinkOS;
+ std::unique_ptr<llvm::ToolOutputFile> ThinLinkOS, DwoOS;
switch (Action) {
case Backend_EmitNothing:
break;
case Backend_EmitBC:
- if (CodeGenOpts.EmitSummaryIndex) {
+ if (CodeGenOpts.PrepareForThinLTO) {
if (!CodeGenOpts.ThinLinkBitcodeFile.empty()) {
- std::error_code EC;
- ThinLinkOS.reset(new llvm::raw_fd_ostream(
- CodeGenOpts.ThinLinkBitcodeFile, EC,
- llvm::sys::fs::F_None));
- if (EC) {
- Diags.Report(diag::err_fe_unable_to_open_output) << CodeGenOpts.ThinLinkBitcodeFile
- << EC.message();
+ ThinLinkOS = openOutputFile(CodeGenOpts.ThinLinkBitcodeFile);
+ if (!ThinLinkOS)
return;
- }
}
+ PerModulePasses.add(createWriteThinLTOBitcodePass(
+ *OS, ThinLinkOS ? &ThinLinkOS->os() : nullptr));
+ } else {
+ // Emit a module summary by default for Regular LTO except for ld64
+ // targets
+ bool EmitLTOSummary =
+ (CodeGenOpts.PrepareForLTO &&
+ llvm::Triple(TheModule->getTargetTriple()).getVendor() !=
+ llvm::Triple::Apple);
+ if (EmitLTOSummary && !TheModule->getModuleFlag("ThinLTO"))
+ TheModule->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0));
+
PerModulePasses.add(
- createWriteThinLTOBitcodePass(*OS, ThinLinkOS.get()));
+ createBitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists,
+ EmitLTOSummary));
}
- else
- PerModulePasses.add(
- createBitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists));
break;
case Backend_EmitLL:
@@ -785,7 +807,13 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
break;
default:
- if (!AddEmitPasses(CodeGenPasses, Action, *OS))
+ if (!CodeGenOpts.SplitDwarfFile.empty()) {
+ DwoOS = openOutputFile(CodeGenOpts.SplitDwarfFile);
+ if (!DwoOS)
+ return;
+ }
+ if (!AddEmitPasses(CodeGenPasses, Action, *OS,
+ DwoOS ? &DwoOS->os() : nullptr))
return;
}
@@ -814,6 +842,11 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
PrettyStackTraceString CrashInfo("Code generation");
CodeGenPasses.run(*TheModule);
}
+
+ if (ThinLinkOS)
+ ThinLinkOS->keep();
+ if (DwoOS)
+ DwoOS->keep();
}
static PassBuilder::OptimizationLevel mapToLevel(const CodeGenOptions &Opts) {
@@ -827,7 +860,7 @@ static PassBuilder::OptimizationLevel mapToLevel(const CodeGenOptions &Opts) {
case 2:
switch (Opts.OptimizeSize) {
default:
- llvm_unreachable("Invalide optimization level for size!");
+ llvm_unreachable("Invalid optimization level for size!");
case 0:
return PassBuilder::O2;
@@ -854,7 +887,7 @@ static PassBuilder::OptimizationLevel mapToLevel(const CodeGenOptions &Opts) {
/// `EmitAssembly` at some point in the future when the default switches.
void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
BackendAction Action, std::unique_ptr<raw_pwrite_stream> OS) {
- TimeRegion Region(llvm::TimePassesIsEnabled ? &CodeGenerationTime : nullptr);
+ TimeRegion Region(FrontendTimesIsEnabled ? &CodeGenerationTime : nullptr);
setCommandLineOpts(CodeGenOpts);
// The new pass manager always makes a target machine available to passes
@@ -913,10 +946,13 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
ModulePassManager MPM(CodeGenOpts.DebugPassManager);
if (!CodeGenOpts.DisableLLVMPasses) {
- bool IsThinLTO = CodeGenOpts.EmitSummaryIndex;
+ bool IsThinLTO = CodeGenOpts.PrepareForThinLTO;
bool IsLTO = CodeGenOpts.PrepareForLTO;
if (CodeGenOpts.OptimizationLevel == 0) {
+ if (Optional<GCOVOptions> Options = getGCOVOptions(CodeGenOpts))
+ MPM.addPass(GCOVProfilerPass(*Options));
+
// Build a minimal pipeline based on the semantics required by Clang,
// which is just that always inlining occurs.
MPM.addPass(AlwaysInlinerPass());
@@ -925,8 +961,8 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
if (LangOpts.Sanitize.has(SanitizerKind::LocalBounds))
MPM.addPass(createModuleToFunctionPassAdaptor(BoundsCheckingPass()));
- // Lastly, add a semantically necessary pass for ThinLTO.
- if (IsThinLTO)
+ // Lastly, add a semantically necessary pass for LTO.
+ if (IsLTO || IsThinLTO)
MPM.addPass(NameAnonGlobalPass());
} else {
// Map our optimization levels into one of the distinct levels used to
@@ -940,6 +976,10 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
[](FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) {
FPM.addPass(BoundsCheckingPass());
});
+ if (Optional<GCOVOptions> Options = getGCOVOptions(CodeGenOpts))
+ PB.registerPipelineStartEPCallback([Options](ModulePassManager &MPM) {
+ MPM.addPass(GCOVProfilerPass(*Options));
+ });
if (IsThinLTO) {
MPM = PB.buildThinLTOPreLinkDefaultPipeline(
@@ -948,6 +988,7 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
} else if (IsLTO) {
MPM = PB.buildLTOPreLinkDefaultPipeline(Level,
CodeGenOpts.DebugPassManager);
+ MPM.addPass(NameAnonGlobalPass());
} else {
MPM = PB.buildPerModuleDefaultPipeline(Level,
CodeGenOpts.DebugPassManager);
@@ -959,7 +1000,7 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
// create that pass manager here and use it as needed below.
legacy::PassManager CodeGenPasses;
bool NeedCodeGen = false;
- Optional<raw_fd_ostream> ThinLinkOS;
+ std::unique_ptr<llvm::ToolOutputFile> ThinLinkOS, DwoOS;
// Append any output we need to the pass manager.
switch (Action) {
@@ -967,23 +1008,26 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
break;
case Backend_EmitBC:
- if (CodeGenOpts.EmitSummaryIndex) {
+ if (CodeGenOpts.PrepareForThinLTO) {
if (!CodeGenOpts.ThinLinkBitcodeFile.empty()) {
- std::error_code EC;
- ThinLinkOS.emplace(CodeGenOpts.ThinLinkBitcodeFile, EC,
- llvm::sys::fs::F_None);
- if (EC) {
- Diags.Report(diag::err_fe_unable_to_open_output)
- << CodeGenOpts.ThinLinkBitcodeFile << EC.message();
+ ThinLinkOS = openOutputFile(CodeGenOpts.ThinLinkBitcodeFile);
+ if (!ThinLinkOS)
return;
- }
}
- MPM.addPass(
- ThinLTOBitcodeWriterPass(*OS, ThinLinkOS ? &*ThinLinkOS : nullptr));
+ MPM.addPass(ThinLTOBitcodeWriterPass(*OS, ThinLinkOS ? &ThinLinkOS->os()
+ : nullptr));
} else {
+ // Emit a module summary by default for Regular LTO except for ld64
+ // targets
+ bool EmitLTOSummary =
+ (CodeGenOpts.PrepareForLTO &&
+ llvm::Triple(TheModule->getTargetTriple()).getVendor() !=
+ llvm::Triple::Apple);
+ if (EmitLTOSummary && !TheModule->getModuleFlag("ThinLTO"))
+ TheModule->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0));
+
MPM.addPass(BitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists,
- CodeGenOpts.EmitSummaryIndex,
- CodeGenOpts.EmitSummaryIndex));
+ EmitLTOSummary));
}
break;
@@ -997,7 +1041,13 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
NeedCodeGen = true;
CodeGenPasses.add(
createTargetTransformInfoWrapperPass(getTargetIRAnalysis()));
- if (!AddEmitPasses(CodeGenPasses, Action, *OS))
+ if (!CodeGenOpts.SplitDwarfFile.empty()) {
+ DwoOS = openOutputFile(CodeGenOpts.SplitDwarfFile);
+ if (!DwoOS)
+ return;
+ }
+ if (!AddEmitPasses(CodeGenPasses, Action, *OS,
+ DwoOS ? &DwoOS->os() : nullptr))
// FIXME: Should we handle this error differently?
return;
break;
@@ -1017,6 +1067,11 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
PrettyStackTraceString CrashInfo("Code generation");
CodeGenPasses.run(*TheModule);
}
+
+ if (ThinLinkOS)
+ ThinLinkOS->keep();
+ if (DwoOS)
+ DwoOS->keep();
}
Expected<BitcodeModule> clang::FindThinLTOModule(MemoryBufferRef MBRef) {
@@ -1026,16 +1081,22 @@ Expected<BitcodeModule> clang::FindThinLTOModule(MemoryBufferRef MBRef) {
// The bitcode file may contain multiple modules, we want the one that is
// marked as being the ThinLTO module.
- for (BitcodeModule &BM : *BMsOrErr) {
- Expected<BitcodeLTOInfo> LTOInfo = BM.getLTOInfo();
- if (LTOInfo && LTOInfo->IsThinLTO)
- return BM;
- }
+ if (const BitcodeModule *Bm = FindThinLTOModule(*BMsOrErr))
+ return *Bm;
return make_error<StringError>("Could not find module summary",
inconvertibleErrorCode());
}
+BitcodeModule *clang::FindThinLTOModule(MutableArrayRef<BitcodeModule> BMs) {
+ for (BitcodeModule &BM : BMs) {
+ Expected<BitcodeLTOInfo> LTOInfo = BM.getLTOInfo();
+ if (LTOInfo && LTOInfo->IsThinLTO)
+ return &BM;
+ }
+ return nullptr;
+}
+
static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M,
const HeaderSearchOptions &HeaderOpts,
const CodeGenOptions &CGOpts,
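The refactor above splits the ThinLTO-module search out of clang::FindThinLTOModule(MemoryBufferRef) into a new overload over the already-parsed module list, which returns nullptr (rather than an Error) when no module in the file carries ThinLTO summary information. A hedged caller sketch, assuming the usual BackendUtil.h/BitcodeReader.h declarations; the function name is illustrative:

    #include "clang/CodeGen/BackendUtil.h"   // clang::FindThinLTOModule
    #include "llvm/Bitcode/BitcodeReader.h"  // llvm::getBitcodeModuleList
    #include "llvm/Support/Error.h"

    // Illustrative caller of the new overload (not from the patch).
    void inspectThinLTOModule(llvm::MemoryBufferRef MBRef) {
      llvm::Expected<std::vector<llvm::BitcodeModule>> BMsOrErr =
          llvm::getBitcodeModuleList(MBRef);
      if (!BMsOrErr) {
        llvm::consumeError(BMsOrErr.takeError()); // real code reports this
        return;
      }
      // nullptr here simply means "no ThinLTO module", not a hard error.
      if (llvm::BitcodeModule *BM = clang::FindThinLTOModule(*BMsOrErr)) {
        (void)BM; // e.g. hand BM to the ThinLTO backend; valid while
                  // *BMsOrErr is alive
      }
    }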
@@ -1067,9 +1128,8 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M,
// e.g. record required linkage changes.
if (Summary->modulePath() == M->getModuleIdentifier())
continue;
- // Doesn't matter what value we plug in to the map, just needs an entry
- // to provoke importing by thinBackend.
- ImportList[Summary->modulePath()][GUID] = 1;
+ // Add an entry to provoke importing by thinBackend.
+ ImportList[Summary->modulePath()].insert(GUID);
}
std::vector<std::unique_ptr<llvm::MemoryBuffer>> OwnedImports;
@@ -1100,15 +1160,27 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M,
return llvm::make_unique<lto::NativeObjectStream>(std::move(OS));
};
lto::Config Conf;
+ if (CGOpts.SaveTempsFilePrefix != "") {
+ if (Error E = Conf.addSaveTemps(CGOpts.SaveTempsFilePrefix + ".",
+ /* UseInputModulePath */ false)) {
+ handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) {
+ errs() << "Error setting up ThinLTO save-temps: " << EIB.message()
+ << '\n';
+ });
+ }
+ }
Conf.CPU = TOpts.CPU;
Conf.CodeModel = getCodeModel(CGOpts);
Conf.MAttrs = TOpts.Features;
- Conf.RelocModel = getRelocModel(CGOpts);
+ Conf.RelocModel = CGOpts.RelocationModel;
Conf.CGOptLevel = getCGOptLevel(CGOpts);
initTargetOptions(Conf.Options, CGOpts, TOpts, LOpts, HeaderOpts);
Conf.SampleProfile = std::move(SampleProfile);
Conf.UseNewPM = CGOpts.ExperimentalNewPassManager;
Conf.DebugPassManager = CGOpts.DebugPassManager;
+ Conf.RemarksWithHotness = CGOpts.DiagnosticsWithHotness;
+ Conf.RemarksFilename = CGOpts.OptRecordFile;
+ Conf.DwoPath = CGOpts.SplitDwarfFile;
switch (Action) {
case Backend_EmitNothing:
Conf.PreCodeGenModuleHook = [](size_t Task, const Module &Mod) {
@@ -1123,7 +1195,7 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M,
break;
case Backend_EmitBC:
Conf.PreCodeGenModuleHook = [&](size_t Task, const Module &Mod) {
- WriteBitcodeToFile(M, *OS, CGOpts.EmitLLVMUseLists);
+ WriteBitcodeToFile(*M, *OS, CGOpts.EmitLLVMUseLists);
return false;
};
break;
@@ -1132,7 +1204,7 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M,
break;
}
if (Error E = thinBackend(
- Conf, 0, AddStream, *M, *CombinedIndex, ImportList,
+ Conf, -1, AddStream, *M, *CombinedIndex, ImportList,
ModuleToDefinedGVSummaries[M->getModuleIdentifier()], ModuleMap)) {
handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) {
errs() << "Error running ThinLTO backend: " << EIB.message() << '\n';
@@ -1148,6 +1220,7 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags,
const llvm::DataLayout &TDesc, Module *M,
BackendAction Action,
std::unique_ptr<raw_pwrite_stream> OS) {
+ std::unique_ptr<llvm::Module> EmptyModule;
if (!CGOpts.ThinLTOIndexFile.empty()) {
// If we are performing a ThinLTO importing compile, load the function index
// into memory and pass it into runThinLTOBackend, which will run the
@@ -1165,11 +1238,22 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags,
// A null CombinedIndex means we should skip ThinLTO compilation
// (LLVM will optionally ignore empty index files, returning null instead
// of an error).
- bool DoThinLTOBackend = CombinedIndex != nullptr;
- if (DoThinLTOBackend) {
- runThinLTOBackend(CombinedIndex.get(), M, HeaderOpts, CGOpts, TOpts,
- LOpts, std::move(OS), CGOpts.SampleProfileFile, Action);
- return;
+ if (CombinedIndex) {
+ if (!CombinedIndex->skipModuleByDistributedBackend()) {
+ runThinLTOBackend(CombinedIndex.get(), M, HeaderOpts, CGOpts, TOpts,
+ LOpts, std::move(OS), CGOpts.SampleProfileFile,
+ Action);
+ return;
+ }
+ // Distributed indexing detected that nothing from the module is needed
+ // for the final linking. So we can skip the compilation. We still need to
+ // output an empty object file to make sure that a linker does not fail
+ // trying to read it. Also for some features, like CFI, we must skip
+ // the compilation as CombinedIndex does not contain all required
+ // information.
+ EmptyModule = llvm::make_unique<llvm::Module>("empty", M->getContext());
+ EmptyModule->setTargetTriple(M->getTargetTriple());
+ M = EmptyModule.get();
}
}
@@ -1228,7 +1312,7 @@ void clang::EmbedBitcode(llvm::Module *M, const CodeGenOptions &CGOpts,
// Save llvm.compiler.used and remove it.
SmallVector<Constant*, 2> UsedArray;
- SmallSet<GlobalValue*, 4> UsedGlobals;
+ SmallPtrSet<GlobalValue*, 4> UsedGlobals;
Type *UsedElementType = Type::getInt8Ty(M->getContext())->getPointerTo(0);
GlobalVariable *Used = collectUsedGlobalVariables(*M, UsedGlobals, true);
for (auto *GV : UsedGlobals) {
@@ -1253,7 +1337,7 @@ void clang::EmbedBitcode(llvm::Module *M, const CodeGenOptions &CGOpts,
// If the input is LLVM Assembly, bitcode is produced by serializing
// the module. Use-lists order needs to be preserved in this case.
llvm::raw_string_ostream OS(Data);
- llvm::WriteBitcodeToFile(M, OS, /* ShouldPreserveUseListOrder */ true);
+ llvm::WriteBitcodeToFile(*M, OS, /* ShouldPreserveUseListOrder */ true);
ModuleData =
ArrayRef<uint8_t>((const uint8_t *)OS.str().data(), OS.str().size());
} else
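Several of the BackendUtil.cpp hunks above share one idiom: the ThinLTO link file and the split-DWARF .dwo output are now opened through the new openOutputFile helper as llvm::ToolOutputFile, and EmitAssembly/EmitAssemblyWithNewPassManager call keep() only after code generation succeeds, so a failed compile leaves no stale output behind. A minimal standalone sketch of that pattern under the same LLVM 7-era API (the path and caller are illustrative):

    #include "llvm/ADT/STLExtras.h"          // llvm::make_unique
    #include "llvm/Support/FileSystem.h"
    #include "llvm/Support/ToolOutputFile.h"
    #include <memory>

    // Sketch of the idiom: a ToolOutputFile deletes its file on destruction
    // unless keep() is called, so every early return cleans up after itself.
    static std::unique_ptr<llvm::ToolOutputFile>
    openOutput(llvm::StringRef Path) {
      std::error_code EC;
      auto F = llvm::make_unique<llvm::ToolOutputFile>(Path, EC,
                                                       llvm::sys::fs::F_None);
      if (EC)
        return nullptr; // the real helper emits err_fe_unable_to_open_output
      return F;
    }

    void emitWithSplitDwarf() {
      auto DwoOS = openOutput("example.dwo"); // illustrative path
      if (!DwoOS)
        return;        // open failed; nothing on disk to clean up
      // ... run the codegen pipeline, writing DWARF to DwoOS->os() ...
      DwoOS->keep();   // success: commit the file instead of deleting it
    }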
diff --git a/lib/CodeGen/CGAtomic.cpp b/lib/CodeGen/CGAtomic.cpp
index 6862fd811186..b34bcdc1fc38 100644
--- a/lib/CodeGen/CGAtomic.cpp
+++ b/lib/CodeGen/CGAtomic.cpp
@@ -18,6 +18,7 @@
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/CodeGen/CGFunctionInfo.h"
+#include "clang/Sema/SemaDiagnostic.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Intrinsics.h"
@@ -186,7 +187,7 @@ namespace {
RValue convertAtomicTempToRValue(Address addr, AggValueSlot resultSlot,
SourceLocation loc, bool AsValue) const;
- /// \brief Converts a rvalue to integer value.
+ /// Converts a rvalue to integer value.
llvm::Value *convertRValueToInt(RValue RVal) const;
RValue ConvertIntToValueOrAtomic(llvm::Value *IntVal,
@@ -207,13 +208,13 @@ namespace {
LVal.getBaseInfo(), LVal.getTBAAInfo());
}
- /// \brief Emits atomic load.
+ /// Emits atomic load.
/// \returns Loaded value.
RValue EmitAtomicLoad(AggValueSlot ResultSlot, SourceLocation Loc,
bool AsValue, llvm::AtomicOrdering AO,
bool IsVolatile);
- /// \brief Emits atomic compare-and-exchange sequence.
+ /// Emits atomic compare-and-exchange sequence.
/// \param Expected Expected value.
/// \param Desired Desired value.
/// \param Success Atomic ordering for success operation.
@@ -229,13 +230,13 @@ namespace {
llvm::AtomicOrdering::SequentiallyConsistent,
bool IsWeak = false);
- /// \brief Emits atomic update.
+ /// Emits atomic update.
/// \param AO Atomic ordering.
/// \param UpdateOp Update operation for the current lvalue.
void EmitAtomicUpdate(llvm::AtomicOrdering AO,
const llvm::function_ref<RValue(RValue)> &UpdateOp,
bool IsVolatile);
- /// \brief Emits atomic update.
+ /// Emits atomic update.
/// \param AO Atomic ordering.
void EmitAtomicUpdate(llvm::AtomicOrdering AO, RValue UpdateRVal,
bool IsVolatile);
@@ -243,25 +244,25 @@ namespace {
/// Materialize an atomic r-value in atomic-layout memory.
Address materializeRValue(RValue rvalue) const;
- /// \brief Creates temp alloca for intermediate operations on atomic value.
+ /// Creates temp alloca for intermediate operations on atomic value.
Address CreateTempAlloca() const;
private:
bool requiresMemSetZero(llvm::Type *type) const;
- /// \brief Emits atomic load as a libcall.
+ /// Emits atomic load as a libcall.
void EmitAtomicLoadLibcall(llvm::Value *AddForLoaded,
llvm::AtomicOrdering AO, bool IsVolatile);
- /// \brief Emits atomic load as LLVM instruction.
+ /// Emits atomic load as LLVM instruction.
llvm::Value *EmitAtomicLoadOp(llvm::AtomicOrdering AO, bool IsVolatile);
- /// \brief Emits atomic compare-and-exchange op as a libcall.
+ /// Emits atomic compare-and-exchange op as a libcall.
llvm::Value *EmitAtomicCompareExchangeLibcall(
llvm::Value *ExpectedAddr, llvm::Value *DesiredAddr,
llvm::AtomicOrdering Success =
llvm::AtomicOrdering::SequentiallyConsistent,
llvm::AtomicOrdering Failure =
llvm::AtomicOrdering::SequentiallyConsistent);
- /// \brief Emits atomic compare-and-exchange op as LLVM instruction.
+ /// Emits atomic compare-and-exchange op as LLVM instruction.
std::pair<llvm::Value *, llvm::Value *> EmitAtomicCompareExchangeOp(
llvm::Value *ExpectedVal, llvm::Value *DesiredVal,
llvm::AtomicOrdering Success =
@@ -269,19 +270,19 @@ namespace {
llvm::AtomicOrdering Failure =
llvm::AtomicOrdering::SequentiallyConsistent,
bool IsWeak = false);
- /// \brief Emit atomic update as libcalls.
+ /// Emit atomic update as libcalls.
void
EmitAtomicUpdateLibcall(llvm::AtomicOrdering AO,
const llvm::function_ref<RValue(RValue)> &UpdateOp,
bool IsVolatile);
- /// \brief Emit atomic update as LLVM instructions.
+ /// Emit atomic update as LLVM instructions.
void EmitAtomicUpdateOp(llvm::AtomicOrdering AO,
const llvm::function_ref<RValue(RValue)> &UpdateOp,
bool IsVolatile);
- /// \brief Emit atomic update as libcalls.
+ /// Emit atomic update as libcalls.
void EmitAtomicUpdateLibcall(llvm::AtomicOrdering AO, RValue UpdateRVal,
bool IsVolatile);
- /// \brief Emit atomic update as LLVM instructions.
+ /// Emit atomic update as LLVM instructions.
void EmitAtomicUpdateOp(llvm::AtomicOrdering AO, RValue UpdateRal,
bool IsVolatile);
};
@@ -590,11 +591,13 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
break;
case AtomicExpr::AO__opencl_atomic_fetch_min:
+ case AtomicExpr::AO__atomic_fetch_min:
Op = E->getValueType()->isSignedIntegerType() ? llvm::AtomicRMWInst::Min
: llvm::AtomicRMWInst::UMin;
break;
case AtomicExpr::AO__opencl_atomic_fetch_max:
+ case AtomicExpr::AO__atomic_fetch_max:
Op = E->getValueType()->isSignedIntegerType() ? llvm::AtomicRMWInst::Max
: llvm::AtomicRMWInst::UMax;
break;
@@ -751,6 +754,13 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
Address Dest = Address::invalid();
Address Ptr = EmitPointerWithAlignment(E->getPtr());
+ if (E->getOp() == AtomicExpr::AO__c11_atomic_init ||
+ E->getOp() == AtomicExpr::AO__opencl_atomic_init) {
+ LValue lvalue = MakeAddrLValue(Ptr, AtomicTy);
+ EmitAtomicInit(E->getVal1(), lvalue);
+ return RValue::get(nullptr);
+ }
+
CharUnits sizeChars, alignChars;
std::tie(sizeChars, alignChars) = getContext().getTypeInfoInChars(AtomicTy);
uint64_t Size = sizeChars.getQuantity();
@@ -758,12 +768,8 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
bool UseLibcall = ((Ptr.getAlignment() % sizeChars) != 0 ||
getContext().toBits(sizeChars) > MaxInlineWidthInBits);
- if (E->getOp() == AtomicExpr::AO__c11_atomic_init ||
- E->getOp() == AtomicExpr::AO__opencl_atomic_init) {
- LValue lvalue = MakeAddrLValue(Ptr, AtomicTy);
- EmitAtomicInit(E->getVal1(), lvalue);
- return RValue::get(nullptr);
- }
+ if (UseLibcall)
+ CGM.getDiags().Report(E->getLocStart(), diag::warn_atomic_op_misaligned);
llvm::Value *Order = EmitScalarExpr(E->getOrder());
llvm::Value *Scope =
@@ -855,6 +861,8 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
case AtomicExpr::AO__atomic_or_fetch:
case AtomicExpr::AO__atomic_xor_fetch:
case AtomicExpr::AO__atomic_nand_fetch:
+ case AtomicExpr::AO__atomic_fetch_min:
+ case AtomicExpr::AO__atomic_fetch_max:
Val1 = EmitValToTemp(*this, E->getVal1());
break;
}
@@ -909,6 +917,8 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
case AtomicExpr::AO__atomic_or_fetch:
case AtomicExpr::AO__atomic_sub_fetch:
case AtomicExpr::AO__atomic_xor_fetch:
+ case AtomicExpr::AO__atomic_fetch_min:
+ case AtomicExpr::AO__atomic_fetch_max:
// For these, only library calls for certain sizes exist.
UseOptimizedLibcall = true;
break;
@@ -1091,6 +1101,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
MemTy, E->getExprLoc(), sizeChars);
break;
+ case AtomicExpr::AO__atomic_fetch_min:
case AtomicExpr::AO__opencl_atomic_fetch_min:
LibCallName = E->getValueType()->isSignedIntegerType()
? "__atomic_fetch_min"
@@ -1098,6 +1109,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
LoweredMemTy, E->getExprLoc(), sizeChars);
break;
+ case AtomicExpr::AO__atomic_fetch_max:
case AtomicExpr::AO__opencl_atomic_fetch_max:
LibCallName = E->getValueType()->isSignedIntegerType()
? "__atomic_fetch_max"
@@ -1160,7 +1172,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
if (UseOptimizedLibcall && Res.getScalarVal()) {
llvm::Value *ResVal = Res.getScalarVal();
if (PostOp) {
- llvm::Value *LoadVal1 = Args[1].RV.getScalarVal();
+ llvm::Value *LoadVal1 = Args[1].getRValue(*this).getScalarVal();
ResVal = Builder.CreateBinOp(PostOp, ResVal, LoadVal1);
}
if (E->getOp() == AtomicExpr::AO__atomic_nand_fetch)
@@ -1508,11 +1520,13 @@ void AtomicInfo::emitCopyIntoMemory(RValue rvalue) const {
// which means that the caller is responsible for having zeroed
// any padding. Just do an aggregate copy of that type.
if (rvalue.isAggregate()) {
- CGF.EmitAggregateCopy(getAtomicAddress(),
- rvalue.getAggregateAddress(),
- getAtomicType(),
- (rvalue.isVolatileQualified()
- || LVal.isVolatileQualified()));
+ LValue Dest = CGF.MakeAddrLValue(getAtomicAddress(), getAtomicType());
+ LValue Src = CGF.MakeAddrLValue(rvalue.getAggregateAddress(),
+ getAtomicType());
+ bool IsVolatile = rvalue.isVolatileQualified() ||
+ LVal.isVolatileQualified();
+ CGF.EmitAggregateCopy(Dest, Src, getAtomicType(),
+ AggValueSlot::DoesNotOverlap, IsVolatile);
return;
}
@@ -2007,6 +2021,7 @@ void CodeGenFunction::EmitAtomicInit(Expr *init, LValue dest) {
AggValueSlot::IsNotDestructed,
AggValueSlot::DoesNotNeedGCBarriers,
AggValueSlot::IsNotAliased,
+ AggValueSlot::DoesNotOverlap,
Zeroed ? AggValueSlot::IsZeroed :
AggValueSlot::IsNotZeroed);
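Beyond the doc-comment cleanup, the CGAtomic.cpp hunks above hoist the __c11_atomic_init/__opencl_atomic_init handling ahead of the size computation, emit warn_atomic_op_misaligned whenever an atomic operation must go through a libcall, and wire up the new __atomic_fetch_min/__atomic_fetch_max builtins on both paths: atomicrmw min/umin and max/umax chosen by signedness inline, and the __atomic_fetch_min/__atomic_fetch_max libcall names otherwise. A small illustrative caller, not taken from the patch:

    // Illustrative use of the newly handled builtins (compile with clang).
    // On a naturally aligned int this lowers to 'atomicrmw min' (signed);
    // an under-aligned or oversized operand instead warns and takes the
    // libcall path added above.
    int fetch_min_seqcst(int *p, int v) {
      return __atomic_fetch_min(p, v, __ATOMIC_SEQ_CST);
    }

    unsigned fetch_max_relaxed(unsigned *p, unsigned v) {
      return __atomic_fetch_max(p, v, __ATOMIC_RELAXED); // unsigned: umax
    }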
diff --git a/lib/CodeGen/CGBlocks.cpp b/lib/CodeGen/CGBlocks.cpp
index 5f73d4cf7913..617856a7b43e 100644
--- a/lib/CodeGen/CGBlocks.cpp
+++ b/lib/CodeGen/CGBlocks.cpp
@@ -66,7 +66,7 @@ static llvm::Constant *buildDisposeHelper(CodeGenModule &CGM,
/// buildBlockDescriptor - Build the block descriptor meta-data for a block.
/// buildBlockDescriptor is accessed from 5th field of the Block_literal
/// meta-data and contains stationary information about the block literal.
-/// Its definition will have 4 (or optinally 6) words.
+/// Its definition will have 4 (or optionally 6) words.
/// \code
/// struct Block_descriptor {
/// unsigned long reserved;
@@ -104,7 +104,7 @@ static llvm::Constant *buildBlockDescriptor(CodeGenModule &CGM,
elements.addInt(ulong, blockInfo.BlockSize.getQuantity());
// Optional copy/dispose helpers.
- if (blockInfo.NeedsCopyDispose) {
+ if (blockInfo.needsCopyDisposeHelpers()) {
// copy_func_helper_decl
elements.add(buildCopyHelper(CGM, blockInfo));
@@ -159,6 +159,7 @@ static llvm::Constant *buildBlockDescriptor(CodeGenModule &CGM,
/// These are the flags (with corresponding bit number) that the
/// compiler is actually supposed to know about.
+ /// 23. BLOCK_IS_NOESCAPE - indicates that the block is non-escaping
/// 25. BLOCK_HAS_COPY_DISPOSE - indicates that the block
/// descriptor provides copy and dispose helper functions
/// 26. BLOCK_HAS_CXX_OBJ - indicates that there's a captured
@@ -307,25 +308,12 @@ static void initializeForBlockHeader(CodeGenModule &CGM, CGBlockInfo &info,
assert(elementTypes.empty());
if (CGM.getLangOpts().OpenCL) {
- // The header is basically 'struct { int; int; generic void *;
+ // The header is basically 'struct { int; int;
// custom_fields; }'. Assert that struct is packed.
- auto GenericAS =
- CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic);
- auto GenPtrAlign =
- CharUnits::fromQuantity(CGM.getTarget().getPointerAlign(GenericAS) / 8);
- auto GenPtrSize =
- CharUnits::fromQuantity(CGM.getTarget().getPointerWidth(GenericAS) / 8);
- assert(CGM.getIntSize() <= GenPtrSize);
- assert(CGM.getIntAlign() <= GenPtrAlign);
- assert((2 * CGM.getIntSize()).isMultipleOf(GenPtrAlign));
elementTypes.push_back(CGM.IntTy); /* total size */
elementTypes.push_back(CGM.IntTy); /* align */
- elementTypes.push_back(
- CGM.getOpenCLRuntime()
- .getGenericVoidPointerType()); /* invoke function */
- unsigned Offset =
- 2 * CGM.getIntSize().getQuantity() + GenPtrSize.getQuantity();
- unsigned BlockAlign = GenPtrAlign.getQuantity();
+ unsigned Offset = 2 * CGM.getIntSize().getQuantity();
+ unsigned BlockAlign = CGM.getIntAlign().getQuantity();
if (auto *Helper =
CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
for (auto I : Helper->getCustomFieldTypes()) /* custom fields */ {
@@ -343,7 +331,7 @@ static void initializeForBlockHeader(CodeGenModule &CGM, CGBlockInfo &info,
info.BlockSize = CharUnits::fromQuantity(Offset);
} else {
// The header is basically 'struct { void *; int; int; void *; void *; }'.
- // Assert that that struct is packed.
+ // Assert that the struct is packed.
assert(CGM.getIntSize() <= CGM.getPointerSize());
assert(CGM.getIntAlign() <= CGM.getPointerAlign());
assert((2 * CGM.getIntSize()).isMultipleOf(CGM.getPointerAlign()));
@@ -477,6 +465,14 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF,
info.NeedsCopyDispose = true;
info.HasCXXObject = true;
+ // So do C structs that require non-trivial copy construction or
+ // destruction.
+ } else if (variable->getType().isNonTrivialToPrimitiveCopy() ==
+ QualType::PCK_Struct ||
+ variable->getType().isDestructedType() ==
+ QualType::DK_nontrivial_c_struct) {
+ info.NeedsCopyDispose = true;
+
// And so do types with destructors.
} else if (CGM.getLangOpts().CPlusPlus) {
if (const CXXRecordDecl *record =
@@ -705,11 +701,8 @@ static void enterBlockScope(CodeGenFunction &CGF, BlockDecl *block) {
/// kind of cleanup object is a BlockDecl*.
void CodeGenFunction::enterNonTrivialFullExpression(const ExprWithCleanups *E) {
assert(E->getNumObjects() != 0);
- ArrayRef<ExprWithCleanups::CleanupObject> cleanups = E->getObjects();
- for (ArrayRef<ExprWithCleanups::CleanupObject>::iterator
- i = cleanups.begin(), e = cleanups.end(); i != e; ++i) {
- enterBlockScope(*this, *i);
- }
+ for (const ExprWithCleanups::CleanupObject &C : E->getObjects())
+ enterBlockScope(*this, C);
}
/// Find the layout for the given block in a linked list and remove it.
@@ -740,27 +733,19 @@ void CodeGenFunction::destroyBlockInfos(CGBlockInfo *head) {
}
/// Emit a block literal expression in the current function.
-llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr,
- llvm::Function **InvokeF) {
+llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr) {
// If the block has no captures, we won't have a pre-computed
// layout for it.
if (!blockExpr->getBlockDecl()->hasCaptures()) {
// The block literal is emitted as a global variable, and the block invoke
// function has to be extracted from its initializer.
if (llvm::Constant *Block = CGM.getAddrOfGlobalBlockIfEmitted(blockExpr)) {
- if (InvokeF) {
- auto *GV = cast<llvm::GlobalVariable>(
- cast<llvm::Constant>(Block)->stripPointerCasts());
- auto *BlockInit = cast<llvm::ConstantStruct>(GV->getInitializer());
- *InvokeF = cast<llvm::Function>(
- BlockInit->getAggregateElement(2)->stripPointerCasts());
- }
return Block;
}
CGBlockInfo blockInfo(blockExpr->getBlockDecl(), CurFn->getName());
computeBlockInfo(CGM, this, blockInfo);
blockInfo.BlockExpression = blockExpr;
- return EmitBlockLiteral(blockInfo, InvokeF);
+ return EmitBlockLiteral(blockInfo);
}
// Find the block info for this block and take ownership of it.
@@ -769,28 +754,17 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr,
blockExpr->getBlockDecl()));
blockInfo->BlockExpression = blockExpr;
- return EmitBlockLiteral(*blockInfo, InvokeF);
+ return EmitBlockLiteral(*blockInfo);
}
-llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo,
- llvm::Function **InvokeF) {
+llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
bool IsOpenCL = CGM.getContext().getLangOpts().OpenCL;
- auto GenVoidPtrTy =
- IsOpenCL ? CGM.getOpenCLRuntime().getGenericVoidPointerType() : VoidPtrTy;
- LangAS GenVoidPtrAddr = IsOpenCL ? LangAS::opencl_generic : LangAS::Default;
- auto GenVoidPtrSize = CharUnits::fromQuantity(
- CGM.getTarget().getPointerWidth(
- CGM.getContext().getTargetAddressSpace(GenVoidPtrAddr)) /
- 8);
// Using the computed layout, generate the actual block function.
bool isLambdaConv = blockInfo.getBlockDecl()->isConversionFromLambda();
CodeGenFunction BlockCGF{CGM, true};
BlockCGF.SanOpts = SanOpts;
auto *InvokeFn = BlockCGF.GenerateBlockFunction(
CurGD, blockInfo, LocalDeclMap, isLambdaConv, blockInfo.CanBeGlobal);
- if (InvokeF)
- *InvokeF = InvokeFn;
- auto *blockFn = llvm::ConstantExpr::getPointerCast(InvokeFn, GenVoidPtrTy);
// If there is nothing to capture, we can emit this as a global block.
if (blockInfo.CanBeGlobal)
@@ -805,8 +779,13 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo,
llvm::Constant *descriptor;
BlockFlags flags;
if (!IsOpenCL) {
- isa = llvm::ConstantExpr::getBitCast(CGM.getNSConcreteStackBlock(),
- VoidPtrTy);
+ // If the block is non-escaping, set field 'isa' to NSConcreteGlobalBlock
+ // and set the BLOCK_IS_GLOBAL bit of field 'flags'. Copying a non-escaping
+ // block just returns the original block and releasing it is a no-op.
+ llvm::Constant *blockISA = blockInfo.getBlockDecl()->doesNotEscape()
+ ? CGM.getNSConcreteGlobalBlock()
+ : CGM.getNSConcreteStackBlock();
+ isa = llvm::ConstantExpr::getBitCast(blockISA, VoidPtrTy);
// Build the block descriptor.
descriptor = buildBlockDescriptor(CGM, blockInfo);
@@ -815,12 +794,14 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo,
flags = BLOCK_HAS_SIGNATURE;
if (blockInfo.HasCapturedVariableLayout)
flags |= BLOCK_HAS_EXTENDED_LAYOUT;
- if (blockInfo.NeedsCopyDispose)
+ if (blockInfo.needsCopyDisposeHelpers())
flags |= BLOCK_HAS_COPY_DISPOSE;
if (blockInfo.HasCXXObject)
flags |= BLOCK_HAS_CXX_OBJ;
if (blockInfo.UsesStret)
flags |= BLOCK_USE_STRET;
+ if (blockInfo.getBlockDecl()->doesNotEscape())
+ flags |= BLOCK_IS_NOESCAPE | BLOCK_IS_GLOBAL;
}
auto projectField =
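The EmitBlockLiteral hunk just above gives non-escaping blocks the NSConcreteGlobalBlock isa and sets BLOCK_IS_NOESCAPE | BLOCK_IS_GLOBAL, so copying such a block returns it unchanged and releasing it is a no-op. A hedged source-level sketch of code that produces one (clang with -fblocks; the names are illustrative):

    // Illustrative only: a block literal passed to a
    // __attribute__((noescape)) parameter is known not to outlive the call,
    // so after this change it is emitted with the global-block isa and the
    // no-escape flag bits described above.
    void each(int n, __attribute__((noescape)) void (^body)(int)) {
      for (int i = 0; i < n; ++i)
        body(i);
    }

    void sum_to(int *out) {
      each(10, ^(int i) { *out += i; }); // non-escaping stack block literal
    }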
@@ -859,11 +840,12 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo,
llvm::ConstantInt::get(IntTy, blockInfo.BlockAlign.getQuantity()),
getIntSize(), "block.align");
}
- addHeaderField(blockFn, GenVoidPtrSize, "block.invoke");
- if (!IsOpenCL)
+ if (!IsOpenCL) {
+ addHeaderField(llvm::ConstantExpr::getBitCast(InvokeFn, VoidPtrTy),
+ getPointerSize(), "block.invoke");
addHeaderField(descriptor, getPointerSize(), "block.descriptor");
- else if (auto *Helper =
- CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
+ } else if (auto *Helper =
+ CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
for (auto I : Helper->getCustomFieldValues(*this, blockInfo)) {
addHeaderField(
I.first,
@@ -913,7 +895,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo,
const CGBlockInfo::Capture &enclosingCapture =
BlockInfo->getCapture(variable);
- // This is a [[type]]*, except that a byref entry wil just be an i8**.
+ // This is a [[type]]*, except that a byref entry will just be an i8**.
src = Builder.CreateStructGEP(LoadBlockStruct(),
enclosingCapture.getIndex(),
enclosingCapture.getOffset(),
@@ -955,7 +937,8 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo,
AggValueSlot::forAddr(blockField, Qualifiers(),
AggValueSlot::IsDestructed,
AggValueSlot::DoesNotNeedGCBarriers,
- AggValueSlot::IsNotAliased);
+ AggValueSlot::IsNotAliased,
+ AggValueSlot::DoesNotOverlap);
EmitAggExpr(copyExpr, Slot);
} else {
EmitSynthesizedCXXCopyCtor(blockField, src, copyExpr);
@@ -1024,6 +1007,11 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo,
llvm::Value *result = Builder.CreatePointerCast(
blockAddr.getPointer(), ConvertType(blockInfo.getBlockExpr()->getType()));
+ if (IsOpenCL) {
+ CGM.getOpenCLRuntime().recordBlockInfo(blockInfo.BlockExpression, InvokeFn,
+ result);
+ }
+
return result;
}
@@ -1061,38 +1049,23 @@ llvm::Type *CodeGenModule::getBlockDescriptorType() {
}
llvm::Type *CodeGenModule::getGenericBlockLiteralType() {
+ assert(!getLangOpts().OpenCL && "OpenCL does not need this");
+
if (GenericBlockLiteralType)
return GenericBlockLiteralType;
llvm::Type *BlockDescPtrTy = getBlockDescriptorType();
- if (getLangOpts().OpenCL) {
- // struct __opencl_block_literal_generic {
- // int __size;
- // int __align;
- // __generic void *__invoke;
- // /* custom fields */
- // };
- SmallVector<llvm::Type *, 8> StructFields(
- {IntTy, IntTy, getOpenCLRuntime().getGenericVoidPointerType()});
- if (auto *Helper = getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
- for (auto I : Helper->getCustomFieldTypes())
- StructFields.push_back(I);
- }
- GenericBlockLiteralType = llvm::StructType::create(
- StructFields, "struct.__opencl_block_literal_generic");
- } else {
- // struct __block_literal_generic {
- // void *__isa;
- // int __flags;
- // int __reserved;
- // void (*__invoke)(void *);
- // struct __block_descriptor *__descriptor;
- // };
- GenericBlockLiteralType =
- llvm::StructType::create("struct.__block_literal_generic", VoidPtrTy,
- IntTy, IntTy, VoidPtrTy, BlockDescPtrTy);
- }
+ // struct __block_literal_generic {
+ // void *__isa;
+ // int __flags;
+ // int __reserved;
+ // void (*__invoke)(void *);
+ // struct __block_descriptor *__descriptor;
+ // };
+ GenericBlockLiteralType =
+ llvm::StructType::create("struct.__block_literal_generic", VoidPtrTy,
+ IntTy, IntTy, VoidPtrTy, BlockDescPtrTy);
return GenericBlockLiteralType;
}
@@ -1103,27 +1076,21 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E,
E->getCallee()->getType()->getAs<BlockPointerType>();
llvm::Value *BlockPtr = EmitScalarExpr(E->getCallee());
+ llvm::Value *FuncPtr;
- // Get a pointer to the generic block literal.
- // For OpenCL we generate generic AS void ptr to be able to reuse the same
- // block definition for blocks with captures generated as private AS local
- // variables and without captures generated as global AS program scope
- // variables.
- unsigned AddrSpace = 0;
- if (getLangOpts().OpenCL)
- AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_generic);
-
- llvm::Type *BlockLiteralTy =
- llvm::PointerType::get(CGM.getGenericBlockLiteralType(), AddrSpace);
+ if (!CGM.getLangOpts().OpenCL) {
+ // Get a pointer to the generic block literal.
+ llvm::Type *BlockLiteralTy =
+ llvm::PointerType::get(CGM.getGenericBlockLiteralType(), 0);
- // Bitcast the callee to a block literal.
- BlockPtr =
- Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal");
+ // Bitcast the callee to a block literal.
+ BlockPtr =
+ Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal");
- // Get the function pointer from the literal.
- llvm::Value *FuncPtr =
- Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr,
- CGM.getLangOpts().OpenCL ? 2 : 3);
+ // Get the function pointer from the literal.
+ FuncPtr =
+ Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr, 3);
+ }
// Add the block literal.
CallArgList Args;
@@ -1146,7 +1113,11 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E,
EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments());
// Load the function.
- llvm::Value *Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign());
+ llvm::Value *Func;
+ if (CGM.getLangOpts().OpenCL)
+ Func = CGM.getOpenCLRuntime().getInvokeFunction(E->getCallee());
+ else
+ Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign());
const FunctionType *FuncTy = FnType->castAs<FunctionType>();
const CGFunctionInfo &FnInfo =
@@ -1255,14 +1226,14 @@ static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM,
// Reserved
fields.addInt(CGM.IntTy, 0);
+
+ // Function
+ fields.add(blockFn);
} else {
fields.addInt(CGM.IntTy, blockInfo.BlockSize.getQuantity());
fields.addInt(CGM.IntTy, blockInfo.BlockAlign.getQuantity());
}
- // Function
- fields.add(blockFn);
-
if (!IsOpenCL) {
// Descriptor
fields.add(buildBlockDescriptor(CGM, blockInfo));
@@ -1287,6 +1258,10 @@ static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM,
llvm::Constant *Result =
llvm::ConstantExpr::getPointerCast(literal, RequiredType);
CGM.setAddrOfGlobalBlock(blockInfo.BlockExpression, Result);
+ if (CGM.getContext().getLangOpts().OpenCL)
+ CGM.getOpenCLRuntime().recordBlockInfo(
+ blockInfo.BlockExpression,
+ cast<llvm::Function>(blockFn->stripPointerCasts()), Result);
return Result;
}
@@ -1479,8 +1454,8 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD,
const CGBlockInfo::Capture &capture = blockInfo.getCapture(variable);
if (capture.isConstant()) {
auto addr = LocalDeclMap.find(variable)->second;
- DI->EmitDeclareOfAutoVariable(variable, addr.getPointer(),
- Builder);
+ (void)DI->EmitDeclareOfAutoVariable(variable, addr.getPointer(),
+ Builder);
continue;
}
@@ -1513,6 +1488,7 @@ enum class BlockCaptureEntityKind {
CXXRecord, // Copy or destroy
ARCWeak,
ARCStrong,
+ NonTrivialCStruct,
BlockObject, // Assign or release
None
};
@@ -1548,39 +1524,46 @@ computeCopyInfoForBlockCapture(const BlockDecl::Capture &CI, QualType T,
Flags |= BLOCK_FIELD_IS_WEAK;
return std::make_pair(BlockCaptureEntityKind::BlockObject, Flags);
}
- if (!T->isObjCRetainableType())
- // For all other types, the memcpy is fine.
- return std::make_pair(BlockCaptureEntityKind::None, Flags);
Flags = BLOCK_FIELD_IS_OBJECT;
bool isBlockPointer = T->isBlockPointerType();
if (isBlockPointer)
Flags = BLOCK_FIELD_IS_BLOCK;
- // Special rules for ARC captures:
- Qualifiers QS = T.getQualifiers();
-
- // We need to register __weak direct captures with the runtime.
- if (QS.getObjCLifetime() == Qualifiers::OCL_Weak)
+ switch (T.isNonTrivialToPrimitiveCopy()) {
+ case QualType::PCK_Struct:
+ return std::make_pair(BlockCaptureEntityKind::NonTrivialCStruct,
+ BlockFieldFlags());
+ case QualType::PCK_ARCWeak:
+ // We need to register __weak direct captures with the runtime.
return std::make_pair(BlockCaptureEntityKind::ARCWeak, Flags);
-
- // We need to retain the copied value for __strong direct captures.
- if (QS.getObjCLifetime() == Qualifiers::OCL_Strong) {
- // If it's a block pointer, we have to copy the block and
- // assign that to the destination pointer, so we might as
- // well use _Block_object_assign. Otherwise we can avoid that.
+ case QualType::PCK_ARCStrong:
+ // We need to retain the copied value for __strong direct captures.
+ // If it's a block pointer, we have to copy the block and assign that to
+ // the destination pointer, so we might as well use _Block_object_assign.
+ // Otherwise we can avoid that.
return std::make_pair(!isBlockPointer ? BlockCaptureEntityKind::ARCStrong
: BlockCaptureEntityKind::BlockObject,
Flags);
- }
+ case QualType::PCK_Trivial:
+ case QualType::PCK_VolatileTrivial: {
+ if (!T->isObjCRetainableType())
+ // For all other types, the memcpy is fine.
+ return std::make_pair(BlockCaptureEntityKind::None, BlockFieldFlags());
- // Non-ARC captures of retainable pointers are strong and
- // therefore require a call to _Block_object_assign.
- if (!QS.getObjCLifetime() && !LangOpts.ObjCAutoRefCount)
- return std::make_pair(BlockCaptureEntityKind::BlockObject, Flags);
+ // Special rules for ARC captures:
+ Qualifiers QS = T.getQualifiers();
- // Otherwise the memcpy is fine.
- return std::make_pair(BlockCaptureEntityKind::None, Flags);
+ // Non-ARC captures of retainable pointers are strong and
+ // therefore require a call to _Block_object_assign.
+ if (!QS.getObjCLifetime() && !LangOpts.ObjCAutoRefCount)
+ return std::make_pair(BlockCaptureEntityKind::BlockObject, Flags);
+
+ // Otherwise the memcpy is fine.
+ return std::make_pair(BlockCaptureEntityKind::None, BlockFieldFlags());
+ }
+ }
+ llvm_unreachable("after exhaustive PrimitiveCopyKind switch");
}
/// Find the set of block captures that need to be explicitly copied or destroyed.
@@ -1602,6 +1585,64 @@ static void findBlockCapturedManagedEntities(
}
}
+namespace {
+/// Release a __block variable.
+struct CallBlockRelease final : EHScopeStack::Cleanup {
+ Address Addr;
+ BlockFieldFlags FieldFlags;
+ bool LoadBlockVarAddr;
+
+ CallBlockRelease(Address Addr, BlockFieldFlags Flags, bool LoadValue)
+ : Addr(Addr), FieldFlags(Flags), LoadBlockVarAddr(LoadValue) {}
+
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
+ llvm::Value *BlockVarAddr;
+ if (LoadBlockVarAddr) {
+ BlockVarAddr = CGF.Builder.CreateLoad(Addr);
+ BlockVarAddr = CGF.Builder.CreateBitCast(BlockVarAddr, CGF.VoidPtrTy);
+ } else {
+ BlockVarAddr = Addr.getPointer();
+ }
+
+ CGF.BuildBlockRelease(BlockVarAddr, FieldFlags);
+ }
+};
+} // end anonymous namespace
+
+static void pushCaptureCleanup(BlockCaptureEntityKind CaptureKind,
+ Address Field, QualType CaptureType,
+ BlockFieldFlags Flags, bool EHOnly,
+ CodeGenFunction &CGF) {
+ switch (CaptureKind) {
+ case BlockCaptureEntityKind::CXXRecord:
+ case BlockCaptureEntityKind::ARCWeak:
+ case BlockCaptureEntityKind::NonTrivialCStruct:
+ case BlockCaptureEntityKind::ARCStrong: {
+ if (CaptureType.isDestructedType() &&
+ (!EHOnly || CGF.needsEHCleanup(CaptureType.isDestructedType()))) {
+ CodeGenFunction::Destroyer *Destroyer =
+ CaptureKind == BlockCaptureEntityKind::ARCStrong
+ ? CodeGenFunction::destroyARCStrongImprecise
+ : CGF.getDestroyer(CaptureType.isDestructedType());
+ CleanupKind Kind =
+ EHOnly ? EHCleanup
+ : CGF.getCleanupKind(CaptureType.isDestructedType());
+ CGF.pushDestroy(Kind, Field, CaptureType, Destroyer, Kind & EHCleanup);
+ }
+ break;
+ }
+ case BlockCaptureEntityKind::BlockObject: {
+ if (!EHOnly || CGF.getLangOpts().Exceptions) {
+ CleanupKind Kind = EHOnly ? EHCleanup : NormalAndEHCleanup;
+ CGF.enterByrefCleanup(Kind, Field, Flags, /*LoadBlockVarAddr*/ true);
+ }
+ break;
+ }
+ case BlockCaptureEntityKind::None:
+ llvm_unreachable("unexpected BlockCaptureEntityKind");
+ }
+}
+
/// Generate the copy-helper function for a block closure object:
/// static void block_copy_helper(block_t *dst, block_t *src);
/// The runtime will have previously initialized 'dst' by doing a
@@ -1644,7 +1685,7 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) {
false,
false);
- CGM.SetInternalFunctionAttributes(nullptr, Fn, FI);
+ CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);
StartFunction(FD, C.VoidTy, Fn, FI, args);
ApplyDebugLocation NL{*this, blockInfo.getBlockExpr()->getLocStart()};
@@ -1665,6 +1706,7 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) {
for (const auto &CopiedCapture : CopiedCaptures) {
const BlockDecl::Capture &CI = CopiedCapture.CI;
const CGBlockInfo::Capture &capture = CopiedCapture.Capture;
+ QualType captureType = CI.getVariable()->getType();
BlockFieldFlags flags = CopiedCapture.Flags;
unsigned index = capture.getIndex();
@@ -1677,6 +1719,13 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) {
EmitSynthesizedCXXCopyCtor(dstField, srcField, CI.getCopyExpr());
} else if (CopiedCapture.Kind == BlockCaptureEntityKind::ARCWeak) {
EmitARCCopyWeak(dstField, srcField);
+ // If this is a C struct that requires non-trivial copy construction, emit a
+ // call to its copy constructor.
+ } else if (CopiedCapture.Kind ==
+ BlockCaptureEntityKind::NonTrivialCStruct) {
+ QualType varType = CI.getVariable()->getType();
+ callCStructCopyConstructor(MakeAddrLValue(dstField, varType),
+ MakeAddrLValue(srcField, varType));
} else {
llvm::Value *srcValue = Builder.CreateLoad(srcField, "blockcopy.src");
if (CopiedCapture.Kind == BlockCaptureEntityKind::ARCStrong) {
@@ -1695,9 +1744,11 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) {
} else {
EmitARCRetainNonBlock(srcValue);
- // We don't need this anymore, so kill it. It's not quite
- // worth the annoyance to avoid creating it in the first place.
- cast<llvm::Instruction>(dstField.getPointer())->eraseFromParent();
+ // Unless EH cleanup is required, we don't need this anymore, so kill
+ // it. It's not quite worth the annoyance to avoid creating it in the
+ // first place.
+ if (!needsEHCleanup(captureType.isDestructedType()))
+ cast<llvm::Instruction>(dstField.getPointer())->eraseFromParent();
}
} else {
assert(CopiedCapture.Kind == BlockCaptureEntityKind::BlockObject);
@@ -1725,6 +1776,11 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) {
}
}
}
+
+ // Ensure that we destroy the copied object if an exception is thrown later
+ // in the helper function.
+ pushCaptureCleanup(CopiedCapture.Kind, dstField, captureType, flags,
+ /*EHOnly*/ true, *this);
}
FinishFunction();
@@ -1732,50 +1788,51 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) {
return llvm::ConstantExpr::getBitCast(Fn, VoidPtrTy);
}
+static BlockFieldFlags
+getBlockFieldFlagsForObjCObjectPointer(const BlockDecl::Capture &CI,
+ QualType T) {
+ BlockFieldFlags Flags = BLOCK_FIELD_IS_OBJECT;
+ if (T->isBlockPointerType())
+ Flags = BLOCK_FIELD_IS_BLOCK;
+ return Flags;
+}
+
static std::pair<BlockCaptureEntityKind, BlockFieldFlags>
computeDestroyInfoForBlockCapture(const BlockDecl::Capture &CI, QualType T,
const LangOptions &LangOpts) {
- BlockFieldFlags Flags;
if (CI.isByRef()) {
- Flags = BLOCK_FIELD_IS_BYREF;
+ BlockFieldFlags Flags = BLOCK_FIELD_IS_BYREF;
if (T.isObjCGCWeak())
Flags |= BLOCK_FIELD_IS_WEAK;
return std::make_pair(BlockCaptureEntityKind::BlockObject, Flags);
}
- if (const CXXRecordDecl *Record = T->getAsCXXRecordDecl()) {
- if (Record->hasTrivialDestructor())
- return std::make_pair(BlockCaptureEntityKind::None, BlockFieldFlags());
+ switch (T.isDestructedType()) {
+ case QualType::DK_cxx_destructor:
return std::make_pair(BlockCaptureEntityKind::CXXRecord, BlockFieldFlags());
- }
-
- // Other types don't need to be destroy explicitly.
- if (!T->isObjCRetainableType())
- return std::make_pair(BlockCaptureEntityKind::None, Flags);
-
- Flags = BLOCK_FIELD_IS_OBJECT;
- if (T->isBlockPointerType())
- Flags = BLOCK_FIELD_IS_BLOCK;
-
- // Special rules for ARC captures.
- Qualifiers QS = T.getQualifiers();
-
- // Use objc_storeStrong for __strong direct captures; the
- // dynamic tools really like it when we do this.
- if (QS.getObjCLifetime() == Qualifiers::OCL_Strong)
- return std::make_pair(BlockCaptureEntityKind::ARCStrong, Flags);
-
- // Support __weak direct captures.
- if (QS.getObjCLifetime() == Qualifiers::OCL_Weak)
- return std::make_pair(BlockCaptureEntityKind::ARCWeak, Flags);
-
- // Non-ARC captures are strong, and we need to use
- // _Block_object_dispose.
- if (!QS.hasObjCLifetime() && !LangOpts.ObjCAutoRefCount)
- return std::make_pair(BlockCaptureEntityKind::BlockObject, Flags);
-
- // Otherwise, we have nothing to do.
- return std::make_pair(BlockCaptureEntityKind::None, Flags);
+ case QualType::DK_objc_strong_lifetime:
+ // Use objc_storeStrong for __strong direct captures; the
+ // dynamic tools really like it when we do this.
+ return std::make_pair(BlockCaptureEntityKind::ARCStrong,
+ getBlockFieldFlagsForObjCObjectPointer(CI, T));
+ case QualType::DK_objc_weak_lifetime:
+ // Support __weak direct captures.
+ return std::make_pair(BlockCaptureEntityKind::ARCWeak,
+ getBlockFieldFlagsForObjCObjectPointer(CI, T));
+ case QualType::DK_nontrivial_c_struct:
+ return std::make_pair(BlockCaptureEntityKind::NonTrivialCStruct,
+ BlockFieldFlags());
+ case QualType::DK_none: {
+ // Non-ARC captures are strong, and we need to use _Block_object_dispose.
+ if (T->isObjCRetainableType() && !T.getQualifiers().hasObjCLifetime() &&
+ !LangOpts.ObjCAutoRefCount)
+ return std::make_pair(BlockCaptureEntityKind::BlockObject,
+ getBlockFieldFlagsForObjCObjectPointer(CI, T));
+ // Otherwise, we have nothing to do.
+ return std::make_pair(BlockCaptureEntityKind::None, BlockFieldFlags());
+ }
+ }
+ llvm_unreachable("after exhaustive DestructionKind switch");
}
/// Generate the destroy-helper function for a block closure object:
@@ -1814,7 +1871,7 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) {
nullptr, SC_Static,
false, false);
- CGM.SetInternalFunctionAttributes(nullptr, Fn, FI);
+ CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);
StartFunction(FD, C.VoidTy, Fn, FI, args);
ApplyDebugLocation NL{*this, blockInfo.getBlockExpr()->getLocStart()};
@@ -1839,29 +1896,8 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) {
Address srcField =
Builder.CreateStructGEP(src, capture.getIndex(), capture.getOffset());
- // If the captured record has a destructor then call it.
- if (DestroyedCapture.Kind == BlockCaptureEntityKind::CXXRecord) {
- const auto *Dtor =
- CI.getVariable()->getType()->getAsCXXRecordDecl()->getDestructor();
- PushDestructorCleanup(Dtor, srcField);
-
- // If this is a __weak capture, emit the release directly.
- } else if (DestroyedCapture.Kind == BlockCaptureEntityKind::ARCWeak) {
- EmitARCDestroyWeak(srcField);
-
- // Destroy strong objects with a call if requested.
- } else if (DestroyedCapture.Kind == BlockCaptureEntityKind::ARCStrong) {
- EmitARCDestroyStrong(srcField, ARCImpreciseLifetime);
-
- // Otherwise we call _Block_object_dispose. It wouldn't be too
- // hard to just emit this as a cleanup if we wanted to make sure
- // that things were done in reverse.
- } else {
- assert(DestroyedCapture.Kind == BlockCaptureEntityKind::BlockObject);
- llvm::Value *value = Builder.CreateLoad(srcField);
- value = Builder.CreateBitCast(value, VoidPtrTy);
- BuildBlockRelease(value, flags);
- }
+ pushCaptureCleanup(DestroyedCapture.Kind, srcField,
+ CI.getVariable()->getType(), flags, /*EHOnly*/ false, *this);
}
cleanups.ForceCleanup();
@@ -2020,6 +2056,36 @@ public:
id.AddPointer(VarType.getCanonicalType().getAsOpaquePtr());
}
};
+
+/// Emits the copy/dispose helpers for a __block variable that is a non-trivial
+/// C struct.
+class NonTrivialCStructByrefHelpers final : public BlockByrefHelpers {
+ QualType VarType;
+
+public:
+ NonTrivialCStructByrefHelpers(CharUnits alignment, QualType type)
+ : BlockByrefHelpers(alignment), VarType(type) {}
+
+ void emitCopy(CodeGenFunction &CGF, Address destField,
+ Address srcField) override {
+ CGF.callCStructMoveConstructor(CGF.MakeAddrLValue(destField, VarType),
+ CGF.MakeAddrLValue(srcField, VarType));
+ }
+
+ bool needsDispose() const override {
+ return VarType.isDestructedType();
+ }
+
+ void emitDispose(CodeGenFunction &CGF, Address field) override {
+ EHScopeStack::stable_iterator cleanupDepth = CGF.EHStack.stable_begin();
+ CGF.pushDestroy(VarType.isDestructedType(), field, VarType);
+ CGF.PopCleanupBlocks(cleanupDepth);
+ }
+
+ void profileImpl(llvm::FoldingSetNodeID &id) const override {
+ id.AddPointer(VarType.getCanonicalType().getAsOpaquePtr());
+ }
+};
} // end anonymous namespace
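As a rough companion example (mine, not from the patch), the byref helpers above fire for a __block variable of non-trivial C struct type:

    // clang -fobjc-arc -fblocks
    struct Wrapper { __strong id payload; };

    void byrefExample(id value) {
      __block struct Wrapper w = { value };
      void (^blk)(void) = ^{ w.payload = nil; };  // mutates the byref slot
      blk();
      // Copying 'blk' moves 'w' to the heap through emitCopy above
      // (callCStructMoveConstructor); disposing the byref slot runs the
      // DK_nontrivial_c_struct destructor pushed by emitDispose.
    }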
static llvm::Constant *
@@ -2059,7 +2125,7 @@ generateByrefCopyHelper(CodeGenFunction &CGF, const BlockByrefInfo &byrefInfo,
SC_Static,
false, false);
- CGF.CGM.SetInternalFunctionAttributes(nullptr, Fn, FI);
+ CGF.CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);
CGF.StartFunction(FD, R, Fn, FI, args);
@@ -2133,7 +2199,7 @@ generateByrefDisposeHelper(CodeGenFunction &CGF,
SC_Static,
false, false);
- CGF.CGM.SetInternalFunctionAttributes(nullptr, Fn, FI);
+ CGF.CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);
CGF.StartFunction(FD, R, Fn, FI, args);
@@ -2205,6 +2271,13 @@ CodeGenFunction::buildByrefHelpers(llvm::StructType &byrefType,
CGM, byrefInfo, CXXByrefHelpers(valueAlignment, type, copyExpr));
}
+ // If the type is a non-trivial C struct that is non-trivial to
+ // destructively move or destroy, build the copy and dispose helpers.
+ if (type.isNonTrivialToPrimitiveDestructiveMove() == QualType::PCK_Struct ||
+ type.isDestructedType() == QualType::DK_nontrivial_c_struct)
+ return ::buildByrefHelpers(
+ CGM, byrefInfo, NonTrivialCStructByrefHelpers(valueAlignment, type));
+
// Otherwise, if we don't have a retainable type, there's nothing to do.
if (!type->isObjCRetainableType()) return nullptr;
@@ -2503,30 +2576,10 @@ void CodeGenFunction::BuildBlockRelease(llvm::Value *V, BlockFieldFlags flags) {
EmitNounwindRuntimeCall(F, args); // FIXME: throwing destructors?
}
-namespace {
- /// Release a __block variable.
- struct CallBlockRelease final : EHScopeStack::Cleanup {
- llvm::Value *Addr;
- CallBlockRelease(llvm::Value *Addr) : Addr(Addr) {}
-
- void Emit(CodeGenFunction &CGF, Flags flags) override {
- // Should we be passing FIELD_IS_WEAK here?
- CGF.BuildBlockRelease(Addr, BLOCK_FIELD_IS_BYREF);
- }
- };
-} // end anonymous namespace
-
-/// Enter a cleanup to destroy a __block variable. Note that this
-/// cleanup should be a no-op if the variable hasn't left the stack
-/// yet; if a cleanup is required for the variable itself, that needs
-/// to be done externally.
-void CodeGenFunction::enterByrefCleanup(const AutoVarEmission &emission) {
- // We don't enter this cleanup if we're in pure-GC mode.
- if (CGM.getLangOpts().getGC() == LangOptions::GCOnly)
- return;
-
- EHStack.pushCleanup<CallBlockRelease>(NormalAndEHCleanup,
- emission.Addr.getPointer());
+void CodeGenFunction::enterByrefCleanup(CleanupKind Kind, Address Addr,
+ BlockFieldFlags Flags,
+ bool LoadBlockVarAddr) {
+ EHStack.pushCleanup<CallBlockRelease>(Kind, Addr, Flags, LoadBlockVarAddr);
}
/// Adjust the declaration of something from the blocks API.
@@ -2559,11 +2612,11 @@ static void configureBlocksRuntimeObject(CodeGenModule &CGM,
}
}
- if (!CGM.getLangOpts().BlocksRuntimeOptional)
- return;
-
- if (GV->isDeclaration() && GV->hasExternalLinkage())
+ if (CGM.getLangOpts().BlocksRuntimeOptional && GV->isDeclaration() &&
+ GV->hasExternalLinkage())
GV->setLinkage(llvm::GlobalValue::ExternalWeakLinkage);
+
+ CGM.setDSOLocal(GV);
}
llvm::Constant *CodeGenModule::getBlockObjectDispose() {
diff --git a/lib/CodeGen/CGBlocks.h b/lib/CodeGen/CGBlocks.h
index 80e255f75417..5a8e960ffcc1 100644
--- a/lib/CodeGen/CGBlocks.h
+++ b/lib/CodeGen/CGBlocks.h
@@ -54,6 +54,7 @@ enum BlockByrefFlags {
};
enum BlockLiteralFlags {
+ BLOCK_IS_NOESCAPE = (1 << 23),
BLOCK_HAS_COPY_DISPOSE = (1 << 25),
BLOCK_HAS_CXX_OBJ = (1 << 26),
BLOCK_IS_GLOBAL = (1 << 28),
@@ -214,7 +215,8 @@ public:
/// no non-constant captures.
bool CanBeGlobal : 1;
- /// True if the block needs a custom copy or dispose function.
+ /// True if the block has captures that would necessitate custom copy or
+ /// dispose helper functions if the block were escaping.
bool NeedsCopyDispose : 1;
/// HasCXXObject - True if the block's custom copy/dispose functions
@@ -276,6 +278,11 @@ public:
}
CGBlockInfo(const BlockDecl *blockDecl, StringRef Name);
+
+ // Indicates whether the block needs a custom copy or dispose function.
+ bool needsCopyDisposeHelpers() const {
+ return NeedsCopyDispose && !Block->doesNotEscape();
+ }
};
} // end namespace CodeGen
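For context on BLOCK_IS_NOESCAPE and needsCopyDisposeHelpers(): a block that is only ever passed to noescape parameters cannot outlive the call, so no copy/dispose helpers are needed even for ARC captures. A hedged sketch (names are illustrative):

    void apply(__attribute__((noescape)) void (^fn)(void)) { fn(); }

    void caller(id obj) {
      // Block->doesNotEscape() is true here, so needsCopyDisposeHelpers()
      // returns false and the literal carries the BLOCK_IS_NOESCAPE flag.
      apply(^{ (void)obj; });
    }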
diff --git a/lib/CodeGen/CGBuilder.h b/lib/CodeGen/CGBuilder.h
index 61fe4aac3afa..d2e5eb256d3b 100644
--- a/lib/CodeGen/CGBuilder.h
+++ b/lib/CodeGen/CGBuilder.h
@@ -20,7 +20,7 @@ namespace CodeGen {
class CodeGenFunction;
-/// \brief This is an IRBuilder insertion helper that forwards to
+/// This is an IRBuilder insertion helper that forwards to
/// CodeGenFunction::InsertHelper, which adds necessary metadata to
/// instructions.
class CGBuilderInserter : protected llvm::IRBuilderDefaultInserter {
@@ -29,7 +29,7 @@ public:
explicit CGBuilderInserter(CodeGenFunction *CGF) : CGF(CGF) {}
protected:
- /// \brief This forwards to CodeGenFunction::InsertHelper.
+ /// This forwards to CodeGenFunction::InsertHelper.
void InsertHelper(llvm::Instruction *I, const llvm::Twine &Name,
llvm::BasicBlock *BB,
llvm::BasicBlock::iterator InsertPt) const;
@@ -244,6 +244,21 @@ public:
Addr.getAlignment().alignmentAtOffset(Offset));
}
+ using CGBuilderBaseTy::CreateConstInBoundsGEP2_32;
+ Address CreateConstInBoundsGEP2_32(Address Addr, unsigned Idx0,
+ unsigned Idx1, const llvm::DataLayout &DL,
+ const llvm::Twine &Name = "") {
+ auto *GEP = cast<llvm::GetElementPtrInst>(CreateConstInBoundsGEP2_32(
+ Addr.getElementType(), Addr.getPointer(), Idx0, Idx1, Name));
+ llvm::APInt Offset(
+ DL.getIndexSizeInBits(Addr.getType()->getPointerAddressSpace()), 0,
+ /*IsSigned=*/true);
+ if (!GEP->accumulateConstantOffset(DL, Offset))
+ llvm_unreachable("offset of GEP with constants is always computable");
+ return Address(GEP, Addr.getAlignment().alignmentAtOffset(
+ CharUnits::fromQuantity(Offset.getSExtValue())));
+ }
+
llvm::Value *CreateConstInBoundsByteGEP(llvm::Value *Ptr, CharUnits Offset,
const llvm::Twine &Name = "") {
assert(Ptr->getType()->getPointerElementType() == TypeCache.Int8Ty);
@@ -258,23 +273,23 @@ public:
using CGBuilderBaseTy::CreateMemCpy;
llvm::CallInst *CreateMemCpy(Address Dest, Address Src, llvm::Value *Size,
bool IsVolatile = false) {
- auto Align = std::min(Dest.getAlignment(), Src.getAlignment());
- return CreateMemCpy(Dest.getPointer(), Src.getPointer(), Size,
- Align.getQuantity(), IsVolatile);
+ return CreateMemCpy(Dest.getPointer(), Dest.getAlignment().getQuantity(),
+ Src.getPointer(), Src.getAlignment().getQuantity(),
+ Size, IsVolatile);
}
llvm::CallInst *CreateMemCpy(Address Dest, Address Src, uint64_t Size,
bool IsVolatile = false) {
- auto Align = std::min(Dest.getAlignment(), Src.getAlignment());
- return CreateMemCpy(Dest.getPointer(), Src.getPointer(), Size,
- Align.getQuantity(), IsVolatile);
+ return CreateMemCpy(Dest.getPointer(), Dest.getAlignment().getQuantity(),
+ Src.getPointer(), Src.getAlignment().getQuantity(),
+ Size, IsVolatile);
}
using CGBuilderBaseTy::CreateMemMove;
llvm::CallInst *CreateMemMove(Address Dest, Address Src, llvm::Value *Size,
bool IsVolatile = false) {
- auto Align = std::min(Dest.getAlignment(), Src.getAlignment());
- return CreateMemMove(Dest.getPointer(), Src.getPointer(), Size,
- Align.getQuantity(), IsVolatile);
+ return CreateMemMove(Dest.getPointer(), Dest.getAlignment().getQuantity(),
+ Src.getPointer(), Src.getAlignment().getQuantity(),
+ Size, IsVolatile);
}
using CGBuilderBaseTy::CreateMemSet;
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index ba54f8342f1b..0892e84a044c 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -14,6 +14,7 @@
#include "CGCXXABI.h"
#include "CGObjCRuntime.h"
#include "CGOpenCLRuntime.h"
+#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "ConstantEmitter.h"
@@ -188,7 +189,7 @@ static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
return RValue::get(Result);
}
-/// @brief Utility to insert an atomic cmpxchg instruction.
+/// Utility to insert an atomic cmpxchg instruction.
///
/// @param CGF The current codegen function.
/// @param E Builtin call expression to convert to cmpxchg.
@@ -319,7 +320,7 @@ static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
}
-/// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
+/// Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
/// depending on IntrinsicID.
///
/// \arg CGF The current codegen function.
@@ -384,7 +385,7 @@ EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
}
// The encompassing type must have a width greater than or equal to the width
- // of the specified types. Aditionally, if the encompassing type is signed,
+ // of the specified types. Additionally, if the encompassing type is signed,
// its width must be strictly greater than the width of any unsigned types
// given.
unsigned Width = 0;
@@ -478,13 +479,261 @@ CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
// LLVM only supports 0 and 2, make sure that we pass along that as a boolean.
Value *Min = Builder.getInt1((Type & 2) != 0);
- // For GCC compatability, __builtin_object_size treat NULL as unknown size.
+ // For GCC compatibility, __builtin_object_size treats NULL as unknown size.
Value *NullIsUnknown = Builder.getTrue();
return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown});
}
-// Many of MSVC builtins are on both x64 and ARM; to avoid repeating code, we
-// handle them here.
+namespace {
+/// A struct to generically describe a bit test intrinsic.
+struct BitTest {
+ enum ActionKind : uint8_t { TestOnly, Complement, Reset, Set };
+ enum InterlockingKind : uint8_t {
+ Unlocked,
+ Sequential,
+ Acquire,
+ Release,
+ NoFence
+ };
+
+ ActionKind Action;
+ InterlockingKind Interlocking;
+ bool Is64Bit;
+
+ static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
+};
+} // namespace
+
+BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) {
+ switch (BuiltinID) {
+ // Main portable variants.
+ case Builtin::BI_bittest:
+ return {TestOnly, Unlocked, false};
+ case Builtin::BI_bittestandcomplement:
+ return {Complement, Unlocked, false};
+ case Builtin::BI_bittestandreset:
+ return {Reset, Unlocked, false};
+ case Builtin::BI_bittestandset:
+ return {Set, Unlocked, false};
+ case Builtin::BI_interlockedbittestandreset:
+ return {Reset, Sequential, false};
+ case Builtin::BI_interlockedbittestandset:
+ return {Set, Sequential, false};
+
+ // X86-specific 64-bit variants.
+ case Builtin::BI_bittest64:
+ return {TestOnly, Unlocked, true};
+ case Builtin::BI_bittestandcomplement64:
+ return {Complement, Unlocked, true};
+ case Builtin::BI_bittestandreset64:
+ return {Reset, Unlocked, true};
+ case Builtin::BI_bittestandset64:
+ return {Set, Unlocked, true};
+ case Builtin::BI_interlockedbittestandreset64:
+ return {Reset, Sequential, true};
+ case Builtin::BI_interlockedbittestandset64:
+ return {Set, Sequential, true};
+
+ // ARM/AArch64-specific ordering variants.
+ case Builtin::BI_interlockedbittestandset_acq:
+ return {Set, Acquire, false};
+ case Builtin::BI_interlockedbittestandset_rel:
+ return {Set, Release, false};
+ case Builtin::BI_interlockedbittestandset_nf:
+ return {Set, NoFence, false};
+ case Builtin::BI_interlockedbittestandreset_acq:
+ return {Reset, Acquire, false};
+ case Builtin::BI_interlockedbittestandreset_rel:
+ return {Reset, Release, false};
+ case Builtin::BI_interlockedbittestandreset_nf:
+ return {Reset, NoFence, false};
+ }
+ llvm_unreachable("expected only bittest intrinsics");
+}
+
+static char bitActionToX86BTCode(BitTest::ActionKind A) {
+ switch (A) {
+ case BitTest::TestOnly: return '\0';
+ case BitTest::Complement: return 'c';
+ case BitTest::Reset: return 'r';
+ case BitTest::Set: return 's';
+ }
+ llvm_unreachable("invalid action");
+}
+
+static llvm::Value *EmitX86BitTestIntrinsic(CodeGenFunction &CGF,
+ BitTest BT,
+ const CallExpr *E, Value *BitBase,
+ Value *BitPos) {
+ char Action = bitActionToX86BTCode(BT.Action);
+ char SizeSuffix = BT.Is64Bit ? 'q' : 'l';
+
+ // Build the assembly.
+ SmallString<64> Asm;
+ raw_svector_ostream AsmOS(Asm);
+ if (BT.Interlocking != BitTest::Unlocked)
+ AsmOS << "lock ";
+ AsmOS << "bt";
+ if (Action)
+ AsmOS << Action;
+ AsmOS << SizeSuffix << " $2, ($1)\n\tsetc ${0:b}";
+
+ // Build the constraints. FIXME: We should support immediates when possible.
+ std::string Constraints = "=r,r,r,~{cc},~{flags},~{fpsr}";
+ llvm::IntegerType *IntType = llvm::IntegerType::get(
+ CGF.getLLVMContext(),
+ CGF.getContext().getTypeSize(E->getArg(1)->getType()));
+ llvm::Type *IntPtrType = IntType->getPointerTo();
+ llvm::FunctionType *FTy =
+ llvm::FunctionType::get(CGF.Int8Ty, {IntPtrType, IntType}, false);
+
+ llvm::InlineAsm *IA =
+ llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true);
+ return CGF.Builder.CreateCall(IA, {BitBase, BitPos});
+}
+
+static llvm::AtomicOrdering
+getBitTestAtomicOrdering(BitTest::InterlockingKind I) {
+ switch (I) {
+ case BitTest::Unlocked: return llvm::AtomicOrdering::NotAtomic;
+ case BitTest::Sequential: return llvm::AtomicOrdering::SequentiallyConsistent;
+ case BitTest::Acquire: return llvm::AtomicOrdering::Acquire;
+ case BitTest::Release: return llvm::AtomicOrdering::Release;
+ case BitTest::NoFence: return llvm::AtomicOrdering::Monotonic;
+ }
+ llvm_unreachable("invalid interlocking");
+}
+
+/// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of
+/// bits and a bit position and read and optionally modify the bit at that
+/// position. The position index can be arbitrarily large, i.e. it can be larger
+/// than 31 or 63, so we need an indexed load in the general case.
+static llvm::Value *EmitBitTestIntrinsic(CodeGenFunction &CGF,
+ unsigned BuiltinID,
+ const CallExpr *E) {
+ Value *BitBase = CGF.EmitScalarExpr(E->getArg(0));
+ Value *BitPos = CGF.EmitScalarExpr(E->getArg(1));
+
+ BitTest BT = BitTest::decodeBitTestBuiltin(BuiltinID);
+
+ // X86 has special BT, BTC, BTR, and BTS instructions that handle the array
+ // indexing operation internally. Use them if possible.
+ llvm::Triple::ArchType Arch = CGF.getTarget().getTriple().getArch();
+ if (Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64)
+ return EmitX86BitTestIntrinsic(CGF, BT, E, BitBase, BitPos);
+
+ // Otherwise, use generic code to load one byte and test the bit. Use all but
+ // the bottom three bits as the array index, and the bottom three bits to form
+ // a mask.
+ // Bit = (BitBaseI8[BitPos >> 3] & (1 << (BitPos & 0x7))) != 0;
+ Value *ByteIndex = CGF.Builder.CreateAShr(
+ BitPos, llvm::ConstantInt::get(BitPos->getType(), 3), "bittest.byteidx");
+ Value *BitBaseI8 = CGF.Builder.CreatePointerCast(BitBase, CGF.Int8PtrTy);
+ Address ByteAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, BitBaseI8,
+ ByteIndex, "bittest.byteaddr"),
+ CharUnits::One());
+ Value *PosLow =
+ CGF.Builder.CreateAnd(CGF.Builder.CreateTrunc(BitPos, CGF.Int8Ty),
+ llvm::ConstantInt::get(CGF.Int8Ty, 0x7));
+
+ // The updating instructions will need a mask.
+ Value *Mask = nullptr;
+ if (BT.Action != BitTest::TestOnly) {
+ Mask = CGF.Builder.CreateShl(llvm::ConstantInt::get(CGF.Int8Ty, 1), PosLow,
+ "bittest.mask");
+ }
+
+ // Check the action and ordering of the interlocked intrinsics.
+ llvm::AtomicOrdering Ordering = getBitTestAtomicOrdering(BT.Interlocking);
+
+ Value *OldByte = nullptr;
+ if (Ordering != llvm::AtomicOrdering::NotAtomic) {
+ // Emit a combined atomicrmw load/store operation for the interlocked
+ // intrinsics.
+ llvm::AtomicRMWInst::BinOp RMWOp = llvm::AtomicRMWInst::Or;
+ if (BT.Action == BitTest::Reset) {
+ Mask = CGF.Builder.CreateNot(Mask);
+ RMWOp = llvm::AtomicRMWInst::And;
+ }
+ OldByte = CGF.Builder.CreateAtomicRMW(RMWOp, ByteAddr.getPointer(), Mask,
+ Ordering);
+ } else {
+ // Emit a plain load for the non-interlocked intrinsics.
+ OldByte = CGF.Builder.CreateLoad(ByteAddr, "bittest.byte");
+ Value *NewByte = nullptr;
+ switch (BT.Action) {
+ case BitTest::TestOnly:
+ // Don't store anything.
+ break;
+ case BitTest::Complement:
+ NewByte = CGF.Builder.CreateXor(OldByte, Mask);
+ break;
+ case BitTest::Reset:
+ NewByte = CGF.Builder.CreateAnd(OldByte, CGF.Builder.CreateNot(Mask));
+ break;
+ case BitTest::Set:
+ NewByte = CGF.Builder.CreateOr(OldByte, Mask);
+ break;
+ }
+ if (NewByte)
+ CGF.Builder.CreateStore(NewByte, ByteAddr);
+ }
+
+ // However we loaded the old byte, either by plain load or atomicrmw, shift
+ // the bit into the low position and mask it to 0 or 1.
+ Value *ShiftedByte = CGF.Builder.CreateLShr(OldByte, PosLow, "bittest.shr");
+ return CGF.Builder.CreateAnd(
+ ShiftedByte, llvm::ConstantInt::get(CGF.Int8Ty, 1), "bittest.res");
+}
+
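Source-level sketch of the intrinsics this lowers (hypothetical usage; on x86 these become bt/bts/btr/btc inline assembly via EmitX86BitTestIntrinsic instead of the generic path):

    #include <intrin.h>

    unsigned char demo(long *bits) {
      // Bit 40 lies past bit 31, so the generic lowering loads
      // ((char *)bits)[40 >> 3] and tests 1 << (40 & 7).
      unsigned char old = _bittestandset(bits, 40);   // plain load/store
      _interlockedbittestandreset(bits, 40);          // atomicrmw 'and'
      return old;
    }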
+namespace {
+enum class MSVCSetJmpKind {
+ _setjmpex,
+ _setjmp3,
+ _setjmp
+};
+}
+
+/// MSVC handles setjmp a bit differently on different platforms. On every
+/// architecture except 32-bit x86, the frame address is passed. On x86, extra
+/// parameters can be passed as variadic arguments, but we always pass none.
+static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind,
+ const CallExpr *E) {
+ llvm::Value *Arg1 = nullptr;
+ llvm::Type *Arg1Ty = nullptr;
+ StringRef Name;
+ bool IsVarArg = false;
+ if (SJKind == MSVCSetJmpKind::_setjmp3) {
+ Name = "_setjmp3";
+ Arg1Ty = CGF.Int32Ty;
+ Arg1 = llvm::ConstantInt::get(CGF.IntTy, 0);
+ IsVarArg = true;
+ } else {
+ Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex";
+ Arg1Ty = CGF.Int8PtrTy;
+ Arg1 = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::frameaddress),
+ llvm::ConstantInt::get(CGF.Int32Ty, 0));
+ }
+
+ // Mark the call site and declaration with ReturnsTwice.
+ llvm::Type *ArgTypes[2] = {CGF.Int8PtrTy, Arg1Ty};
+ llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
+ CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex,
+ llvm::Attribute::ReturnsTwice);
+ llvm::Constant *SetJmpFn = CGF.CGM.CreateRuntimeFunction(
+ llvm::FunctionType::get(CGF.IntTy, ArgTypes, IsVarArg), Name,
+ ReturnsTwiceAttr, /*Local=*/true);
+
+ llvm::Value *Buf = CGF.Builder.CreateBitOrPointerCast(
+ CGF.EmitScalarExpr(E->getArg(0)), CGF.Int8PtrTy);
+ llvm::Value *Args[] = {Buf, Arg1};
+ llvm::CallSite CS = CGF.EmitRuntimeCallOrInvoke(SetJmpFn, Args);
+ CS.setAttributes(ReturnsTwiceAttr);
+ return RValue::get(CS.getInstruction());
+}
+
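A small usage sketch (hypothetical): the _setjmp handling further down routes the same source-level call to different runtime entry points per target:

    #include <setjmp.h>

    int guarded(void (*risky)(jmp_buf)) {
      jmp_buf buf;
      // On MSVC environments this lowers to _setjmp3 (x86), _setjmpex
      // (AArch64), or _setjmp (elsewhere); the call site is ReturnsTwice.
      if (_setjmp(buf))
        return 1;   // longjmp path
      risky(buf);
      return 0;     // normal path
    }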
+// Many of the MSVC builtins are on x64, ARM, and AArch64; to avoid repeating
+// code, we handle them here.
enum class CodeGenFunction::MSVCIntrin {
_BitScanForward,
_BitScanReverse,
@@ -496,7 +745,6 @@ enum class CodeGenFunction::MSVCIntrin {
_InterlockedIncrement,
_InterlockedOr,
_InterlockedXor,
- _interlockedbittestandset,
__fastfail,
};
@@ -564,22 +812,6 @@ Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
case MSVCIntrin::_InterlockedXor:
return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
- case MSVCIntrin::_interlockedbittestandset: {
- llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
- llvm::Value *Bit = EmitScalarExpr(E->getArg(1));
- AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
- AtomicRMWInst::Or, Addr,
- Builder.CreateShl(ConstantInt::get(Bit->getType(), 1), Bit),
- llvm::AtomicOrdering::SequentiallyConsistent);
- // Shift the relevant bit to the least significant position, truncate to
- // the result type, and test the low bit.
- llvm::Value *Shifted = Builder.CreateLShr(RMWI, Bit);
- llvm::Value *Truncated =
- Builder.CreateTrunc(Shifted, ConvertType(E->getType()));
- return Builder.CreateAnd(Truncated,
- ConstantInt::get(Truncated->getType(), 1));
- }
-
case MSVCIntrin::_InterlockedDecrement: {
llvm::Type *IntTy = ConvertType(E->getType());
AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
@@ -915,7 +1147,11 @@ EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow);
}
- Result = CGF.Builder.CreateTrunc(UnsignedResult, ResTy);
+ // Negate the product if it would be negative in infinite precision.
+ Result = CGF.Builder.CreateSelect(
+ IsNegative, CGF.Builder.CreateNeg(UnsignedResult), UnsignedResult);
+
+ Result = CGF.Builder.CreateTrunc(Result, ResTy);
}
assert(Overflow && Result && "Missing overflow or result");
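To see why the added select matters, an illustration with hypothetical values: a mixed-sign __builtin_mul_overflow computes an unsigned magnitude that must be re-negated before truncation whenever the infinitely precise product is negative:

    #include <stdio.h>

    int main(void) {
      int res;
      // int * unsigned takes the mixed-sign path; the true product -15
      // fits in int, so no overflow, and the magnitude 15 is negated by
      // the new select before the trunc.
      _Bool ovf = __builtin_mul_overflow(-5, 3u, &res);
      printf("%d %d\n", (int)ovf, res);   // expected: 0 -15
      return 0;
    }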
@@ -926,6 +1162,96 @@ EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
return RValue::get(Overflow);
}
+static llvm::Value *dumpRecord(CodeGenFunction &CGF, QualType RType,
+ Value *&RecordPtr, CharUnits Align, Value *Func,
+ int Lvl) {
+ const auto *RT = RType->getAs<RecordType>();
+ ASTContext &Context = CGF.getContext();
+ RecordDecl *RD = RT->getDecl()->getDefinition();
+ ASTContext &Ctx = RD->getASTContext();
+ const ASTRecordLayout &RL = Ctx.getASTRecordLayout(RD);
+ std::string Pad = std::string(Lvl * 4, ' ');
+
+ Value *GString =
+ CGF.Builder.CreateGlobalStringPtr(RType.getAsString() + " {\n");
+ Value *Res = CGF.Builder.CreateCall(Func, {GString});
+
+ static llvm::DenseMap<QualType, const char *> Types;
+ if (Types.empty()) {
+ Types[Context.CharTy] = "%c";
+ Types[Context.BoolTy] = "%d";
+ Types[Context.SignedCharTy] = "%hhd";
+ Types[Context.UnsignedCharTy] = "%hhu";
+ Types[Context.IntTy] = "%d";
+ Types[Context.UnsignedIntTy] = "%u";
+ Types[Context.LongTy] = "%ld";
+ Types[Context.UnsignedLongTy] = "%lu";
+ Types[Context.LongLongTy] = "%lld";
+ Types[Context.UnsignedLongLongTy] = "%llu";
+ Types[Context.ShortTy] = "%hd";
+ Types[Context.UnsignedShortTy] = "%hu";
+ Types[Context.VoidPtrTy] = "%p";
+ Types[Context.FloatTy] = "%f";
+ Types[Context.DoubleTy] = "%f";
+ Types[Context.LongDoubleTy] = "%Lf";
+ Types[Context.getPointerType(Context.CharTy)] = "%s";
+ Types[Context.getPointerType(Context.getConstType(Context.CharTy))] = "%s";
+ }
+
+ for (const auto *FD : RD->fields()) {
+ uint64_t Off = RL.getFieldOffset(FD->getFieldIndex());
+ Off = Ctx.toCharUnitsFromBits(Off).getQuantity();
+
+ Value *FieldPtr = RecordPtr;
+ if (RD->isUnion())
+ FieldPtr = CGF.Builder.CreatePointerCast(
+ FieldPtr, CGF.ConvertType(Context.getPointerType(FD->getType())));
+ else
+ FieldPtr = CGF.Builder.CreateStructGEP(CGF.ConvertType(RType), FieldPtr,
+ FD->getFieldIndex());
+
+ GString = CGF.Builder.CreateGlobalStringPtr(
+ llvm::Twine(Pad)
+ .concat(FD->getType().getAsString())
+ .concat(llvm::Twine(' '))
+ .concat(FD->getNameAsString())
+ .concat(" : ")
+ .str());
+ Value *TmpRes = CGF.Builder.CreateCall(Func, {GString});
+ Res = CGF.Builder.CreateAdd(Res, TmpRes);
+
+ QualType CanonicalType =
+ FD->getType().getUnqualifiedType().getCanonicalType();
+
+ // If the field is itself a record type, recurse into it.
+ if (CanonicalType->isRecordType()) {
+ Value *TmpRes =
+ dumpRecord(CGF, CanonicalType, FieldPtr, Align, Func, Lvl + 1);
+ Res = CGF.Builder.CreateAdd(TmpRes, Res);
+ continue;
+ }
+
+ // Pick the best printf format for the current field, defaulting to %p.
+ llvm::Twine Format = Types.find(CanonicalType) == Types.end()
+ ? Types[Context.VoidPtrTy]
+ : Types[CanonicalType];
+
+ Address FieldAddress = Address(FieldPtr, Align);
+ FieldPtr = CGF.Builder.CreateLoad(FieldAddress);
+
+ // FIXME Need to handle bitfield here
+ GString = CGF.Builder.CreateGlobalStringPtr(
+ Format.concat(llvm::Twine('\n')).str());
+ TmpRes = CGF.Builder.CreateCall(Func, {GString, FieldPtr});
+ Res = CGF.Builder.CreateAdd(Res, TmpRes);
+ }
+
+ GString = CGF.Builder.CreateGlobalStringPtr(Pad + "}\n");
+ Value *TmpRes = CGF.Builder.CreateCall(Func, {GString});
+ Res = CGF.Builder.CreateAdd(Res, TmpRes);
+ return Res;
+}
+
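A hedged usage sketch of the builtin this helper implements (__builtin_dump_struct, handled in EmitBuiltinExpr below): it walks the record layout and emits one call to the supplied printf-style function per field:

    #include <stdio.h>

    struct Point { int x, y; const char *label; };

    void show(void) {
      struct Point p = {1, 2, "origin"};
      __builtin_dump_struct(&p, &printf);
      // Prints roughly:
      //   struct Point {
      //       int x : 1
      //       int y : 2
      //       const char * label : origin
      //   }
    }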
RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
unsigned BuiltinID, const CallExpr *E,
ReturnValueSlot ReturnValue) {
@@ -962,6 +1288,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI__builtin_copysign:
case Builtin::BI__builtin_copysignf:
case Builtin::BI__builtin_copysignl:
+ case Builtin::BI__builtin_copysignf128:
return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
case Builtin::BIcos:
@@ -994,6 +1321,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI__builtin_fabs:
case Builtin::BI__builtin_fabsf:
case Builtin::BI__builtin_fabsl:
+ case Builtin::BI__builtin_fabsf128:
return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
case Builtin::BIfloor:
@@ -1154,16 +1482,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI__builtin_abs:
case Builtin::BI__builtin_labs:
case Builtin::BI__builtin_llabs: {
+ // X < 0 ? -X : X
+ // The negation has 'nsw' because abs of INT_MIN is undefined.
Value *ArgValue = EmitScalarExpr(E->getArg(0));
-
- Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
- Value *CmpResult =
- Builder.CreateICmpSGE(ArgValue,
- llvm::Constant::getNullValue(ArgValue->getType()),
- "abscond");
- Value *Result =
- Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");
-
+ Value *NegOp = Builder.CreateNSWNeg(ArgValue, "neg");
+ Constant *Zero = llvm::Constant::getNullValue(ArgValue->getType());
+ Value *CmpResult = Builder.CreateICmpSLT(ArgValue, Zero, "abscond");
+ Value *Result = Builder.CreateSelect(CmpResult, NegOp, ArgValue, "abs");
return RValue::get(Result);
}
case Builtin::BI__builtin_conj:
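In source terms, the abs lowering above is the usual compare-and-select (sketch):

    int my_abs(int x) {
      // icmp slt + sub nsw + select; nsw encodes that -INT_MIN is undefined.
      return x < 0 ? -x : x;
    }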
@@ -1190,6 +1515,18 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
return RValue::get(ComplexVal.first);
}
+ case Builtin::BI__builtin_dump_struct: {
+ Value *Func = EmitScalarExpr(E->getArg(1)->IgnoreImpCasts());
+ CharUnits Arg0Align = EmitPointerWithAlignment(E->getArg(0)).getAlignment();
+
+ const Expr *Arg0 = E->getArg(0)->IgnoreImpCasts();
+ QualType Arg0Type = Arg0->getType()->getPointeeType();
+
+ Value *RecordPtr = EmitScalarExpr(Arg0);
+ Value *Res = dumpRecord(*this, Arg0Type, RecordPtr, Arg0Align, Func, 0);
+ return RValue::get(Res);
+ }
+
case Builtin::BI__builtin_cimag:
case Builtin::BI__builtin_cimagf:
case Builtin::BI__builtin_cimagl:
@@ -1300,20 +1637,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
llvm::Type *ArgType = Val->getType();
Shift = Builder.CreateIntCast(Shift, ArgType, false);
- unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
- Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
- Value *ArgZero = llvm::Constant::getNullValue(ArgType);
-
+ unsigned ArgWidth = ArgType->getIntegerBitWidth();
Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
- Shift = Builder.CreateAnd(Shift, Mask);
- Value *LeftShift = Builder.CreateSub(ArgTypeSize, Shift);
-
- Value *RightShifted = Builder.CreateLShr(Val, Shift);
- Value *LeftShifted = Builder.CreateShl(Val, LeftShift);
- Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);
- Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
- Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
+ Value *RightShiftAmt = Builder.CreateAnd(Shift, Mask);
+ Value *RightShifted = Builder.CreateLShr(Val, RightShiftAmt);
+ Value *LeftShiftAmt = Builder.CreateAnd(Builder.CreateNeg(Shift), Mask);
+ Value *LeftShifted = Builder.CreateShl(Val, LeftShiftAmt);
+ Value *Result = Builder.CreateOr(LeftShifted, RightShifted);
return RValue::get(Result);
}
case Builtin::BI_rotl8:
@@ -1326,20 +1657,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
llvm::Type *ArgType = Val->getType();
Shift = Builder.CreateIntCast(Shift, ArgType, false);
- unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
- Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
- Value *ArgZero = llvm::Constant::getNullValue(ArgType);
-
+ unsigned ArgWidth = ArgType->getIntegerBitWidth();
Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
- Shift = Builder.CreateAnd(Shift, Mask);
- Value *RightShift = Builder.CreateSub(ArgTypeSize, Shift);
-
- Value *LeftShifted = Builder.CreateShl(Val, Shift);
- Value *RightShifted = Builder.CreateLShr(Val, RightShift);
- Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);
- Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
- Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
+ Value *LeftShiftAmt = Builder.CreateAnd(Shift, Mask);
+ Value *LeftShifted = Builder.CreateShl(Val, LeftShiftAmt);
+ Value *RightShiftAmt = Builder.CreateAnd(Builder.CreateNeg(Shift), Mask);
+ Value *RightShifted = Builder.CreateLShr(Val, RightShiftAmt);
+ Value *Result = Builder.CreateOr(LeftShifted, RightShifted);
return RValue::get(Result);
}
case Builtin::BI__builtin_unpredictable: {
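Both rotate hunks above now emit the standard branch-free idiom; masking each shift amount keeps a rotate by zero well defined (sketch for 32-bit):

    unsigned rotr32(unsigned v, unsigned s) {
      return (v >> (s & 31)) | (v << (-s & 31));
    }

    unsigned rotl32(unsigned v, unsigned s) {
      return (v << (s & 31)) | (v >> (-s & 31));
    }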
@@ -1735,6 +2060,63 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
return RValue::get(Dest.getPointer());
}
+ case Builtin::BI__builtin_wmemcmp: {
+ // The MSVC runtime library does not provide a definition of wmemcmp, so we
+ // need an inline implementation.
+ if (!getTarget().getTriple().isOSMSVCRT())
+ break;
+
+ llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
+
+ Value *Dst = EmitScalarExpr(E->getArg(0));
+ Value *Src = EmitScalarExpr(E->getArg(1));
+ Value *Size = EmitScalarExpr(E->getArg(2));
+
+ BasicBlock *Entry = Builder.GetInsertBlock();
+ BasicBlock *CmpGT = createBasicBlock("wmemcmp.gt");
+ BasicBlock *CmpLT = createBasicBlock("wmemcmp.lt");
+ BasicBlock *Next = createBasicBlock("wmemcmp.next");
+ BasicBlock *Exit = createBasicBlock("wmemcmp.exit");
+ Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
+ Builder.CreateCondBr(SizeEq0, Exit, CmpGT);
+
+ EmitBlock(CmpGT);
+ PHINode *DstPhi = Builder.CreatePHI(Dst->getType(), 2);
+ DstPhi->addIncoming(Dst, Entry);
+ PHINode *SrcPhi = Builder.CreatePHI(Src->getType(), 2);
+ SrcPhi->addIncoming(Src, Entry);
+ PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
+ SizePhi->addIncoming(Size, Entry);
+ CharUnits WCharAlign =
+ getContext().getTypeAlignInChars(getContext().WCharTy);
+ Value *DstCh = Builder.CreateAlignedLoad(WCharTy, DstPhi, WCharAlign);
+ Value *SrcCh = Builder.CreateAlignedLoad(WCharTy, SrcPhi, WCharAlign);
+ Value *DstGtSrc = Builder.CreateICmpUGT(DstCh, SrcCh);
+ Builder.CreateCondBr(DstGtSrc, Exit, CmpLT);
+
+ EmitBlock(CmpLT);
+ Value *DstLtSrc = Builder.CreateICmpULT(DstCh, SrcCh);
+ Builder.CreateCondBr(DstLtSrc, Exit, Next);
+
+ EmitBlock(Next);
+ Value *NextDst = Builder.CreateConstInBoundsGEP1_32(WCharTy, DstPhi, 1);
+ Value *NextSrc = Builder.CreateConstInBoundsGEP1_32(WCharTy, SrcPhi, 1);
+ Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
+ Value *NextSizeEq0 =
+ Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
+ Builder.CreateCondBr(NextSizeEq0, Exit, CmpGT);
+ DstPhi->addIncoming(NextDst, Next);
+ SrcPhi->addIncoming(NextSrc, Next);
+ SizePhi->addIncoming(NextSize, Next);
+
+ EmitBlock(Exit);
+ PHINode *Ret = Builder.CreatePHI(IntTy, 4);
+ Ret->addIncoming(ConstantInt::get(IntTy, 0), Entry);
+ Ret->addIncoming(ConstantInt::get(IntTy, 1), CmpGT);
+ Ret->addIncoming(ConstantInt::get(IntTy, -1), CmpLT);
+ Ret->addIncoming(ConstantInt::get(IntTy, 0), Next);
+ return RValue::get(Ret);
+ }
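A reference C version of the expansion above (my sketch; comments name the corresponding basic blocks, and the unsigned element compares match wchar_t being unsigned 16-bit on MSVC targets):

    #include <stddef.h>
    #include <wchar.h>

    int wmemcmp_ref(const wchar_t *a, const wchar_t *b, size_t n) {
      for (; n != 0; --n, ++a, ++b) {   // wmemcmp.next / size check
        if (*a > *b) return 1;          // wmemcmp.gt
        if (*a < *b) return -1;         // wmemcmp.lt
      }
      return 0;                         // wmemcmp.exit
    }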
case Builtin::BI__builtin_dwarf_cfa: {
// The offset in bytes from the first argument to the CFA.
//
@@ -2033,7 +2415,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI__sync_synchronize: {
// We assume this is supposed to correspond to a C++0x-style
// sequentially-consistent fence (i.e. this is only usable for
- // synchonization, not device I/O or anything like that). This intrinsic
+ // synchronization, not device I/O or anything like that). This intrinsic
// is really badly designed in the sense that in theory, there isn't
// any way to safely use it... but in practice, it mostly works
// to use it with non-atomic loads and stores to get acquire/release
@@ -2548,11 +2930,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI__builtin_addressof:
return RValue::get(EmitLValue(E->getArg(0)).getPointer());
case Builtin::BI__builtin_operator_new:
- return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
- E->getArg(0), false);
+ return EmitBuiltinNewDeleteCall(
+ E->getCallee()->getType()->castAs<FunctionProtoType>(), E, false);
case Builtin::BI__builtin_operator_delete:
- return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
- E->getArg(0), true);
+ return EmitBuiltinNewDeleteCall(
+ E->getCallee()->getType()->castAs<FunctionProtoType>(), E, true);
+
case Builtin::BI__noop:
// __noop always evaluates to an integer literal zero.
return RValue::get(ConstantInt::get(IntTy, 0));
@@ -2639,9 +3022,26 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI_InterlockedXor16:
case Builtin::BI_InterlockedXor:
return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
+
+ case Builtin::BI_bittest64:
+ case Builtin::BI_bittest:
+ case Builtin::BI_bittestandcomplement64:
+ case Builtin::BI_bittestandcomplement:
+ case Builtin::BI_bittestandreset64:
+ case Builtin::BI_bittestandreset:
+ case Builtin::BI_bittestandset64:
+ case Builtin::BI_bittestandset:
+ case Builtin::BI_interlockedbittestandreset:
+ case Builtin::BI_interlockedbittestandreset64:
+ case Builtin::BI_interlockedbittestandset64:
case Builtin::BI_interlockedbittestandset:
- return RValue::get(
- EmitMSVCBuiltinExpr(MSVCIntrin::_interlockedbittestandset, E));
+ case Builtin::BI_interlockedbittestandset_acq:
+ case Builtin::BI_interlockedbittestandset_rel:
+ case Builtin::BI_interlockedbittestandset_nf:
+ case Builtin::BI_interlockedbittestandreset_acq:
+ case Builtin::BI_interlockedbittestandreset_rel:
+ case Builtin::BI_interlockedbittestandreset_nf:
+ return RValue::get(EmitBitTestIntrinsic(*this, BuiltinID, E));
case Builtin::BI__exception_code:
case Builtin::BI_exception_code:
@@ -2652,59 +3052,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI__abnormal_termination:
case Builtin::BI_abnormal_termination:
return RValue::get(EmitSEHAbnormalTermination());
- case Builtin::BI_setjmpex: {
- if (getTarget().getTriple().isOSMSVCRT()) {
- llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
- llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
- getLLVMContext(), llvm::AttributeList::FunctionIndex,
- llvm::Attribute::ReturnsTwice);
- llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction(
- llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
- "_setjmpex", ReturnsTwiceAttr, /*Local=*/true);
- llvm::Value *Buf = Builder.CreateBitOrPointerCast(
- EmitScalarExpr(E->getArg(0)), Int8PtrTy);
- llvm::Value *FrameAddr =
- Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
- ConstantInt::get(Int32Ty, 0));
- llvm::Value *Args[] = {Buf, FrameAddr};
- llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args);
- CS.setAttributes(ReturnsTwiceAttr);
- return RValue::get(CS.getInstruction());
- }
+ case Builtin::BI_setjmpex:
+ if (getTarget().getTriple().isOSMSVCRT())
+ return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
break;
- }
- case Builtin::BI_setjmp: {
+ case Builtin::BI_setjmp:
if (getTarget().getTriple().isOSMSVCRT()) {
- llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
- getLLVMContext(), llvm::AttributeList::FunctionIndex,
- llvm::Attribute::ReturnsTwice);
- llvm::Value *Buf = Builder.CreateBitOrPointerCast(
- EmitScalarExpr(E->getArg(0)), Int8PtrTy);
- llvm::CallSite CS;
- if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
- llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy};
- llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction(
- llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/true),
- "_setjmp3", ReturnsTwiceAttr, /*Local=*/true);
- llvm::Value *Count = ConstantInt::get(IntTy, 0);
- llvm::Value *Args[] = {Buf, Count};
- CS = EmitRuntimeCallOrInvoke(SetJmp3, Args);
- } else {
- llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
- llvm::Constant *SetJmp = CGM.CreateRuntimeFunction(
- llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
- "_setjmp", ReturnsTwiceAttr, /*Local=*/true);
- llvm::Value *FrameAddr =
- Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
- ConstantInt::get(Int32Ty, 0));
- llvm::Value *Args[] = {Buf, FrameAddr};
- CS = EmitRuntimeCallOrInvoke(SetJmp, Args);
- }
- CS.setAttributes(ReturnsTwiceAttr);
- return RValue::get(CS.getInstruction());
+ if (getTarget().getTriple().getArch() == llvm::Triple::x86)
+ return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp3, E);
+ else if (getTarget().getTriple().getArch() == llvm::Triple::aarch64)
+ return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
+ return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp, E);
}
break;
- }
case Builtin::BI__GetExceptionInfo: {
if (llvm::GlobalVariable *GV =
@@ -2732,6 +3092,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
return EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
case Builtin::BI__builtin_coro_frame:
return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
+ case Builtin::BI__builtin_coro_noop:
+ return EmitCoroutineIntrinsic(E, Intrinsic::coro_noop);
case Builtin::BI__builtin_coro_free:
return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
case Builtin::BI__builtin_coro_destroy:
@@ -2882,11 +3244,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
// OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
case Builtin::BIget_pipe_num_packets:
case Builtin::BIget_pipe_max_packets: {
- const char *Name;
+ const char *BaseName;
+ const PipeType *PipeTy = E->getArg(0)->getType()->getAs<PipeType>();
if (BuiltinID == Builtin::BIget_pipe_num_packets)
- Name = "__get_pipe_num_packets";
+ BaseName = "__get_pipe_num_packets";
else
- Name = "__get_pipe_max_packets";
+ BaseName = "__get_pipe_max_packets";
+ auto Name = std::string(BaseName) +
+ std::string(PipeTy->isReadOnly() ? "_ro" : "_wo");
// Building the generic function prototype.
Value *Arg0 = EmitScalarExpr(E->getArg(0));
@@ -2992,10 +3357,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
return Ptr;
};
- // Could have events and/or vaargs.
+ // Could have events and/or varargs.
if (E->getArg(3)->getType()->isBlockPointerType()) {
// No events passed, but has variadic arguments.
- Name = "__enqueue_kernel_vaargs";
+ Name = "__enqueue_kernel_varargs";
auto Info =
CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
llvm::Value *Kernel =
@@ -3063,7 +3428,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
// Pass the number of variadics to the runtime function too.
Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
ArgTys.push_back(Int32Ty);
- Name = "__enqueue_kernel_events_vaargs";
+ Name = "__enqueue_kernel_events_varargs";
auto *PtrToSizeArray = CreateArrayForSizeVar(7);
Args.push_back(PtrToSizeArray);
@@ -3104,7 +3469,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
CGM.CreateRuntimeFunction(
llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
false),
- "__get_kernel_preferred_work_group_multiple_impl"),
+ "__get_kernel_preferred_work_group_size_multiple_impl"),
{Kernel, Arg}));
}
case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
@@ -3175,6 +3540,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI__xray_customevent: {
if (!ShouldXRayInstrumentFunction())
return RValue::getIgnored();
+
+ if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
+ XRayInstrKind::Custom))
+ return RValue::getIgnored();
+
if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents())
return RValue::getIgnored();
@@ -3198,6 +3568,44 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
}
+ case Builtin::BI__xray_typedevent: {
+ // TODO: There should be a way to always emit events even if the current
+ // function is not instrumented. Losing events in a stream can cripple
+ // a trace.
+ if (!ShouldXRayInstrumentFunction())
+ return RValue::getIgnored();
+
+ if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
+ XRayInstrKind::Typed))
+ return RValue::getIgnored();
+
+ if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
+ if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayTypedEvents())
+ return RValue::getIgnored();
+
+ Function *F = CGM.getIntrinsic(Intrinsic::xray_typedevent);
+ auto FTy = F->getFunctionType();
+ auto Arg0 = EmitScalarExpr(E->getArg(0));
+ auto PTy0 = FTy->getParamType(0);
+ if (PTy0 != Arg0->getType())
+ Arg0 = Builder.CreateTruncOrBitCast(Arg0, PTy0);
+ auto Arg1 = E->getArg(1);
+ auto Arg1Val = EmitScalarExpr(Arg1);
+ auto Arg1Ty = Arg1->getType();
+ auto PTy1 = FTy->getParamType(1);
+ if (PTy1 != Arg1Val->getType()) {
+ if (Arg1Ty->isArrayType())
+ Arg1Val = EmitArrayToPointerDecay(Arg1).getPointer();
+ else
+ Arg1Val = Builder.CreatePointerCast(Arg1Val, PTy1);
+ }
+ auto Arg2 = EmitScalarExpr(E->getArg(2));
+ auto PTy2 = FTy->getParamType(2);
+ if (PTy2 != Arg2->getType())
+ Arg2 = Builder.CreateTruncOrBitCast(Arg2, PTy2);
+ return RValue::get(Builder.CreateCall(F, {Arg0, Arg1Val, Arg2}));
+ }
+
case Builtin::BI__builtin_ms_va_start:
case Builtin::BI__builtin_ms_va_end:
return RValue::get(
@@ -3246,6 +3654,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
// can move this up to the beginning of the function.
checkTargetFeatures(E, FD);
+ if (unsigned VectorWidth = getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID))
+ LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth);
+
// See if we have a target specific intrinsic.
const char *Name = getContext().BuiltinInfo.getName(BuiltinID);
Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
@@ -3253,7 +3664,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
if (!Prefix.empty()) {
IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix.data(), Name);
- // NOTE we dont need to perform a compatibility flag check here since the
+ // NOTE we don't need to perform a compatibility flag check here since the
// intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the
// MS builtins via ALL_MS_LANGUAGES and are filtered earlier.
if (IntrinsicID == Intrinsic::not_intrinsic)
@@ -3378,7 +3789,7 @@ Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
NeonTypeFlags TypeFlags,
- llvm::Triple::ArchType Arch,
+ bool HasLegalHalfType=true,
bool V1Ty=false) {
int IsQuad = TypeFlags.isQuad();
switch (TypeFlags.getEltType()) {
@@ -3389,9 +3800,7 @@ static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
case NeonTypeFlags::Poly16:
return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
case NeonTypeFlags::Float16:
- // FIXME: Only AArch64 backend can so far properly handle half types.
- // Remove else part once ARM backend support for half is complete.
- if (Arch == llvm::Triple::aarch64)
+ if (HasLegalHalfType)
return llvm::VectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
else
return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
@@ -3454,7 +3863,7 @@ Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
return ConstantInt::get(Ty, neg ? -SV : SV);
}
-// \brief Right-shift a vector by a constant.
+// Right-shift a vector by a constant.
Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
llvm::Type *Ty, bool usgn,
const char *name) {
@@ -3557,13 +3966,24 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
+ NEONMAP0(vceqz_v),
+ NEONMAP0(vceqzq_v),
+ NEONMAP0(vcgez_v),
+ NEONMAP0(vcgezq_v),
+ NEONMAP0(vcgtz_v),
+ NEONMAP0(vcgtzq_v),
+ NEONMAP0(vclez_v),
+ NEONMAP0(vclezq_v),
NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
+ NEONMAP0(vcltz_v),
+ NEONMAP0(vcltzq_v),
NEONMAP1(vclz_v, ctlz, Add1ArgType),
NEONMAP1(vclzq_v, ctlz, Add1ArgType),
NEONMAP1(vcnt_v, ctpop, Add1ArgType),
NEONMAP1(vcntq_v, ctpop, Add1ArgType),
NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
+ NEONMAP0(vcvt_f16_v),
NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
NEONMAP0(vcvt_f32_v),
NEONMAP2(vcvt_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
@@ -3583,6 +4003,7 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
NEONMAP1(vcvta_s16_v, arm_neon_vcvtas, 0),
NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
+ NEONMAP1(vcvta_u16_v, arm_neon_vcvtau, 0),
NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
NEONMAP1(vcvtaq_s16_v, arm_neon_vcvtas, 0),
@@ -3627,6 +4048,7 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
NEONMAP1(vcvtpq_u16_v, arm_neon_vcvtpu, 0),
NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
+ NEONMAP0(vcvtq_f16_v),
NEONMAP0(vcvtq_f32_v),
NEONMAP2(vcvtq_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
@@ -3642,6 +4064,8 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
NEONMAP0(vcvtq_u16_v),
NEONMAP0(vcvtq_u32_v),
NEONMAP0(vcvtq_u64_v),
+ NEONMAP2(vdot_v, arm_neon_udot, arm_neon_sdot, 0),
+ NEONMAP2(vdotq_v, arm_neon_udot, arm_neon_sdot, 0),
NEONMAP0(vext_v),
NEONMAP0(vextq_v),
NEONMAP0(vfma_v),
@@ -3652,18 +4076,30 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
NEONMAP0(vld1_dup_v),
NEONMAP1(vld1_v, arm_neon_vld1, 0),
+ NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
+ NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
+ NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
NEONMAP0(vld1q_dup_v),
NEONMAP1(vld1q_v, arm_neon_vld1, 0),
+ NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
+ NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
+ NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
+ NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
NEONMAP1(vld2_v, arm_neon_vld2, 0),
+ NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
NEONMAP1(vld2q_v, arm_neon_vld2, 0),
+ NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
NEONMAP1(vld3_v, arm_neon_vld3, 0),
+ NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
NEONMAP1(vld3q_v, arm_neon_vld3, 0),
+ NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
NEONMAP1(vld4_v, arm_neon_vld4, 0),
+ NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
NEONMAP1(vld4q_v, arm_neon_vld4, 0),
NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
@@ -3722,6 +4158,8 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
+ NEONMAP0(vrndi_v),
+ NEONMAP0(vrndiq_v),
NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
@@ -3755,7 +4193,13 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
NEONMAP0(vshrn_n_v),
NEONMAP0(vshrq_n_v),
NEONMAP1(vst1_v, arm_neon_vst1, 0),
+ NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
+ NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
+ NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
NEONMAP1(vst1q_v, arm_neon_vst1, 0),
+ NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
+ NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
+ NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
NEONMAP1(vst2_v, arm_neon_vst2, 0),
NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
@@ -3795,8 +4239,18 @@ static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
+ NEONMAP0(vceqz_v),
+ NEONMAP0(vceqzq_v),
+ NEONMAP0(vcgez_v),
+ NEONMAP0(vcgezq_v),
+ NEONMAP0(vcgtz_v),
+ NEONMAP0(vcgtzq_v),
+ NEONMAP0(vclez_v),
+ NEONMAP0(vclezq_v),
NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
+ NEONMAP0(vcltz_v),
+ NEONMAP0(vcltzq_v),
NEONMAP1(vclz_v, ctlz, Add1ArgType),
NEONMAP1(vclzq_v, ctlz, Add1ArgType),
NEONMAP1(vcnt_v, ctpop, Add1ArgType),
@@ -3826,6 +4280,8 @@ static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
+ NEONMAP2(vdot_v, aarch64_neon_udot, aarch64_neon_sdot, 0),
+ NEONMAP2(vdotq_v, aarch64_neon_udot, aarch64_neon_sdot, 0),
NEONMAP0(vext_v),
NEONMAP0(vextq_v),
NEONMAP0(vfma_v),
@@ -3834,6 +4290,12 @@ static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
+ NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
+ NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
+ NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
+ NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
+ NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
+ NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
NEONMAP0(vmovl_v),
NEONMAP0(vmovn_v),
NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
@@ -3874,6 +4336,8 @@ static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
+ NEONMAP0(vrndi_v),
+ NEONMAP0(vrndiq_v),
NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
@@ -3897,6 +4361,12 @@ static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
NEONMAP0(vshr_n_v),
NEONMAP0(vshrn_n_v),
NEONMAP0(vshrq_n_v),
+ NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
+ NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
+ NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
+ NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
+ NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
+ NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
NEONMAP0(vsubhn_v),
NEONMAP0(vtst_v),
NEONMAP0(vtstq_v),
@@ -4095,6 +4565,37 @@ static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
+  // FP16 scalar intrinsics go here.
+ NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
+ NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
+ NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
+ NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
+ NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
+ NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
+ NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
+ NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
+ NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
+ NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
+ NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
+ NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
+ NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
+ NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
};
#undef NEONMAP0
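The maps above must stay sorted by builtin ID: the emitter locates entries by
binary search. A minimal sketch of that lookup, assuming the NeonIntrinsicInfo
comparators defined earlier in this file:

    static const NeonIntrinsicInfo *
    findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap,
                           unsigned BuiltinID) {
      // The table is sorted, so a binary search on BuiltinID suffices.
      assert(std::is_sorted(IntrinsicMap.begin(), IntrinsicMap.end()) &&
             "NEON intrinsic map is not sorted");
      const NeonIntrinsicInfo *Builtin =
          std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID);
      if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
        return Builtin;
      return nullptr;
    }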
@@ -4244,8 +4745,9 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
NeonTypeFlags Type(NeonTypeConst.getZExtValue());
bool Usgn = Type.isUnsigned();
bool Quad = Type.isQuad();
+ const bool HasLegalHalfType = getTarget().hasLegalHalfType();
- llvm::VectorType *VTy = GetNeonType(this, Type, Arch);
+ llvm::VectorType *VTy = GetNeonType(this, Type, HasLegalHalfType);
llvm::Type *Ty = VTy;
if (!Ty)
return nullptr;
@@ -4310,6 +4812,26 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
return EmitNeonCall(F, Ops, NameHint);
}
+ case NEON::BI__builtin_neon_vceqz_v:
+ case NEON::BI__builtin_neon_vceqzq_v:
+ return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
+ ICmpInst::ICMP_EQ, "vceqz");
+ case NEON::BI__builtin_neon_vcgez_v:
+ case NEON::BI__builtin_neon_vcgezq_v:
+ return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
+ ICmpInst::ICMP_SGE, "vcgez");
+ case NEON::BI__builtin_neon_vclez_v:
+ case NEON::BI__builtin_neon_vclezq_v:
+ return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
+ ICmpInst::ICMP_SLE, "vclez");
+ case NEON::BI__builtin_neon_vcgtz_v:
+ case NEON::BI__builtin_neon_vcgtzq_v:
+ return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
+ ICmpInst::ICMP_SGT, "vcgtz");
+ case NEON::BI__builtin_neon_vcltz_v:
+ case NEON::BI__builtin_neon_vcltzq_v:
+ return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
+ ICmpInst::ICMP_SLT, "vcltz");
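Each of these zero-compare builtins lowers to one IR compare against a zero
vector plus a sign extension; a hedged sketch of the shape
EmitAArch64CompareBuiltinExpr produces (Fp/Ip are the predicates passed above):

    Value *Zero = llvm::Constant::getNullValue(Op->getType());
    Value *Cmp = Op->getType()->isFPOrFPVectorTy()
                     ? Builder.CreateFCmp(Fp, Op, Zero)
                     : Builder.CreateICmp(Ip, Op, Zero);
    // Sign-extend the i1 lanes so each element is all-ones or all-zeros.
    return Builder.CreateSExt(Cmp, Ty, Name);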
case NEON::BI__builtin_neon_vclz_v:
case NEON::BI__builtin_neon_vclzq_v:
    // We generate a target-independent intrinsic, which needs a second argument
@@ -4319,13 +4841,15 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
case NEON::BI__builtin_neon_vcvt_f32_v:
case NEON::BI__builtin_neon_vcvtq_f32_v:
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad), Arch);
+ Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
+ HasLegalHalfType);
return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
: Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
case NEON::BI__builtin_neon_vcvt_f16_v:
case NEON::BI__builtin_neon_vcvtq_f16_v:
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad), Arch);
+ Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
+ HasLegalHalfType);
return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
: Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
case NEON::BI__builtin_neon_vcvt_n_f16_v:
@@ -4374,6 +4898,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
case NEON::BI__builtin_neon_vcvta_s16_v:
case NEON::BI__builtin_neon_vcvta_s32_v:
case NEON::BI__builtin_neon_vcvta_s64_v:
+ case NEON::BI__builtin_neon_vcvta_u16_v:
case NEON::BI__builtin_neon_vcvta_u32_v:
case NEON::BI__builtin_neon_vcvta_u64_v:
case NEON::BI__builtin_neon_vcvtaq_s16_v:
@@ -4448,12 +4973,33 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
Ops.push_back(getAlignmentValue32(PtrOp0));
return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
}
+ case NEON::BI__builtin_neon_vld1_x2_v:
+ case NEON::BI__builtin_neon_vld1q_x2_v:
+ case NEON::BI__builtin_neon_vld1_x3_v:
+ case NEON::BI__builtin_neon_vld1q_x3_v:
+ case NEON::BI__builtin_neon_vld1_x4_v:
+ case NEON::BI__builtin_neon_vld1q_x4_v: {
+ llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
+ Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
+ llvm::Type *Tys[2] = { VTy, PTy };
+ Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
+ Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
+ Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
+ Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+ return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
+ }
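The vld1_xN builtins return an aggregate of two to four vectors, so the
intrinsic's struct result is written back through the pointer in Ops[0].
A hedged ACLE-level usage example:

    #include <arm_neon.h>
    // Loads 8 consecutive floats into two q registers via vld1x2/ld1x2.
    float32x4x2_t load8(const float *p) {
      return vld1q_f32_x2(p);
    }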
case NEON::BI__builtin_neon_vld2_v:
case NEON::BI__builtin_neon_vld2q_v:
case NEON::BI__builtin_neon_vld3_v:
case NEON::BI__builtin_neon_vld3q_v:
case NEON::BI__builtin_neon_vld4_v:
- case NEON::BI__builtin_neon_vld4q_v: {
+ case NEON::BI__builtin_neon_vld4q_v:
+ case NEON::BI__builtin_neon_vld2_dup_v:
+ case NEON::BI__builtin_neon_vld2q_dup_v:
+ case NEON::BI__builtin_neon_vld3_dup_v:
+ case NEON::BI__builtin_neon_vld3q_dup_v:
+ case NEON::BI__builtin_neon_vld4_dup_v:
+ case NEON::BI__builtin_neon_vld4q_dup_v: {
llvm::Type *Tys[] = {Ty, Int8PtrTy};
Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
Value *Align = getAlignmentValue32(PtrOp1);
@@ -4552,7 +5098,10 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
case NEON::BI__builtin_neon_vrsqrteq_v:
Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
-
+ case NEON::BI__builtin_neon_vrndi_v:
+ case NEON::BI__builtin_neon_vrndiq_v:
+ Int = Intrinsic::nearbyint;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
case NEON::BI__builtin_neon_vrshr_n_v:
case NEON::BI__builtin_neon_vrshrq_n_v:
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
@@ -4603,6 +5152,23 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
Ops.push_back(getAlignmentValue32(PtrOp0));
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
}
+ case NEON::BI__builtin_neon_vst1_x2_v:
+ case NEON::BI__builtin_neon_vst1q_x2_v:
+ case NEON::BI__builtin_neon_vst1_x3_v:
+ case NEON::BI__builtin_neon_vst1q_x3_v:
+ case NEON::BI__builtin_neon_vst1_x4_v:
+ case NEON::BI__builtin_neon_vst1q_x4_v: {
+ llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
+    // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
+    // in AArch64 it comes last. We should standardize on one or the other.
+ if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be) {
+ llvm::Type *Tys[2] = { VTy, PTy };
+ std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
+ return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
+ }
+ llvm::Type *Tys[2] = { PTy, VTy };
+ return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
+ }
case NEON::BI__builtin_neon_vsubhn_v: {
llvm::VectorType *SrcTy =
llvm::VectorType::getExtendedElementVectorType(VTy);
@@ -4685,6 +5251,14 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
}
return SV;
}
+ case NEON::BI__builtin_neon_vdot_v:
+ case NEON::BI__builtin_neon_vdotq_v: {
+ llvm::Type *InputTy =
+ llvm::VectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
+ llvm::Type *Tys[2] = { Ty, InputTy };
+ Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
+ }
}
assert(Int && "Expected valid intrinsic number");
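The vdot mapping derives its i8 input type from the result width (result bits
divided by 8 lanes of i8). A hedged usage example per ACLE, assuming the
dot-product extension is enabled:

    #include <arm_neon.h>
    // Each 32-bit lane of acc accumulates a dot product of four u8 pairs.
    uint32x2_t dot2(uint32x2_t acc, uint8x8_t a, uint8x8_t b) {
      return vdot_u32(acc, a, b);   // -> @llvm.{arm,aarch64}.neon.udot
    }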
@@ -4893,6 +5467,34 @@ static bool HasExtraNeonArgument(unsigned BuiltinID) {
return true;
}
+Value *CodeGenFunction::EmitISOVolatileLoad(const CallExpr *E) {
+ Value *Ptr = EmitScalarExpr(E->getArg(0));
+ QualType ElTy = E->getArg(0)->getType()->getPointeeType();
+ CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy);
+ llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
+ LoadSize.getQuantity() * 8);
+ Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
+ llvm::LoadInst *Load =
+ Builder.CreateAlignedLoad(Ptr, LoadSize);
+ Load->setVolatile(true);
+ return Load;
+}
+
+Value *CodeGenFunction::EmitISOVolatileStore(const CallExpr *E) {
+ Value *Ptr = EmitScalarExpr(E->getArg(0));
+ Value *Value = EmitScalarExpr(E->getArg(1));
+ QualType ElTy = E->getArg(0)->getType()->getPointeeType();
+ CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
+ llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
+ StoreSize.getQuantity() * 8);
+ Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
+ llvm::StoreInst *Store =
+ Builder.CreateAlignedStore(Value, Ptr,
+ StoreSize);
+ Store->setVolatile(true);
+ return Store;
+}
+
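These helpers give the MSVC __iso_volatile_* builtins one lowering shared by
ARM and AArch64: a single volatile integer load or store of the pointee's
exact width, with no fences. A hedged usage sketch (MSVC semantics assumed):

    int read32(const volatile int *p) {
      return __iso_volatile_load32(p);   // one volatile i32 load
    }
    void write32(volatile int *p, int v) {
      __iso_volatile_store32(p, v);      // one volatile i32 store
    }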
Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
const CallExpr *E,
llvm::Triple::ArchType Arch) {
@@ -5135,35 +5737,13 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
case ARM::BI__iso_volatile_load8:
case ARM::BI__iso_volatile_load16:
case ARM::BI__iso_volatile_load32:
- case ARM::BI__iso_volatile_load64: {
- Value *Ptr = EmitScalarExpr(E->getArg(0));
- QualType ElTy = E->getArg(0)->getType()->getPointeeType();
- CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy);
- llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
- LoadSize.getQuantity() * 8);
- Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
- llvm::LoadInst *Load =
- Builder.CreateAlignedLoad(Ptr, LoadSize);
- Load->setVolatile(true);
- return Load;
- }
+ case ARM::BI__iso_volatile_load64:
+ return EmitISOVolatileLoad(E);
case ARM::BI__iso_volatile_store8:
case ARM::BI__iso_volatile_store16:
case ARM::BI__iso_volatile_store32:
- case ARM::BI__iso_volatile_store64: {
- Value *Ptr = EmitScalarExpr(E->getArg(0));
- Value *Value = EmitScalarExpr(E->getArg(1));
- QualType ElTy = E->getArg(0)->getType()->getPointeeType();
- CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
- llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
- StoreSize.getQuantity() * 8);
- Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
- llvm::StoreInst *Store =
- Builder.CreateAlignedStore(Value, Ptr,
- StoreSize);
- Store->setVolatile(true);
- return Store;
- }
+ case ARM::BI__iso_volatile_store64:
+ return EmitISOVolatileStore(E);
}
if (BuiltinID == ARM::BI__builtin_arm_clrex) {
@@ -5308,8 +5888,11 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vld4_lane_v:
case NEON::BI__builtin_neon_vld4q_lane_v:
case NEON::BI__builtin_neon_vld2_dup_v:
+ case NEON::BI__builtin_neon_vld2q_dup_v:
case NEON::BI__builtin_neon_vld3_dup_v:
+ case NEON::BI__builtin_neon_vld3q_dup_v:
case NEON::BI__builtin_neon_vld4_dup_v:
+ case NEON::BI__builtin_neon_vld4q_dup_v:
// Get the alignment for the argument in addition to the value;
// we'll use it later.
PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
@@ -5345,6 +5928,12 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vgetq_lane_f32:
return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
+ case NEON::BI__builtin_neon_vrndns_f32: {
+ Value *Arg = EmitScalarExpr(E->getArg(0));
+ llvm::Type *Tys[] = {Arg->getType()};
+ Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vrintn, Tys);
+    return Builder.CreateCall(F, {Arg}, "vrndn");
+  }
+
case NEON::BI__builtin_neon_vset_lane_i8:
case NEON::BI__builtin_neon_vset_lane_i16:
case NEON::BI__builtin_neon_vset_lane_i32:
@@ -5434,7 +6023,8 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
bool usgn = Type.isUnsigned();
bool rightShift = false;
- llvm::VectorType *VTy = GetNeonType(this, Type, Arch);
+ llvm::VectorType *VTy = GetNeonType(this, Type,
+ getTarget().hasLegalHalfType());
llvm::Type *Ty = VTy;
if (!Ty)
return nullptr;
@@ -5479,68 +6069,6 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
Value *Ld = Builder.CreateLoad(PtrOp0);
return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
}
- case NEON::BI__builtin_neon_vld2_dup_v:
- case NEON::BI__builtin_neon_vld3_dup_v:
- case NEON::BI__builtin_neon_vld4_dup_v: {
- // Handle 64-bit elements as a special-case. There is no "dup" needed.
- if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
- switch (BuiltinID) {
- case NEON::BI__builtin_neon_vld2_dup_v:
- Int = Intrinsic::arm_neon_vld2;
- break;
- case NEON::BI__builtin_neon_vld3_dup_v:
- Int = Intrinsic::arm_neon_vld3;
- break;
- case NEON::BI__builtin_neon_vld4_dup_v:
- Int = Intrinsic::arm_neon_vld4;
- break;
- default: llvm_unreachable("unknown vld_dup intrinsic?");
- }
- llvm::Type *Tys[] = {Ty, Int8PtrTy};
- Function *F = CGM.getIntrinsic(Int, Tys);
- llvm::Value *Align = getAlignmentValue32(PtrOp1);
- Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup");
- Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
- }
- switch (BuiltinID) {
- case NEON::BI__builtin_neon_vld2_dup_v:
- Int = Intrinsic::arm_neon_vld2lane;
- break;
- case NEON::BI__builtin_neon_vld3_dup_v:
- Int = Intrinsic::arm_neon_vld3lane;
- break;
- case NEON::BI__builtin_neon_vld4_dup_v:
- Int = Intrinsic::arm_neon_vld4lane;
- break;
- default: llvm_unreachable("unknown vld_dup intrinsic?");
- }
- llvm::Type *Tys[] = {Ty, Int8PtrTy};
- Function *F = CGM.getIntrinsic(Int, Tys);
- llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
-
- SmallVector<Value*, 6> Args;
- Args.push_back(Ops[1]);
- Args.append(STy->getNumElements(), UndefValue::get(Ty));
-
- llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
- Args.push_back(CI);
- Args.push_back(getAlignmentValue32(PtrOp1));
-
- Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
- // splat lane 0 to all elts in each vector of the result.
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
- Value *Val = Builder.CreateExtractValue(Ops[1], i);
- Value *Elt = Builder.CreateBitCast(Val, Ty);
- Elt = EmitNeonSplat(Elt, CI);
- Elt = Builder.CreateBitCast(Elt, Val->getType());
- Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
- }
- Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
- }
case NEON::BI__builtin_neon_vqrshrn_n_v:
Int =
usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
@@ -5680,7 +6208,7 @@ static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID
// Determine the type of this overloaded NEON intrinsic.
NeonTypeFlags Type(Result.getZExtValue());
- llvm::VectorType *Ty = GetNeonType(&CGF, Type, Arch);
+ llvm::VectorType *Ty = GetNeonType(&CGF, Type);
if (!Ty)
return nullptr;
@@ -5799,18 +6327,23 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
HintID = 0;
break;
case AArch64::BI__builtin_arm_yield:
+ case AArch64::BI__yield:
HintID = 1;
break;
case AArch64::BI__builtin_arm_wfe:
+ case AArch64::BI__wfe:
HintID = 2;
break;
case AArch64::BI__builtin_arm_wfi:
+ case AArch64::BI__wfi:
HintID = 3;
break;
case AArch64::BI__builtin_arm_sev:
+ case AArch64::BI__sev:
HintID = 4;
break;
case AArch64::BI__builtin_arm_sevl:
+ case AArch64::BI__sevl:
HintID = 5;
break;
}
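Both spellings of each hint select the same HintID; after this switch the
emitter issues the target hint intrinsic with that immediate — roughly (a
sketch, assuming the surrounding code):

    // HintID 1..5 correspond to yield/wfe/wfi/sev/sevl.
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
    return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));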
@@ -6077,6 +6610,9 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
// Handle non-overloaded intrinsics first.
switch (BuiltinID) {
default: break;
+ case NEON::BI__builtin_neon_vabsh_f16:
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
case NEON::BI__builtin_neon_vldrq_p128: {
llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0);
@@ -6119,6 +6655,153 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return Builder.CreateUIToFP(Ops[0], FTy);
return Builder.CreateSIToFP(Ops[0], FTy);
}
+ case NEON::BI__builtin_neon_vcvth_f16_u16:
+ case NEON::BI__builtin_neon_vcvth_f16_u32:
+ case NEON::BI__builtin_neon_vcvth_f16_u64:
+ usgn = true;
+ // FALL THROUGH
+ case NEON::BI__builtin_neon_vcvth_f16_s16:
+ case NEON::BI__builtin_neon_vcvth_f16_s32:
+ case NEON::BI__builtin_neon_vcvth_f16_s64: {
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ llvm::Type *FTy = HalfTy;
+ llvm::Type *InTy;
+ if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
+ InTy = Int64Ty;
+ else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
+ InTy = Int32Ty;
+ else
+ InTy = Int16Ty;
+ Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
+ if (usgn)
+ return Builder.CreateUIToFP(Ops[0], FTy);
+ return Builder.CreateSIToFP(Ops[0], FTy);
+ }
+ case NEON::BI__builtin_neon_vcvth_u16_f16:
+ usgn = true;
+ // FALL THROUGH
+ case NEON::BI__builtin_neon_vcvth_s16_f16: {
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
+ if (usgn)
+ return Builder.CreateFPToUI(Ops[0], Int16Ty);
+ return Builder.CreateFPToSI(Ops[0], Int16Ty);
+ }
+ case NEON::BI__builtin_neon_vcvth_u32_f16:
+ usgn = true;
+ // FALL THROUGH
+ case NEON::BI__builtin_neon_vcvth_s32_f16: {
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
+ if (usgn)
+ return Builder.CreateFPToUI(Ops[0], Int32Ty);
+ return Builder.CreateFPToSI(Ops[0], Int32Ty);
+ }
+ case NEON::BI__builtin_neon_vcvth_u64_f16:
+ usgn = true;
+ // FALL THROUGH
+ case NEON::BI__builtin_neon_vcvth_s64_f16: {
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
+ if (usgn)
+ return Builder.CreateFPToUI(Ops[0], Int64Ty);
+ return Builder.CreateFPToSI(Ops[0], Int64Ty);
+ }
+ case NEON::BI__builtin_neon_vcvtah_u16_f16:
+ case NEON::BI__builtin_neon_vcvtmh_u16_f16:
+ case NEON::BI__builtin_neon_vcvtnh_u16_f16:
+ case NEON::BI__builtin_neon_vcvtph_u16_f16:
+ case NEON::BI__builtin_neon_vcvtah_s16_f16:
+ case NEON::BI__builtin_neon_vcvtmh_s16_f16:
+ case NEON::BI__builtin_neon_vcvtnh_s16_f16:
+ case NEON::BI__builtin_neon_vcvtph_s16_f16: {
+ unsigned Int;
+ llvm::Type* InTy = Int32Ty;
+ llvm::Type* FTy = HalfTy;
+ llvm::Type *Tys[2] = {InTy, FTy};
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ switch (BuiltinID) {
+ default: llvm_unreachable("missing builtin ID in switch!");
+ case NEON::BI__builtin_neon_vcvtah_u16_f16:
+ Int = Intrinsic::aarch64_neon_fcvtau; break;
+ case NEON::BI__builtin_neon_vcvtmh_u16_f16:
+ Int = Intrinsic::aarch64_neon_fcvtmu; break;
+ case NEON::BI__builtin_neon_vcvtnh_u16_f16:
+ Int = Intrinsic::aarch64_neon_fcvtnu; break;
+ case NEON::BI__builtin_neon_vcvtph_u16_f16:
+ Int = Intrinsic::aarch64_neon_fcvtpu; break;
+ case NEON::BI__builtin_neon_vcvtah_s16_f16:
+ Int = Intrinsic::aarch64_neon_fcvtas; break;
+ case NEON::BI__builtin_neon_vcvtmh_s16_f16:
+ Int = Intrinsic::aarch64_neon_fcvtms; break;
+ case NEON::BI__builtin_neon_vcvtnh_s16_f16:
+ Int = Intrinsic::aarch64_neon_fcvtns; break;
+ case NEON::BI__builtin_neon_vcvtph_s16_f16:
+ Int = Intrinsic::aarch64_neon_fcvtps; break;
+ }
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
+ return Builder.CreateTrunc(Ops[0], Int16Ty);
+ }
+ case NEON::BI__builtin_neon_vcaleh_f16:
+ case NEON::BI__builtin_neon_vcalth_f16:
+ case NEON::BI__builtin_neon_vcageh_f16:
+ case NEON::BI__builtin_neon_vcagth_f16: {
+ unsigned Int;
+ llvm::Type* InTy = Int32Ty;
+ llvm::Type* FTy = HalfTy;
+ llvm::Type *Tys[2] = {InTy, FTy};
+ Ops.push_back(EmitScalarExpr(E->getArg(1)));
+ switch (BuiltinID) {
+ default: llvm_unreachable("missing builtin ID in switch!");
+ case NEON::BI__builtin_neon_vcageh_f16:
+ Int = Intrinsic::aarch64_neon_facge; break;
+ case NEON::BI__builtin_neon_vcagth_f16:
+ Int = Intrinsic::aarch64_neon_facgt; break;
+ case NEON::BI__builtin_neon_vcaleh_f16:
+ Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
+ case NEON::BI__builtin_neon_vcalth_f16:
+ Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
+ }
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
+ return Builder.CreateTrunc(Ops[0], Int16Ty);
+ }
+ case NEON::BI__builtin_neon_vcvth_n_s16_f16:
+ case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
+ unsigned Int;
+ llvm::Type* InTy = Int32Ty;
+ llvm::Type* FTy = HalfTy;
+ llvm::Type *Tys[2] = {InTy, FTy};
+ Ops.push_back(EmitScalarExpr(E->getArg(1)));
+ switch (BuiltinID) {
+ default: llvm_unreachable("missing builtin ID in switch!");
+ case NEON::BI__builtin_neon_vcvth_n_s16_f16:
+ Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
+ case NEON::BI__builtin_neon_vcvth_n_u16_f16:
+ Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
+ }
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
+ return Builder.CreateTrunc(Ops[0], Int16Ty);
+ }
+ case NEON::BI__builtin_neon_vcvth_n_f16_s16:
+ case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
+ unsigned Int;
+ llvm::Type* FTy = HalfTy;
+ llvm::Type* InTy = Int32Ty;
+ llvm::Type *Tys[2] = {FTy, InTy};
+ Ops.push_back(EmitScalarExpr(E->getArg(1)));
+ switch (BuiltinID) {
+ default: llvm_unreachable("missing builtin ID in switch!");
+ case NEON::BI__builtin_neon_vcvth_n_f16_s16:
+ Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
+ Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
+ break;
+ case NEON::BI__builtin_neon_vcvth_n_f16_u16:
+ Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
+ Ops[0] = Builder.CreateZExt(Ops[0], InTy);
+ break;
+ }
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
+ }
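Because the fixed-point conversion intrinsics only accept i32/i64, the 16-bit
operand is widened first (sext when signed, zext when unsigned). A hedged
ACLE-level usage example:

    #include <arm_neon.h>
    // Treat x as signed fixed-point with 3 fractional bits: result = x / 8.0.
    float16_t q_to_half(int16_t x) {
      return vcvth_n_f16_s16(x, 3);
    }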
case NEON::BI__builtin_neon_vpaddd_s64: {
llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
Value *Vec = EmitScalarExpr(E->getArg(0));
@@ -6160,6 +6843,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vceqzd_s64:
case NEON::BI__builtin_neon_vceqzd_f64:
case NEON::BI__builtin_neon_vceqzs_f32:
+ case NEON::BI__builtin_neon_vceqzh_f16:
Ops.push_back(EmitScalarExpr(E->getArg(0)));
return EmitAArch64CompareBuiltinExpr(
Ops[0], ConvertType(E->getCallReturnType(getContext())),
@@ -6167,6 +6851,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vcgezd_s64:
case NEON::BI__builtin_neon_vcgezd_f64:
case NEON::BI__builtin_neon_vcgezs_f32:
+ case NEON::BI__builtin_neon_vcgezh_f16:
Ops.push_back(EmitScalarExpr(E->getArg(0)));
return EmitAArch64CompareBuiltinExpr(
Ops[0], ConvertType(E->getCallReturnType(getContext())),
@@ -6174,6 +6859,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vclezd_s64:
case NEON::BI__builtin_neon_vclezd_f64:
case NEON::BI__builtin_neon_vclezs_f32:
+ case NEON::BI__builtin_neon_vclezh_f16:
Ops.push_back(EmitScalarExpr(E->getArg(0)));
return EmitAArch64CompareBuiltinExpr(
Ops[0], ConvertType(E->getCallReturnType(getContext())),
@@ -6181,6 +6867,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vcgtzd_s64:
case NEON::BI__builtin_neon_vcgtzd_f64:
case NEON::BI__builtin_neon_vcgtzs_f32:
+ case NEON::BI__builtin_neon_vcgtzh_f16:
Ops.push_back(EmitScalarExpr(E->getArg(0)));
return EmitAArch64CompareBuiltinExpr(
Ops[0], ConvertType(E->getCallReturnType(getContext())),
@@ -6188,6 +6875,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vcltzd_s64:
case NEON::BI__builtin_neon_vcltzd_f64:
case NEON::BI__builtin_neon_vcltzs_f32:
+ case NEON::BI__builtin_neon_vcltzh_f16:
Ops.push_back(EmitScalarExpr(E->getArg(0)));
return EmitAArch64CompareBuiltinExpr(
Ops[0], ConvertType(E->getCallReturnType(getContext())),
@@ -6240,6 +6928,26 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
}
+ case NEON::BI__builtin_neon_vceqh_f16:
+ case NEON::BI__builtin_neon_vcleh_f16:
+ case NEON::BI__builtin_neon_vclth_f16:
+ case NEON::BI__builtin_neon_vcgeh_f16:
+ case NEON::BI__builtin_neon_vcgth_f16: {
+ llvm::CmpInst::Predicate P;
+ switch (BuiltinID) {
+ default: llvm_unreachable("missing builtin ID in switch!");
+ case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
+ case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
+ case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
+ case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
+ case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
+ }
+ Ops.push_back(EmitScalarExpr(E->getArg(1)));
+ Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
+ Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
+ Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
+ return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
+ }
case NEON::BI__builtin_neon_vceqd_s64:
case NEON::BI__builtin_neon_vceqd_u64:
case NEON::BI__builtin_neon_vcgtd_s64:
@@ -6377,6 +7085,31 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
llvm::VectorType::get(DoubleTy, 2));
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
"vgetq_lane");
+ case NEON::BI__builtin_neon_vaddh_f16:
+ Ops.push_back(EmitScalarExpr(E->getArg(1)));
+ return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
+ case NEON::BI__builtin_neon_vsubh_f16:
+ Ops.push_back(EmitScalarExpr(E->getArg(1)));
+ return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
+ case NEON::BI__builtin_neon_vmulh_f16:
+ Ops.push_back(EmitScalarExpr(E->getArg(1)));
+ return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
+ case NEON::BI__builtin_neon_vdivh_f16:
+ Ops.push_back(EmitScalarExpr(E->getArg(1)));
+ return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
+ case NEON::BI__builtin_neon_vfmah_f16: {
+ Value *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy);
+ // NEON intrinsic puts accumulator first, unlike the LLVM fma.
+ return Builder.CreateCall(F,
+ {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
+ }
+ case NEON::BI__builtin_neon_vfmsh_f16: {
+ Value *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy);
+ Value *Zero = llvm::ConstantFP::getZeroValueForNegation(HalfTy);
+ Value* Sub = Builder.CreateFSub(Zero, EmitScalarExpr(E->getArg(1)), "vsubh");
+ // NEON intrinsic puts accumulator first, unlike the LLVM fma.
+ return Builder.CreateCall(F, {Sub, EmitScalarExpr(E->getArg(2)), Ops[0]});
+ }
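A hedged usage note for the fused cases above: per ACLE, vfmah_f16(a, b, c)
computes a + b*c, which is why the accumulator Ops[0] is moved to the last
llvm.fma operand:

    #include <arm_neon.h>
    float16_t mac(float16_t acc, float16_t x, float16_t y) {
      return vfmah_f16(acc, x, y);   // acc + x*y -> @llvm.fma.f16(x, y, acc)
    }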
case NEON::BI__builtin_neon_vaddd_s64:
case NEON::BI__builtin_neon_vaddd_u64:
return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
@@ -6534,7 +7267,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
}
- llvm::VectorType *VTy = GetNeonType(this, Type, Arch);
+ llvm::VectorType *VTy = GetNeonType(this, Type);
llvm::Type *Ty = VTy;
if (!Ty)
return nullptr;
@@ -6599,7 +7332,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
llvm::Type *VTy = GetNeonType(this,
- NeonTypeFlags(NeonTypeFlags::Float64, false, true), Arch);
+ NeonTypeFlags(NeonTypeFlags::Float64, false, true));
Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
@@ -6651,12 +7384,22 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
+ case NEON::BI__builtin_neon_vmaxh_f16: {
+ Ops.push_back(EmitScalarExpr(E->getArg(1)));
+ Int = Intrinsic::aarch64_neon_fmax;
+ return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
+ }
case NEON::BI__builtin_neon_vmin_v:
case NEON::BI__builtin_neon_vminq_v:
// FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
+ case NEON::BI__builtin_neon_vminh_f16: {
+ Ops.push_back(EmitScalarExpr(E->getArg(1)));
+ Int = Intrinsic::aarch64_neon_fmin;
+ return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
+ }
case NEON::BI__builtin_neon_vabd_v:
case NEON::BI__builtin_neon_vabdq_v:
// FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
@@ -6695,20 +7438,31 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vminnmq_v:
Int = Intrinsic::aarch64_neon_fminnm;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
+ case NEON::BI__builtin_neon_vminnmh_f16:
+ Ops.push_back(EmitScalarExpr(E->getArg(1)));
+ Int = Intrinsic::aarch64_neon_fminnm;
+ return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
case NEON::BI__builtin_neon_vmaxnm_v:
case NEON::BI__builtin_neon_vmaxnmq_v:
Int = Intrinsic::aarch64_neon_fmaxnm;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
+ case NEON::BI__builtin_neon_vmaxnmh_f16:
+ Ops.push_back(EmitScalarExpr(E->getArg(1)));
+ Int = Intrinsic::aarch64_neon_fmaxnm;
+ return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
case NEON::BI__builtin_neon_vrecpss_f32: {
Ops.push_back(EmitScalarExpr(E->getArg(1)));
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
Ops, "vrecps");
}
- case NEON::BI__builtin_neon_vrecpsd_f64: {
+ case NEON::BI__builtin_neon_vrecpsd_f64:
Ops.push_back(EmitScalarExpr(E->getArg(1)));
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
Ops, "vrecps");
- }
+ case NEON::BI__builtin_neon_vrecpsh_f16:
+ Ops.push_back(EmitScalarExpr(E->getArg(1)));
+ return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
+ Ops, "vrecps");
case NEON::BI__builtin_neon_vqshrun_n_v:
Int = Intrinsic::aarch64_neon_sqshrun;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
@@ -6724,72 +7478,87 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vqrshrn_n_v:
Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
+ case NEON::BI__builtin_neon_vrndah_f16: {
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Int = Intrinsic::round;
+ return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
+ }
case NEON::BI__builtin_neon_vrnda_v:
case NEON::BI__builtin_neon_vrndaq_v: {
Int = Intrinsic::round;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
}
- case NEON::BI__builtin_neon_vrndi_v:
- case NEON::BI__builtin_neon_vrndiq_v: {
+ case NEON::BI__builtin_neon_vrndih_f16: {
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
Int = Intrinsic::nearbyint;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi");
+ return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
+ }
+ case NEON::BI__builtin_neon_vrndmh_f16: {
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Int = Intrinsic::floor;
+ return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
}
case NEON::BI__builtin_neon_vrndm_v:
case NEON::BI__builtin_neon_vrndmq_v: {
Int = Intrinsic::floor;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
}
+ case NEON::BI__builtin_neon_vrndnh_f16: {
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Int = Intrinsic::aarch64_neon_frintn;
+ return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
+ }
case NEON::BI__builtin_neon_vrndn_v:
case NEON::BI__builtin_neon_vrndnq_v: {
Int = Intrinsic::aarch64_neon_frintn;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
}
+ case NEON::BI__builtin_neon_vrndns_f32: {
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Int = Intrinsic::aarch64_neon_frintn;
+ return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
+ }
+ case NEON::BI__builtin_neon_vrndph_f16: {
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Int = Intrinsic::ceil;
+ return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
+ }
case NEON::BI__builtin_neon_vrndp_v:
case NEON::BI__builtin_neon_vrndpq_v: {
Int = Intrinsic::ceil;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
}
+ case NEON::BI__builtin_neon_vrndxh_f16: {
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Int = Intrinsic::rint;
+ return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
+ }
case NEON::BI__builtin_neon_vrndx_v:
case NEON::BI__builtin_neon_vrndxq_v: {
Int = Intrinsic::rint;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
}
+ case NEON::BI__builtin_neon_vrndh_f16: {
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Int = Intrinsic::trunc;
+ return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
+ }
case NEON::BI__builtin_neon_vrnd_v:
case NEON::BI__builtin_neon_vrndq_v: {
Int = Intrinsic::trunc;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
}
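A compact reference for the rounding cases above, as mapped in this hunk:

    //   vrnda -> llvm.round             ties away from zero
    //   vrndi -> llvm.nearbyint         current mode, no inexact exception
    //   vrndm -> llvm.floor             toward -infinity
    //   vrndn -> aarch64.neon.frintn    ties to even
    //   vrndp -> llvm.ceil              toward +infinity
    //   vrndx -> llvm.rint              current mode, may raise inexact
    //   vrnd  -> llvm.trunc             toward zero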
- case NEON::BI__builtin_neon_vceqz_v:
- case NEON::BI__builtin_neon_vceqzq_v:
- return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
- ICmpInst::ICMP_EQ, "vceqz");
- case NEON::BI__builtin_neon_vcgez_v:
- case NEON::BI__builtin_neon_vcgezq_v:
- return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
- ICmpInst::ICMP_SGE, "vcgez");
- case NEON::BI__builtin_neon_vclez_v:
- case NEON::BI__builtin_neon_vclezq_v:
- return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
- ICmpInst::ICMP_SLE, "vclez");
- case NEON::BI__builtin_neon_vcgtz_v:
- case NEON::BI__builtin_neon_vcgtzq_v:
- return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
- ICmpInst::ICMP_SGT, "vcgtz");
- case NEON::BI__builtin_neon_vcltz_v:
- case NEON::BI__builtin_neon_vcltzq_v:
- return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
- ICmpInst::ICMP_SLT, "vcltz");
case NEON::BI__builtin_neon_vcvt_f64_v:
case NEON::BI__builtin_neon_vcvtq_f64_v:
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad), Arch);
+ Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
: Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
case NEON::BI__builtin_neon_vcvt_f64_f32: {
assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
"unexpected vcvt_f64_f32 builtin");
NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
- Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag, Arch));
+ Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
}
@@ -6797,7 +7566,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
assert(Type.getEltType() == NeonTypeFlags::Float32 &&
"unexpected vcvt_f32_f64 builtin");
NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
- Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag, Arch));
+ Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
}
@@ -6805,20 +7574,21 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vcvt_u32_v:
case NEON::BI__builtin_neon_vcvt_s64_v:
case NEON::BI__builtin_neon_vcvt_u64_v:
- case NEON::BI__builtin_neon_vcvt_s16_v:
- case NEON::BI__builtin_neon_vcvt_u16_v:
+ case NEON::BI__builtin_neon_vcvt_s16_v:
+ case NEON::BI__builtin_neon_vcvt_u16_v:
case NEON::BI__builtin_neon_vcvtq_s32_v:
case NEON::BI__builtin_neon_vcvtq_u32_v:
case NEON::BI__builtin_neon_vcvtq_s64_v:
case NEON::BI__builtin_neon_vcvtq_u64_v:
- case NEON::BI__builtin_neon_vcvtq_s16_v:
- case NEON::BI__builtin_neon_vcvtq_u16_v: {
+ case NEON::BI__builtin_neon_vcvtq_s16_v:
+ case NEON::BI__builtin_neon_vcvtq_u16_v: {
Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
if (usgn)
return Builder.CreateFPToUI(Ops[0], Ty);
return Builder.CreateFPToSI(Ops[0], Ty);
}
case NEON::BI__builtin_neon_vcvta_s16_v:
+ case NEON::BI__builtin_neon_vcvta_u16_v:
case NEON::BI__builtin_neon_vcvta_s32_v:
case NEON::BI__builtin_neon_vcvtaq_s16_v:
case NEON::BI__builtin_neon_vcvtaq_s32_v:
@@ -6886,6 +7656,16 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Int = Intrinsic::aarch64_neon_fmulx;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
}
+ case NEON::BI__builtin_neon_vmulxh_lane_f16:
+ case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
+    // vmulx_lane should be mapped to the Neon scalar mulx after
+    // extracting the scalar element.
+ Ops.push_back(EmitScalarExpr(E->getArg(2)));
+ Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
+ Ops.pop_back();
+ Int = Intrinsic::aarch64_neon_fmulx;
+ return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
+ }
case NEON::BI__builtin_neon_vmul_lane_v:
case NEON::BI__builtin_neon_vmul_laneq_v: {
// v1f64 vmul_lane should be mapped to Neon scalar mul lane
@@ -6894,7 +7674,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Quad = true;
Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
llvm::Type *VTy = GetNeonType(this,
- NeonTypeFlags(NeonTypeFlags::Float64, false, Quad), Arch);
+ NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
@@ -6902,6 +7682,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
case NEON::BI__builtin_neon_vnegd_s64:
return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
+ case NEON::BI__builtin_neon_vnegh_f16:
+ return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
case NEON::BI__builtin_neon_vpmaxnm_v:
case NEON::BI__builtin_neon_vpmaxnmq_v: {
Int = Intrinsic::aarch64_neon_fmaxnmp;
@@ -6912,6 +7694,11 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Int = Intrinsic::aarch64_neon_fminnmp;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
}
+ case NEON::BI__builtin_neon_vsqrth_f16: {
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Int = Intrinsic::sqrt;
+ return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
+ }
case NEON::BI__builtin_neon_vsqrt_v:
case NEON::BI__builtin_neon_vsqrtq_v: {
Int = Intrinsic::sqrt;
@@ -7289,64 +8076,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
return Builder.CreateAdd(Ops[0], tmp);
}
- // FIXME: Sharing loads & stores with 32-bit is complicated by the absence
- // of an Align parameter here.
- case NEON::BI__builtin_neon_vld1_x2_v:
- case NEON::BI__builtin_neon_vld1q_x2_v:
- case NEON::BI__builtin_neon_vld1_x3_v:
- case NEON::BI__builtin_neon_vld1q_x3_v:
- case NEON::BI__builtin_neon_vld1_x4_v:
- case NEON::BI__builtin_neon_vld1q_x4_v: {
- llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
- Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
- llvm::Type *Tys[2] = { VTy, PTy };
- unsigned Int;
- switch (BuiltinID) {
- case NEON::BI__builtin_neon_vld1_x2_v:
- case NEON::BI__builtin_neon_vld1q_x2_v:
- Int = Intrinsic::aarch64_neon_ld1x2;
- break;
- case NEON::BI__builtin_neon_vld1_x3_v:
- case NEON::BI__builtin_neon_vld1q_x3_v:
- Int = Intrinsic::aarch64_neon_ld1x3;
- break;
- case NEON::BI__builtin_neon_vld1_x4_v:
- case NEON::BI__builtin_neon_vld1q_x4_v:
- Int = Intrinsic::aarch64_neon_ld1x4;
- break;
- }
- Function *F = CGM.getIntrinsic(Int, Tys);
- Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
- Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
- }
- case NEON::BI__builtin_neon_vst1_x2_v:
- case NEON::BI__builtin_neon_vst1q_x2_v:
- case NEON::BI__builtin_neon_vst1_x3_v:
- case NEON::BI__builtin_neon_vst1q_x3_v:
- case NEON::BI__builtin_neon_vst1_x4_v:
- case NEON::BI__builtin_neon_vst1q_x4_v: {
- llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
- llvm::Type *Tys[2] = { VTy, PTy };
- unsigned Int;
- switch (BuiltinID) {
- case NEON::BI__builtin_neon_vst1_x2_v:
- case NEON::BI__builtin_neon_vst1q_x2_v:
- Int = Intrinsic::aarch64_neon_st1x2;
- break;
- case NEON::BI__builtin_neon_vst1_x3_v:
- case NEON::BI__builtin_neon_vst1q_x3_v:
- Int = Intrinsic::aarch64_neon_st1x3;
- break;
- case NEON::BI__builtin_neon_vst1_x4_v:
- case NEON::BI__builtin_neon_vst1q_x4_v:
- Int = Intrinsic::aarch64_neon_st1x4;
- break;
- }
- std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
- }
case NEON::BI__builtin_neon_vld1_v:
case NEON::BI__builtin_neon_vld1q_v: {
Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
@@ -7653,6 +8382,38 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Int = Intrinsic::aarch64_neon_suqadd;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
}
+ case AArch64::BI__iso_volatile_load8:
+ case AArch64::BI__iso_volatile_load16:
+ case AArch64::BI__iso_volatile_load32:
+ case AArch64::BI__iso_volatile_load64:
+ return EmitISOVolatileLoad(E);
+ case AArch64::BI__iso_volatile_store8:
+ case AArch64::BI__iso_volatile_store16:
+ case AArch64::BI__iso_volatile_store32:
+ case AArch64::BI__iso_volatile_store64:
+ return EmitISOVolatileStore(E);
+ case AArch64::BI_BitScanForward:
+ case AArch64::BI_BitScanForward64:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
+ case AArch64::BI_BitScanReverse:
+ case AArch64::BI_BitScanReverse64:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
+ case AArch64::BI_InterlockedAnd64:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
+ case AArch64::BI_InterlockedExchange64:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
+ case AArch64::BI_InterlockedExchangeAdd64:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
+ case AArch64::BI_InterlockedExchangeSub64:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
+ case AArch64::BI_InterlockedOr64:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
+ case AArch64::BI_InterlockedXor64:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
+ case AArch64::BI_InterlockedDecrement64:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
+ case AArch64::BI_InterlockedIncrement64:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
}
}
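The _Interlocked* cases reuse the generic MSVC lowering; a hedged sketch of
what EmitMSVCBuiltinExpr emits for, e.g., _InterlockedExchangeAdd64 — a
sequentially consistent atomicrmw:

    llvm::AtomicRMWInst *RMW = Builder.CreateAtomicRMW(
        llvm::AtomicRMWInst::Add, Ptr, Val,
        llvm::AtomicOrdering::SequentiallyConsistent);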
@@ -7704,42 +8465,66 @@ static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
}
static Value *EmitX86MaskedStore(CodeGenFunction &CGF,
- SmallVectorImpl<Value *> &Ops,
+ ArrayRef<Value *> Ops,
unsigned Align) {
// Cast the pointer to right type.
- Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
+ Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
llvm::PointerType::getUnqual(Ops[1]->getType()));
- // If the mask is all ones just emit a regular store.
- if (const auto *C = dyn_cast<Constant>(Ops[2]))
- if (C->isAllOnesValue())
- return CGF.Builder.CreateAlignedStore(Ops[1], Ops[0], Align);
-
Value *MaskVec = getMaskVecValue(CGF, Ops[2],
Ops[1]->getType()->getVectorNumElements());
- return CGF.Builder.CreateMaskedStore(Ops[1], Ops[0], Align, MaskVec);
+ return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Align, MaskVec);
}
static Value *EmitX86MaskedLoad(CodeGenFunction &CGF,
- SmallVectorImpl<Value *> &Ops, unsigned Align) {
+ ArrayRef<Value *> Ops, unsigned Align) {
// Cast the pointer to right type.
- Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
+ Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
llvm::PointerType::getUnqual(Ops[1]->getType()));
- // If the mask is all ones just emit a regular store.
- if (const auto *C = dyn_cast<Constant>(Ops[2]))
- if (C->isAllOnesValue())
- return CGF.Builder.CreateAlignedLoad(Ops[0], Align);
-
Value *MaskVec = getMaskVecValue(CGF, Ops[2],
Ops[1]->getType()->getVectorNumElements());
- return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]);
+ return CGF.Builder.CreateMaskedLoad(Ptr, Align, MaskVec, Ops[1]);
+}
+
+static Value *EmitX86ExpandLoad(CodeGenFunction &CGF,
+ ArrayRef<Value *> Ops) {
+ llvm::Type *ResultTy = Ops[1]->getType();
+ llvm::Type *PtrTy = ResultTy->getVectorElementType();
+
+ // Cast the pointer to element type.
+ Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
+ llvm::PointerType::getUnqual(PtrTy));
+
+ Value *MaskVec = getMaskVecValue(CGF, Ops[2],
+ ResultTy->getVectorNumElements());
+
+ llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_expandload,
+ ResultTy);
+ return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] });
+}
+
+static Value *EmitX86CompressStore(CodeGenFunction &CGF,
+ ArrayRef<Value *> Ops) {
+ llvm::Type *ResultTy = Ops[1]->getType();
+ llvm::Type *PtrTy = ResultTy->getVectorElementType();
+
+ // Cast the pointer to element type.
+ Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
+ llvm::PointerType::getUnqual(PtrTy));
+
+ Value *MaskVec = getMaskVecValue(CGF, Ops[2],
+ ResultTy->getVectorNumElements());
+
+ llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_compressstore,
+ ResultTy);
+ return CGF.Builder.CreateCall(F, { Ops[1], Ptr, MaskVec });
}
static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
- unsigned NumElts, SmallVectorImpl<Value *> &Ops,
+ unsigned NumElts, ArrayRef<Value *> Ops,
bool InvertLHS = false) {
Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);
@@ -7751,26 +8536,6 @@ static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
CGF.Builder.getIntNTy(std::max(NumElts, 8U)));
}
-static Value *EmitX86SubVectorBroadcast(CodeGenFunction &CGF,
- SmallVectorImpl<Value *> &Ops,
- llvm::Type *DstTy,
- unsigned SrcSizeInBits,
- unsigned Align) {
- // Load the subvector.
- Ops[0] = CGF.Builder.CreateAlignedLoad(Ops[0], Align);
-
- // Create broadcast mask.
- unsigned NumDstElts = DstTy->getVectorNumElements();
- unsigned NumSrcElts = SrcSizeInBits / DstTy->getScalarSizeInBits();
-
- SmallVector<uint32_t, 8> Mask;
- for (unsigned i = 0; i != NumDstElts; i += NumSrcElts)
- for (unsigned j = 0; j != NumSrcElts; ++j)
- Mask.push_back(j);
-
- return CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], Mask, "subvecbcst");
-}
-
static Value *EmitX86Select(CodeGenFunction &CGF,
Value *Mask, Value *Op0, Value *Op1) {
@@ -7784,8 +8549,48 @@ static Value *EmitX86Select(CodeGenFunction &CGF,
return CGF.Builder.CreateSelect(Mask, Op0, Op1);
}
+static Value *EmitX86ScalarSelect(CodeGenFunction &CGF,
+ Value *Mask, Value *Op0, Value *Op1) {
+ // If the mask is all ones just return first argument.
+ if (const auto *C = dyn_cast<Constant>(Mask))
+ if (C->isAllOnesValue())
+ return Op0;
+
+ llvm::VectorType *MaskTy =
+ llvm::VectorType::get(CGF.Builder.getInt1Ty(),
+ Mask->getType()->getIntegerBitWidth());
+ Mask = CGF.Builder.CreateBitCast(Mask, MaskTy);
+ Mask = CGF.Builder.CreateExtractElement(Mask, (uint64_t)0);
+ return CGF.Builder.CreateSelect(Mask, Op0, Op1);
+}
+
+static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp,
+ unsigned NumElts, Value *MaskIn) {
+ if (MaskIn) {
+ const auto *C = dyn_cast<Constant>(MaskIn);
+ if (!C || !C->isAllOnesValue())
+ Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts));
+ }
+
+ if (NumElts < 8) {
+ uint32_t Indices[8];
+ for (unsigned i = 0; i != NumElts; ++i)
+ Indices[i] = i;
+ for (unsigned i = NumElts; i != 8; ++i)
+ Indices[i] = i % NumElts + NumElts;
+ Cmp = CGF.Builder.CreateShuffleVector(
+ Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
+ }
+
+ return CGF.Builder.CreateBitCast(Cmp,
+ IntegerType::get(CGF.getLLVMContext(),
+ std::max(NumElts, 8U)));
+}
+
static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
- bool Signed, SmallVectorImpl<Value *> &Ops) {
+ bool Signed, ArrayRef<Value *> Ops) {
+ assert((Ops.size() == 2 || Ops.size() == 4) &&
+ "Unexpected number of arguments");
unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
Value *Cmp;
@@ -7809,22 +8614,16 @@ static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
}
- const auto *C = dyn_cast<Constant>(Ops.back());
- if (!C || !C->isAllOnesValue())
- Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops.back(), NumElts));
+ Value *MaskIn = nullptr;
+ if (Ops.size() == 4)
+ MaskIn = Ops[3];
- if (NumElts < 8) {
- uint32_t Indices[8];
- for (unsigned i = 0; i != NumElts; ++i)
- Indices[i] = i;
- for (unsigned i = NumElts; i != 8; ++i)
- Indices[i] = i % NumElts + NumElts;
- Cmp = CGF.Builder.CreateShuffleVector(
- Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
- }
- return CGF.Builder.CreateBitCast(Cmp,
- IntegerType::get(CGF.getLLVMContext(),
- std::max(NumElts, 8U)));
+ return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn);
+}
+
+static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {
+ Value *Zero = Constant::getNullValue(In->getType());
+ return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
}
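When fewer than 8 elements are compared, the i1 vector is padded to 8 lanes
before the final bitcast so it fits an i8 mask; the extra lanes come from the
null vector. Worked example for NumElts == 4:

    //   Indices = {0, 1, 2, 3, 4, 5, 6, 7}; lanes 4-7 select the zero
    //   vector, so bits 4-7 of the resulting i8 mask are always clear.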
static Value *EmitX86Abs(CodeGenFunction &CGF, ArrayRef<Value *> Ops) {
@@ -7834,9 +8633,7 @@ static Value *EmitX86Abs(CodeGenFunction &CGF, ArrayRef<Value *> Ops) {
Value *Sub = CGF.Builder.CreateSub(Zero, Ops[0]);
Value *Cmp = CGF.Builder.CreateICmp(ICmpInst::ICMP_SGT, Ops[0], Zero);
Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Sub);
- if (Ops.size() == 1)
- return Res;
- return EmitX86Select(CGF, Ops[2], Res, Ops[1]);
+ return Res;
}
static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred,
@@ -7844,11 +8641,211 @@ static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred,
Value *Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
- if (Ops.size() == 2)
- return Res;
+ assert(Ops.size() == 2);
+ return Res;
+}
+
+// Lowers X86 FMA intrinsics to IR.
+static Value *EmitX86FMAExpr(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
+ unsigned BuiltinID, bool IsAddSub) {
- assert(Ops.size() == 4);
- return EmitX86Select(CGF, Ops[3], Res, Ops[2]);
+ bool Subtract = false;
+ Intrinsic::ID IID = Intrinsic::not_intrinsic;
+ switch (BuiltinID) {
+ default: break;
+ case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
+ Subtract = true;
+ LLVM_FALLTHROUGH;
+ case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
+ case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
+ case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
+ IID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break;
+ case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
+ Subtract = true;
+ LLVM_FALLTHROUGH;
+ case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
+ case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
+ case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
+ IID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break;
+ case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
+ Subtract = true;
+ LLVM_FALLTHROUGH;
+ case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
+ case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
+ case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
+ IID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512;
+ break;
+ case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
+ Subtract = true;
+ LLVM_FALLTHROUGH;
+ case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
+ case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
+ case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
+ IID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512;
+ break;
+ }
+
+ Value *A = Ops[0];
+ Value *B = Ops[1];
+ Value *C = Ops[2];
+
+ if (Subtract)
+ C = CGF.Builder.CreateFNeg(C);
+
+ Value *Res;
+
+  // Lower to plain llvm.fma only for _MM_FROUND_CUR_DIRECTION (4, i.e. no
+  // explicit rounding); any other rounding mode has to keep the target
+  // intrinsic.
+ if (IID != Intrinsic::not_intrinsic &&
+ cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4) {
+ Function *Intr = CGF.CGM.getIntrinsic(IID);
+ Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() });
+ } else {
+ llvm::Type *Ty = A->getType();
+ Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
+ Res = CGF.Builder.CreateCall(FMA, {A, B, C} );
+
+ if (IsAddSub) {
+      // For addsub, also compute A*B-C and blend: even elements take the
+      // subtract result, odd elements take the add result.
+ unsigned NumElts = Ty->getVectorNumElements();
+ SmallVector<uint32_t, 16> Indices(NumElts);
+ for (unsigned i = 0; i != NumElts; ++i)
+ Indices[i] = i + (i % 2) * NumElts;
+
+ Value *NegC = CGF.Builder.CreateFNeg(C);
+ Value *FMSub = CGF.Builder.CreateCall(FMA, {A, B, NegC} );
+ Res = CGF.Builder.CreateShuffleVector(FMSub, Res, Indices);
+ }
+ }
+
+ // Handle any required masking.
+ Value *MaskFalseVal = nullptr;
+ switch (BuiltinID) {
+ case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
+ case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
+ case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
+ case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
+ MaskFalseVal = Ops[0];
+ break;
+ case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
+ case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
+ case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
+ case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
+ MaskFalseVal = Constant::getNullValue(Ops[0]->getType());
+ break;
+ case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
+ case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
+ case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
+ case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
+ case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
+ case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
+ case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
+ case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
+ MaskFalseVal = Ops[2];
+ break;
+ }
+
+ if (MaskFalseVal)
+ return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal);
+
+ return Res;
+}
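// Worked example of the IsAddSub blend above for a v4f32 (NumElts == 4):
// Indices[i] = i + (i % 2) * NumElts yields {0, 5, 2, 7}, so the shuffle of
// (FMSub, Res) takes even lanes from A*B-C and odd lanes from A*B+C, which
// is exactly the fmaddsub semantics.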
+
+static Value *
+EmitScalarFMAExpr(CodeGenFunction &CGF, MutableArrayRef<Value *> Ops,
+ Value *Upper, bool ZeroMask = false, unsigned PTIdx = 0,
+ bool NegAcc = false) {
+ unsigned Rnd = 4;
+ if (Ops.size() > 4)
+ Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
+
+ if (NegAcc)
+ Ops[2] = CGF.Builder.CreateFNeg(Ops[2]);
+
+ Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], (uint64_t)0);
+ Ops[1] = CGF.Builder.CreateExtractElement(Ops[1], (uint64_t)0);
+ Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0);
+ Value *Res;
+ if (Rnd != 4) {
+ Intrinsic::ID IID = Ops[0]->getType()->getPrimitiveSizeInBits() == 32 ?
+ Intrinsic::x86_avx512_vfmadd_f32 :
+ Intrinsic::x86_avx512_vfmadd_f64;
+ Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
+ {Ops[0], Ops[1], Ops[2], Ops[4]});
+ } else {
+ Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType());
+ Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3));
+ }
+ // If we have more than 3 arguments, we need to do masking.
+ if (Ops.size() > 3) {
+ Value *PassThru = ZeroMask ? Constant::getNullValue(Res->getType())
+ : Ops[PTIdx];
+
+    // If we negated the accumulator and it is also the PassThru value, we
+    // need to bypass the negate. Conveniently, Upper should be the same
+    // thing in this case.
+ if (NegAcc && PTIdx == 2)
+ PassThru = CGF.Builder.CreateExtractElement(Upper, (uint64_t)0);
+
+ Res = EmitX86ScalarSelect(CGF, Ops[3], Res, PassThru);
+ }
+ return CGF.Builder.CreateInsertElement(Upper, Res, (uint64_t)0);
+}
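// A rough sketch of what this emits for __builtin_ia32_vfmaddss3_mask with
// Rnd == 4 (PTIdx == 0, so the pass-through is the extracted first operand):
//   %a  = extractelement <4 x float> %Ops0, i64 0    ; likewise %b, %c
//   %r  = call float @llvm.fma.f32(float %a, float %b, float %c)
//   %r2 = select i1 (low bit of %mask), float %r, float %a
//   %v  = insertelement <4 x float> %Ops0, float %r2, i64 0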
+
+static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,
+ ArrayRef<Value *> Ops) {
+ llvm::Type *Ty = Ops[0]->getType();
+ // Arguments have a vXi32 type so cast to vXi64.
+ Ty = llvm::VectorType::get(CGF.Int64Ty,
+ Ty->getPrimitiveSizeInBits() / 64);
+ Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty);
+ Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty);
+
+ if (IsSigned) {
+ // Shift left then arithmetic shift right.
+ Constant *ShiftAmt = ConstantInt::get(Ty, 32);
+ LHS = CGF.Builder.CreateShl(LHS, ShiftAmt);
+ LHS = CGF.Builder.CreateAShr(LHS, ShiftAmt);
+ RHS = CGF.Builder.CreateShl(RHS, ShiftAmt);
+ RHS = CGF.Builder.CreateAShr(RHS, ShiftAmt);
+ } else {
+ // Clear the upper bits.
+ Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
+ LHS = CGF.Builder.CreateAnd(LHS, Mask);
+ RHS = CGF.Builder.CreateAnd(RHS, Mask);
+ }
+
+ return CGF.Builder.CreateMul(LHS, RHS);
+}
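// For example, pmuldq on two v4i32 inputs becomes, after the v2i64 bitcast:
//   LHS = ashr (shl LHS, 32), 32    ; sign-extend the low 32 bits in place
//   RHS = ashr (shl RHS, 32), 32
//   mul LHS, RHS                    ; 32x32->64 products of the even lanes
// The unsigned pmuludq path masks each lane with 0xffffffff instead.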
+
+// Emit a masked pternlog intrinsic. This only exists because the header has to
+// use a macro and we can't pass the input argument to both a pternlog builtin
+// and a select builtin without evaluating it twice.
+static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask,
+ ArrayRef<Value *> Ops) {
+ llvm::Type *Ty = Ops[0]->getType();
+
+ unsigned VecWidth = Ty->getPrimitiveSizeInBits();
+ unsigned EltWidth = Ty->getScalarSizeInBits();
+ Intrinsic::ID IID;
+ if (VecWidth == 128 && EltWidth == 32)
+ IID = Intrinsic::x86_avx512_pternlog_d_128;
+ else if (VecWidth == 256 && EltWidth == 32)
+ IID = Intrinsic::x86_avx512_pternlog_d_256;
+ else if (VecWidth == 512 && EltWidth == 32)
+ IID = Intrinsic::x86_avx512_pternlog_d_512;
+ else if (VecWidth == 128 && EltWidth == 64)
+ IID = Intrinsic::x86_avx512_pternlog_q_128;
+ else if (VecWidth == 256 && EltWidth == 64)
+ IID = Intrinsic::x86_avx512_pternlog_q_256;
+ else if (VecWidth == 512 && EltWidth == 64)
+ IID = Intrinsic::x86_avx512_pternlog_q_512;
+ else
+ llvm_unreachable("Unexpected intrinsic");
+
+ Value *Ternlog = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
+ Ops.drop_back());
+ Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) : Ops[0];
+ return EmitX86Select(CGF, Ops[4], Ternlog, PassThru);
}
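// In the vpternlog encoding the imm8 (Ops[3] here) is a truth table indexed
// by the bit triple drawn from the three sources; e.g. 0xCA computes
// (a & b) | (~a & c), i.e. a bitwise select, independently for every bit.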
static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
@@ -7914,11 +8911,10 @@ Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
return EmitX86CpuSupports(FeatureStr);
}
-Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
+uint32_t
+CodeGenFunction::GetX86CpuSupportsMask(ArrayRef<StringRef> FeatureStrs) {
// Processor features and mapping to processor feature value.
-
uint32_t FeaturesMask = 0;
-
for (const StringRef &FeatureStr : FeatureStrs) {
unsigned Feature =
StringSwitch<unsigned>(FeatureStr)
@@ -7927,7 +8923,14 @@ Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
;
FeaturesMask |= (1U << Feature);
}
+ return FeaturesMask;
+}
+
+Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
+ return EmitX86CpuSupports(GetX86CpuSupportsMask(FeatureStrs));
+}
+llvm::Value *CodeGenFunction::EmitX86CpuSupports(uint32_t FeaturesMask) {
// Matching the struct layout from the compiler-rt/libgcc structure that is
// filled in:
// unsigned int __cpu_vendor;
@@ -8063,8 +9066,37 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
return Builder.CreateBitCast(BuildVector(Ops),
llvm::Type::getX86_MMXTy(getLLVMContext()));
case X86::BI__builtin_ia32_vec_ext_v2si:
- return Builder.CreateExtractElement(Ops[0],
- llvm::ConstantInt::get(Ops[1]->getType(), 0));
+ case X86::BI__builtin_ia32_vec_ext_v16qi:
+ case X86::BI__builtin_ia32_vec_ext_v8hi:
+ case X86::BI__builtin_ia32_vec_ext_v4si:
+ case X86::BI__builtin_ia32_vec_ext_v4sf:
+ case X86::BI__builtin_ia32_vec_ext_v2di:
+ case X86::BI__builtin_ia32_vec_ext_v32qi:
+ case X86::BI__builtin_ia32_vec_ext_v16hi:
+ case X86::BI__builtin_ia32_vec_ext_v8si:
+ case X86::BI__builtin_ia32_vec_ext_v4di: {
+ unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
+ uint64_t Index = cast<ConstantInt>(Ops[1])->getZExtValue();
+ Index &= NumElts - 1;
+ // These builtins exist so we can ensure the index is an ICE and in range.
+ // Otherwise we could just do this in the header file.
+ return Builder.CreateExtractElement(Ops[0], Index);
+ }
+ case X86::BI__builtin_ia32_vec_set_v16qi:
+ case X86::BI__builtin_ia32_vec_set_v8hi:
+ case X86::BI__builtin_ia32_vec_set_v4si:
+ case X86::BI__builtin_ia32_vec_set_v2di:
+ case X86::BI__builtin_ia32_vec_set_v32qi:
+ case X86::BI__builtin_ia32_vec_set_v16hi:
+ case X86::BI__builtin_ia32_vec_set_v8si:
+ case X86::BI__builtin_ia32_vec_set_v4di: {
+ unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
+ unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
+ Index &= NumElts - 1;
+ // These builtins exist so we can ensure the index is an ICE and in range.
+ // Otherwise we could just do this in the header file.
+ return Builder.CreateInsertElement(Ops[0], Ops[1], Index);
+ }
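// The Index &= NumElts - 1 masking mirrors the hardware's modulo-NumElts
// behaviour; e.g. on a v4si an immediate of 5 would wrap to lane 1, though
// Sema normally rejects out-of-range immediates for these builtins before
// we get here.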
case X86::BI_mm_setcsr:
case X86::BI__builtin_ia32_ldmxcsr: {
Address Tmp = CreateMemTemp(E->getArg(0)->getType());
@@ -8141,7 +9173,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_storess128_mask:
case X86::BI__builtin_ia32_storesd128_mask: {
- return EmitX86MaskedStore(*this, Ops, 16);
+ return EmitX86MaskedStore(*this, Ops, 1);
}
case X86::BI__builtin_ia32_vpopcntb_128:
case X86::BI__builtin_ia32_vpopcntd_128:
@@ -8173,6 +9205,66 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_cvtmask2q512:
return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
+ case X86::BI__builtin_ia32_cvtb2mask128:
+ case X86::BI__builtin_ia32_cvtb2mask256:
+ case X86::BI__builtin_ia32_cvtb2mask512:
+ case X86::BI__builtin_ia32_cvtw2mask128:
+ case X86::BI__builtin_ia32_cvtw2mask256:
+ case X86::BI__builtin_ia32_cvtw2mask512:
+ case X86::BI__builtin_ia32_cvtd2mask128:
+ case X86::BI__builtin_ia32_cvtd2mask256:
+ case X86::BI__builtin_ia32_cvtd2mask512:
+ case X86::BI__builtin_ia32_cvtq2mask128:
+ case X86::BI__builtin_ia32_cvtq2mask256:
+ case X86::BI__builtin_ia32_cvtq2mask512:
+ return EmitX86ConvertToMask(*this, Ops[0]);
+
+ case X86::BI__builtin_ia32_vfmaddss3:
+ case X86::BI__builtin_ia32_vfmaddsd3:
+ case X86::BI__builtin_ia32_vfmaddss3_mask:
+ case X86::BI__builtin_ia32_vfmaddsd3_mask:
+ return EmitScalarFMAExpr(*this, Ops, Ops[0]);
+ case X86::BI__builtin_ia32_vfmaddss:
+ case X86::BI__builtin_ia32_vfmaddsd:
+ return EmitScalarFMAExpr(*this, Ops,
+ Constant::getNullValue(Ops[0]->getType()));
+ case X86::BI__builtin_ia32_vfmaddss3_maskz:
+ case X86::BI__builtin_ia32_vfmaddsd3_maskz:
+ return EmitScalarFMAExpr(*this, Ops, Ops[0], /*ZeroMask*/true);
+ case X86::BI__builtin_ia32_vfmaddss3_mask3:
+ case X86::BI__builtin_ia32_vfmaddsd3_mask3:
+ return EmitScalarFMAExpr(*this, Ops, Ops[2], /*ZeroMask*/false, 2);
+ case X86::BI__builtin_ia32_vfmsubss3_mask3:
+ case X86::BI__builtin_ia32_vfmsubsd3_mask3:
+ return EmitScalarFMAExpr(*this, Ops, Ops[2], /*ZeroMask*/false, 2,
+ /*NegAcc*/true);
+ case X86::BI__builtin_ia32_vfmaddps:
+ case X86::BI__builtin_ia32_vfmaddpd:
+ case X86::BI__builtin_ia32_vfmaddps256:
+ case X86::BI__builtin_ia32_vfmaddpd256:
+ case X86::BI__builtin_ia32_vfmaddps512_mask:
+ case X86::BI__builtin_ia32_vfmaddps512_maskz:
+ case X86::BI__builtin_ia32_vfmaddps512_mask3:
+ case X86::BI__builtin_ia32_vfmsubps512_mask3:
+ case X86::BI__builtin_ia32_vfmaddpd512_mask:
+ case X86::BI__builtin_ia32_vfmaddpd512_maskz:
+ case X86::BI__builtin_ia32_vfmaddpd512_mask3:
+ case X86::BI__builtin_ia32_vfmsubpd512_mask3:
+ return EmitX86FMAExpr(*this, Ops, BuiltinID, /*IsAddSub*/false);
+ case X86::BI__builtin_ia32_vfmaddsubps:
+ case X86::BI__builtin_ia32_vfmaddsubpd:
+ case X86::BI__builtin_ia32_vfmaddsubps256:
+ case X86::BI__builtin_ia32_vfmaddsubpd256:
+ case X86::BI__builtin_ia32_vfmaddsubps512_mask:
+ case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
+ case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
+ case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
+ case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
+ case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
+ case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
+ case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
+ return EmitX86FMAExpr(*this, Ops, BuiltinID, /*IsAddSub*/true);
+
case X86::BI__builtin_ia32_movdqa32store128_mask:
case X86::BI__builtin_ia32_movdqa64store128_mask:
case X86::BI__builtin_ia32_storeaps128_mask:
@@ -8211,7 +9303,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_loadss128_mask:
case X86::BI__builtin_ia32_loadsd128_mask:
- return EmitX86MaskedLoad(*this, Ops, 16);
+ return EmitX86MaskedLoad(*this, Ops, 1);
case X86::BI__builtin_ia32_loadaps128_mask:
case X86::BI__builtin_ia32_loadaps256_mask:
@@ -8230,11 +9322,45 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
return EmitX86MaskedLoad(*this, Ops, Align);
}
- case X86::BI__builtin_ia32_vbroadcastf128_pd256:
- case X86::BI__builtin_ia32_vbroadcastf128_ps256: {
- llvm::Type *DstTy = ConvertType(E->getType());
- return EmitX86SubVectorBroadcast(*this, Ops, DstTy, 128, 1);
- }
+ case X86::BI__builtin_ia32_expandloaddf128_mask:
+ case X86::BI__builtin_ia32_expandloaddf256_mask:
+ case X86::BI__builtin_ia32_expandloaddf512_mask:
+ case X86::BI__builtin_ia32_expandloadsf128_mask:
+ case X86::BI__builtin_ia32_expandloadsf256_mask:
+ case X86::BI__builtin_ia32_expandloadsf512_mask:
+ case X86::BI__builtin_ia32_expandloaddi128_mask:
+ case X86::BI__builtin_ia32_expandloaddi256_mask:
+ case X86::BI__builtin_ia32_expandloaddi512_mask:
+ case X86::BI__builtin_ia32_expandloadsi128_mask:
+ case X86::BI__builtin_ia32_expandloadsi256_mask:
+ case X86::BI__builtin_ia32_expandloadsi512_mask:
+ case X86::BI__builtin_ia32_expandloadhi128_mask:
+ case X86::BI__builtin_ia32_expandloadhi256_mask:
+ case X86::BI__builtin_ia32_expandloadhi512_mask:
+ case X86::BI__builtin_ia32_expandloadqi128_mask:
+ case X86::BI__builtin_ia32_expandloadqi256_mask:
+ case X86::BI__builtin_ia32_expandloadqi512_mask:
+ return EmitX86ExpandLoad(*this, Ops);
+
+ case X86::BI__builtin_ia32_compressstoredf128_mask:
+ case X86::BI__builtin_ia32_compressstoredf256_mask:
+ case X86::BI__builtin_ia32_compressstoredf512_mask:
+ case X86::BI__builtin_ia32_compressstoresf128_mask:
+ case X86::BI__builtin_ia32_compressstoresf256_mask:
+ case X86::BI__builtin_ia32_compressstoresf512_mask:
+ case X86::BI__builtin_ia32_compressstoredi128_mask:
+ case X86::BI__builtin_ia32_compressstoredi256_mask:
+ case X86::BI__builtin_ia32_compressstoredi512_mask:
+ case X86::BI__builtin_ia32_compressstoresi128_mask:
+ case X86::BI__builtin_ia32_compressstoresi256_mask:
+ case X86::BI__builtin_ia32_compressstoresi512_mask:
+ case X86::BI__builtin_ia32_compressstorehi128_mask:
+ case X86::BI__builtin_ia32_compressstorehi256_mask:
+ case X86::BI__builtin_ia32_compressstorehi512_mask:
+ case X86::BI__builtin_ia32_compressstoreqi128_mask:
+ case X86::BI__builtin_ia32_compressstoreqi256_mask:
+ case X86::BI__builtin_ia32_compressstoreqi512_mask:
+ return EmitX86CompressStore(*this, Ops);
case X86::BI__builtin_ia32_storehps:
case X86::BI__builtin_ia32_storelps: {
@@ -8246,17 +9372,275 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
// extract (0, 1)
unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
- llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index);
- Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");
+ Ops[1] = Builder.CreateExtractElement(Ops[1], Index, "extract");
// cast pointer to i64 & store
Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
}
+ case X86::BI__builtin_ia32_vextractf128_pd256:
+ case X86::BI__builtin_ia32_vextractf128_ps256:
+ case X86::BI__builtin_ia32_vextractf128_si256:
+ case X86::BI__builtin_ia32_extract128i256:
+ case X86::BI__builtin_ia32_extractf64x4_mask:
+ case X86::BI__builtin_ia32_extractf32x4_mask:
+ case X86::BI__builtin_ia32_extracti64x4_mask:
+ case X86::BI__builtin_ia32_extracti32x4_mask:
+ case X86::BI__builtin_ia32_extractf32x8_mask:
+ case X86::BI__builtin_ia32_extracti32x8_mask:
+ case X86::BI__builtin_ia32_extractf32x4_256_mask:
+ case X86::BI__builtin_ia32_extracti32x4_256_mask:
+ case X86::BI__builtin_ia32_extractf64x2_256_mask:
+ case X86::BI__builtin_ia32_extracti64x2_256_mask:
+ case X86::BI__builtin_ia32_extractf64x2_512_mask:
+ case X86::BI__builtin_ia32_extracti64x2_512_mask: {
+ llvm::Type *DstTy = ConvertType(E->getType());
+ unsigned NumElts = DstTy->getVectorNumElements();
+ unsigned SrcNumElts = Ops[0]->getType()->getVectorNumElements();
+ unsigned SubVectors = SrcNumElts / NumElts;
+ unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue();
+ assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
+ Index &= SubVectors - 1; // Remove any extra bits.
+ Index *= NumElts;
+
+ uint32_t Indices[16];
+ for (unsigned i = 0; i != NumElts; ++i)
+ Indices[i] = i + Index;
+
+ Value *Res = Builder.CreateShuffleVector(Ops[0],
+ UndefValue::get(Ops[0]->getType()),
+ makeArrayRef(Indices, NumElts),
+ "extract");
+
+ if (Ops.size() == 4)
+ Res = EmitX86Select(*this, Ops[3], Res, Ops[2]);
+
+ return Res;
+ }
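// Concrete example: extractf32x4_mask on a v16f32 with Index == 3 computes
// SubVectors == 4, scales the masked index to element 12, and emits a
// shuffle with Indices {12, 13, 14, 15}, followed by the usual mask select
// when a merge mask is supplied.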
+ case X86::BI__builtin_ia32_vinsertf128_pd256:
+ case X86::BI__builtin_ia32_vinsertf128_ps256:
+ case X86::BI__builtin_ia32_vinsertf128_si256:
+ case X86::BI__builtin_ia32_insert128i256:
+ case X86::BI__builtin_ia32_insertf64x4:
+ case X86::BI__builtin_ia32_insertf32x4:
+ case X86::BI__builtin_ia32_inserti64x4:
+ case X86::BI__builtin_ia32_inserti32x4:
+ case X86::BI__builtin_ia32_insertf32x8:
+ case X86::BI__builtin_ia32_inserti32x8:
+ case X86::BI__builtin_ia32_insertf32x4_256:
+ case X86::BI__builtin_ia32_inserti32x4_256:
+ case X86::BI__builtin_ia32_insertf64x2_256:
+ case X86::BI__builtin_ia32_inserti64x2_256:
+ case X86::BI__builtin_ia32_insertf64x2_512:
+ case X86::BI__builtin_ia32_inserti64x2_512: {
+ unsigned DstNumElts = Ops[0]->getType()->getVectorNumElements();
+ unsigned SrcNumElts = Ops[1]->getType()->getVectorNumElements();
+ unsigned SubVectors = DstNumElts / SrcNumElts;
+ unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
+ assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
+ Index &= SubVectors - 1; // Remove any extra bits.
+ Index *= SrcNumElts;
+
+ uint32_t Indices[16];
+ for (unsigned i = 0; i != DstNumElts; ++i)
+ Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i;
+
+ Value *Op1 = Builder.CreateShuffleVector(Ops[1],
+ UndefValue::get(Ops[1]->getType()),
+ makeArrayRef(Indices, DstNumElts),
+ "widen");
+
+ for (unsigned i = 0; i != DstNumElts; ++i) {
+ if (i >= Index && i < (Index + SrcNumElts))
+ Indices[i] = (i - Index) + DstNumElts;
+ else
+ Indices[i] = i;
+ }
+
+ return Builder.CreateShuffleVector(Ops[0], Op1,
+ makeArrayRef(Indices, DstNumElts),
+ "insert");
+ }
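// Concrete example: inserting a v4f32 into a v8f32 at Index == 1 first
// widens the subvector with Indices {0, 1, 2, 3, 4, 5, 6, 7} (lanes 4-7
// read undef), then blends with Indices {0, 1, 2, 3, 8, 9, 10, 11}, so the
// high half of the result comes from the widened subvector.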
+ case X86::BI__builtin_ia32_pmovqd512_mask:
+ case X86::BI__builtin_ia32_pmovwb512_mask: {
+ Value *Res = Builder.CreateTrunc(Ops[0], Ops[1]->getType());
+ return EmitX86Select(*this, Ops[2], Res, Ops[1]);
+ }
+ case X86::BI__builtin_ia32_pmovdb512_mask:
+ case X86::BI__builtin_ia32_pmovdw512_mask:
+ case X86::BI__builtin_ia32_pmovqw512_mask: {
+ if (const auto *C = dyn_cast<Constant>(Ops[2]))
+ if (C->isAllOnesValue())
+ return Builder.CreateTrunc(Ops[0], Ops[1]->getType());
+
+ Intrinsic::ID IID;
+ switch (BuiltinID) {
+ default: llvm_unreachable("Unsupported intrinsic!");
+ case X86::BI__builtin_ia32_pmovdb512_mask:
+ IID = Intrinsic::x86_avx512_mask_pmov_db_512;
+ break;
+ case X86::BI__builtin_ia32_pmovdw512_mask:
+ IID = Intrinsic::x86_avx512_mask_pmov_dw_512;
+ break;
+ case X86::BI__builtin_ia32_pmovqw512_mask:
+ IID = Intrinsic::x86_avx512_mask_pmov_qw_512;
+ break;
+ }
+
+ Function *Intr = CGM.getIntrinsic(IID);
+ return Builder.CreateCall(Intr, Ops);
+ }
+ case X86::BI__builtin_ia32_pblendw128:
+ case X86::BI__builtin_ia32_blendpd:
+ case X86::BI__builtin_ia32_blendps:
+ case X86::BI__builtin_ia32_blendpd256:
+ case X86::BI__builtin_ia32_blendps256:
+ case X86::BI__builtin_ia32_pblendw256:
+ case X86::BI__builtin_ia32_pblendd128:
+ case X86::BI__builtin_ia32_pblendd256: {
+ unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
+ unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
+
+ uint32_t Indices[16];
+    // If there are more than 8 elements, the immediate is used twice, so
+    // make sure we handle that.
+ for (unsigned i = 0; i != NumElts; ++i)
+ Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i;
+
+ return Builder.CreateShuffleVector(Ops[0], Ops[1],
+ makeArrayRef(Indices, NumElts),
+ "blend");
+ }
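// Worked example: pblendw128 with Imm == 0xA5 (0b10100101) yields Indices
// {8, 1, 10, 3, 4, 13, 6, 15}: set bits pick the corresponding lane of
// Ops[1] (offset by NumElts), clear bits keep the lane of Ops[0].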
+ case X86::BI__builtin_ia32_pshuflw:
+ case X86::BI__builtin_ia32_pshuflw256:
+ case X86::BI__builtin_ia32_pshuflw512: {
+ uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
+ llvm::Type *Ty = Ops[0]->getType();
+ unsigned NumElts = Ty->getVectorNumElements();
+
+    // Splat the 8 bits of the immediate 4 times so the loop can wrap around.
+ Imm = (Imm & 0xff) * 0x01010101;
+
+ uint32_t Indices[32];
+ for (unsigned l = 0; l != NumElts; l += 8) {
+ for (unsigned i = 0; i != 4; ++i) {
+ Indices[l + i] = l + (Imm & 3);
+ Imm >>= 2;
+ }
+ for (unsigned i = 4; i != 8; ++i)
+ Indices[l + i] = l + i;
+ }
+
+ return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty),
+ makeArrayRef(Indices, NumElts),
+ "pshuflw");
+ }
+ case X86::BI__builtin_ia32_pshufhw:
+ case X86::BI__builtin_ia32_pshufhw256:
+ case X86::BI__builtin_ia32_pshufhw512: {
+ uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
+ llvm::Type *Ty = Ops[0]->getType();
+ unsigned NumElts = Ty->getVectorNumElements();
+
+    // Splat the 8 bits of the immediate 4 times so the loop can wrap around.
+ Imm = (Imm & 0xff) * 0x01010101;
+
+ uint32_t Indices[32];
+ for (unsigned l = 0; l != NumElts; l += 8) {
+ for (unsigned i = 0; i != 4; ++i)
+ Indices[l + i] = l + i;
+ for (unsigned i = 4; i != 8; ++i) {
+ Indices[l + i] = l + 4 + (Imm & 3);
+ Imm >>= 2;
+ }
+ }
+
+ return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty),
+ makeArrayRef(Indices, NumElts),
+ "pshufhw");
+ }
+ case X86::BI__builtin_ia32_pshufd:
+ case X86::BI__builtin_ia32_pshufd256:
+ case X86::BI__builtin_ia32_pshufd512:
+ case X86::BI__builtin_ia32_vpermilpd:
+ case X86::BI__builtin_ia32_vpermilps:
+ case X86::BI__builtin_ia32_vpermilpd256:
+ case X86::BI__builtin_ia32_vpermilps256:
+ case X86::BI__builtin_ia32_vpermilpd512:
+ case X86::BI__builtin_ia32_vpermilps512: {
+ uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
+ llvm::Type *Ty = Ops[0]->getType();
+ unsigned NumElts = Ty->getVectorNumElements();
+ unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
+ unsigned NumLaneElts = NumElts / NumLanes;
+
+    // Splat the 8 bits of the immediate 4 times so the loop can wrap around.
+ Imm = (Imm & 0xff) * 0x01010101;
+
+ uint32_t Indices[16];
+ for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+ for (unsigned i = 0; i != NumLaneElts; ++i) {
+ Indices[i + l] = (Imm % NumLaneElts) + l;
+ Imm /= NumLaneElts;
+ }
+ }
+
+ return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty),
+ makeArrayRef(Indices, NumElts),
+ "permil");
+ }
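// Worked example: pshufd with Imm == 0x1B on a v4i32 decodes the 2-bit
// fields 3, 2, 1, 0 into Indices {3, 2, 1, 0}, a full lane reversal; the
// splatted immediate makes the same pattern repeat in every 128-bit lane
// for the wider variants.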
+ case X86::BI__builtin_ia32_shufpd:
+ case X86::BI__builtin_ia32_shufpd256:
+ case X86::BI__builtin_ia32_shufpd512:
+ case X86::BI__builtin_ia32_shufps:
+ case X86::BI__builtin_ia32_shufps256:
+ case X86::BI__builtin_ia32_shufps512: {
+ uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
+ llvm::Type *Ty = Ops[0]->getType();
+ unsigned NumElts = Ty->getVectorNumElements();
+ unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
+ unsigned NumLaneElts = NumElts / NumLanes;
+
+    // Splat the 8 bits of the immediate 4 times so the loop can wrap around.
+ Imm = (Imm & 0xff) * 0x01010101;
+
+ uint32_t Indices[16];
+ for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+ for (unsigned i = 0; i != NumLaneElts; ++i) {
+ unsigned Index = Imm % NumLaneElts;
+ Imm /= NumLaneElts;
+ if (i >= (NumLaneElts / 2))
+ Index += NumElts;
+ Indices[l + i] = l + Index;
+ }
+ }
+
+ return Builder.CreateShuffleVector(Ops[0], Ops[1],
+ makeArrayRef(Indices, NumElts),
+ "shufp");
+ }
+ case X86::BI__builtin_ia32_permdi256:
+ case X86::BI__builtin_ia32_permdf256:
+ case X86::BI__builtin_ia32_permdi512:
+ case X86::BI__builtin_ia32_permdf512: {
+ unsigned Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
+ llvm::Type *Ty = Ops[0]->getType();
+ unsigned NumElts = Ty->getVectorNumElements();
+
+ // These intrinsics operate on 256-bit lanes of four 64-bit elements.
+ uint32_t Indices[8];
+ for (unsigned l = 0; l != NumElts; l += 4)
+ for (unsigned i = 0; i != 4; ++i)
+ Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);
+
+ return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty),
+ makeArrayRef(Indices, NumElts),
+ "perm");
+ }
case X86::BI__builtin_ia32_palignr128:
case X86::BI__builtin_ia32_palignr256:
- case X86::BI__builtin_ia32_palignr512_mask: {
- unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
+ case X86::BI__builtin_ia32_palignr512: {
+ unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
assert(NumElts % 16 == 0);
@@ -8285,15 +9669,58 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
}
}
- Value *Align = Builder.CreateShuffleVector(Ops[1], Ops[0],
- makeArrayRef(Indices, NumElts),
- "palignr");
+ return Builder.CreateShuffleVector(Ops[1], Ops[0],
+ makeArrayRef(Indices, NumElts),
+ "palignr");
+ }
+ case X86::BI__builtin_ia32_alignd128:
+ case X86::BI__builtin_ia32_alignd256:
+ case X86::BI__builtin_ia32_alignd512:
+ case X86::BI__builtin_ia32_alignq128:
+ case X86::BI__builtin_ia32_alignq256:
+ case X86::BI__builtin_ia32_alignq512: {
+ unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
+ unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
+
+    // Mask the shift amount to the width of two vectors.
+ ShiftVal &= (2 * NumElts) - 1;
- // If this isn't a masked builtin, just return the align operation.
- if (Ops.size() == 3)
- return Align;
+ uint32_t Indices[16];
+ for (unsigned i = 0; i != NumElts; ++i)
+ Indices[i] = i + ShiftVal;
+
+ return Builder.CreateShuffleVector(Ops[1], Ops[0],
+ makeArrayRef(Indices, NumElts),
+ "valign");
+ }
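// Worked example: alignd128 with ShiftVal == 3 gives Indices {3, 4, 5, 6}
// over the (Ops[1], Ops[0]) pair: lane 3 of Ops[1] followed by lanes 0-2 of
// Ops[0], matching valignd's concatenate-and-shift-right behaviour.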
+ case X86::BI__builtin_ia32_shuf_f32x4_256:
+ case X86::BI__builtin_ia32_shuf_f64x2_256:
+ case X86::BI__builtin_ia32_shuf_i32x4_256:
+ case X86::BI__builtin_ia32_shuf_i64x2_256:
+ case X86::BI__builtin_ia32_shuf_f32x4:
+ case X86::BI__builtin_ia32_shuf_f64x2:
+ case X86::BI__builtin_ia32_shuf_i32x4:
+ case X86::BI__builtin_ia32_shuf_i64x2: {
+ unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
+ llvm::Type *Ty = Ops[0]->getType();
+ unsigned NumElts = Ty->getVectorNumElements();
+ unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2;
+ unsigned NumLaneElts = NumElts / NumLanes;
+
+ uint32_t Indices[16];
+ for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+ unsigned Index = (Imm % NumLanes) * NumLaneElts;
+ Imm /= NumLanes; // Discard the bits we just used.
+ if (l >= (NumElts / 2))
+ Index += NumElts; // Switch to other source.
+ for (unsigned i = 0; i != NumLaneElts; ++i) {
+ Indices[l + i] = Index + i;
+ }
+ }
- return EmitX86Select(*this, Ops[4], Align, Ops[3]);
+ return Builder.CreateShuffleVector(Ops[0], Ops[1],
+ makeArrayRef(Indices, NumElts),
+ "shuf");
}
case X86::BI__builtin_ia32_vperm2f128_pd256:
@@ -8335,6 +9762,66 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
"vperm");
}
+ case X86::BI__builtin_ia32_pslldqi128_byteshift:
+ case X86::BI__builtin_ia32_pslldqi256_byteshift:
+ case X86::BI__builtin_ia32_pslldqi512_byteshift: {
+ unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
+ llvm::Type *ResultType = Ops[0]->getType();
+ // Builtin type is vXi64 so multiply by 8 to get bytes.
+ unsigned NumElts = ResultType->getVectorNumElements() * 8;
+
+ // If pslldq is shifting the vector more than 15 bytes, emit zero.
+ if (ShiftVal >= 16)
+ return llvm::Constant::getNullValue(ResultType);
+
+ uint32_t Indices[64];
+    // 256/512-bit pslldq operates on 128-bit lanes, so we need to handle that.
+ for (unsigned l = 0; l != NumElts; l += 16) {
+ for (unsigned i = 0; i != 16; ++i) {
+ unsigned Idx = NumElts + i - ShiftVal;
+ if (Idx < NumElts) Idx -= NumElts - 16; // end of lane, switch operand.
+ Indices[l + i] = Idx + l;
+ }
+ }
+
+ llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, NumElts);
+ Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
+ Value *Zero = llvm::Constant::getNullValue(VecTy);
+ Value *SV = Builder.CreateShuffleVector(Zero, Cast,
+ makeArrayRef(Indices, NumElts),
+ "pslldq");
+ return Builder.CreateBitCast(SV, Ops[0]->getType(), "cast");
+ }
+ case X86::BI__builtin_ia32_psrldqi128_byteshift:
+ case X86::BI__builtin_ia32_psrldqi256_byteshift:
+ case X86::BI__builtin_ia32_psrldqi512_byteshift: {
+ unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
+ llvm::Type *ResultType = Ops[0]->getType();
+ // Builtin type is vXi64 so multiply by 8 to get bytes.
+ unsigned NumElts = ResultType->getVectorNumElements() * 8;
+
+ // If psrldq is shifting the vector more than 15 bytes, emit zero.
+ if (ShiftVal >= 16)
+ return llvm::Constant::getNullValue(ResultType);
+
+ uint32_t Indices[64];
+    // 256/512-bit psrldq operates on 128-bit lanes, so we need to handle that.
+ for (unsigned l = 0; l != NumElts; l += 16) {
+ for (unsigned i = 0; i != 16; ++i) {
+ unsigned Idx = i + ShiftVal;
+ if (Idx >= 16) Idx += NumElts - 16; // end of lane, switch operand.
+ Indices[l + i] = Idx + l;
+ }
+ }
+
+ llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, NumElts);
+ Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
+ Value *Zero = llvm::Constant::getNullValue(VecTy);
+ Value *SV = Builder.CreateShuffleVector(Cast, Zero,
+ makeArrayRef(Indices, NumElts),
+ "psrldq");
+ return Builder.CreateBitCast(SV, ResultType, "cast");
+ }
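// Worked example: psrldqi128_byteshift with ShiftVal == 5 works on 16
// bytes; Indices become {5, 6, ..., 15, 16, 17, 18, 19, 20}, and indices
// >= 16 read zero bytes from the second shuffle operand, giving a right
// shift by 5 bytes with zero fill.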
case X86::BI__builtin_ia32_movnti:
case X86::BI__builtin_ia32_movnti64:
case X86::BI__builtin_ia32_movntsd:
@@ -8380,6 +9867,13 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_selectpd_256:
case X86::BI__builtin_ia32_selectpd_512:
return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
+ case X86::BI__builtin_ia32_selectss_128:
+ case X86::BI__builtin_ia32_selectsd_128: {
+ Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
+ Value *B = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
+ A = EmitX86ScalarSelect(*this, Ops[0], A, B);
+ return Builder.CreateInsertElement(Ops[1], A, (uint64_t)0);
+ }
case X86::BI__builtin_ia32_cmpb128_mask:
case X86::BI__builtin_ia32_cmpb256_mask:
case X86::BI__builtin_ia32_cmpb512_mask:
@@ -8411,6 +9905,18 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
return EmitX86MaskedCompare(*this, CC, false, Ops);
}
+ case X86::BI__builtin_ia32_kortestchi:
+ case X86::BI__builtin_ia32_kortestzhi: {
+ Value *Or = EmitX86MaskLogic(*this, Instruction::Or, 16, Ops);
+ Value *C;
+ if (BuiltinID == X86::BI__builtin_ia32_kortestchi)
+ C = llvm::Constant::getAllOnesValue(Builder.getInt16Ty());
+ else
+ C = llvm::Constant::getNullValue(Builder.getInt16Ty());
+ Value *Cmp = Builder.CreateICmpEQ(Or, C);
+ return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
+ }
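// In other words, kortestz returns 1 iff (Ops[0] | Ops[1]) == 0 and
// kortestc returns 1 iff the OR is all ones, zero-extended to the builtin's
// result type.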
+
case X86::BI__builtin_ia32_kandhi:
return EmitX86MaskLogic(*this, Instruction::And, 16, Ops);
case X86::BI__builtin_ia32_kandnhi:
@@ -8427,85 +9933,176 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
Builder.getInt16Ty());
}
- case X86::BI__builtin_ia32_vplzcntd_128_mask:
- case X86::BI__builtin_ia32_vplzcntd_256_mask:
- case X86::BI__builtin_ia32_vplzcntd_512_mask:
- case X86::BI__builtin_ia32_vplzcntq_128_mask:
- case X86::BI__builtin_ia32_vplzcntq_256_mask:
- case X86::BI__builtin_ia32_vplzcntq_512_mask: {
+ case X86::BI__builtin_ia32_kunpckdi:
+ case X86::BI__builtin_ia32_kunpcksi:
+ case X86::BI__builtin_ia32_kunpckhi: {
+ unsigned NumElts = Ops[0]->getType()->getScalarSizeInBits();
+ Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
+ Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
+ uint32_t Indices[64];
+ for (unsigned i = 0; i != NumElts; ++i)
+ Indices[i] = i;
+
+ // First extract half of each vector. This gives better codegen than
+ // doing it in a single shuffle.
+ LHS = Builder.CreateShuffleVector(LHS, LHS,
+ makeArrayRef(Indices, NumElts / 2));
+ RHS = Builder.CreateShuffleVector(RHS, RHS,
+ makeArrayRef(Indices, NumElts / 2));
+ // Concat the vectors.
+ // NOTE: Operands are swapped to match the intrinsic definition.
+ Value *Res = Builder.CreateShuffleVector(RHS, LHS,
+ makeArrayRef(Indices, NumElts));
+ return Builder.CreateBitCast(Res, Ops[0]->getType());
+ }
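// For kunpckhi on two i16 masks this keeps the low 8 bits of each operand
// via the half-width shuffles, then concatenates them with RHS in the low
// half and LHS in the high half before bitcasting the v16i1 back to i16.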
+
+ case X86::BI__builtin_ia32_vplzcntd_128:
+ case X86::BI__builtin_ia32_vplzcntd_256:
+ case X86::BI__builtin_ia32_vplzcntd_512:
+ case X86::BI__builtin_ia32_vplzcntq_128:
+ case X86::BI__builtin_ia32_vplzcntq_256:
+ case X86::BI__builtin_ia32_vplzcntq_512: {
Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
- return EmitX86Select(*this, Ops[2],
- Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}),
- Ops[1]);
+ return Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)});
+ }
+ case X86::BI__builtin_ia32_sqrtss:
+ case X86::BI__builtin_ia32_sqrtsd: {
+ Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
+ Function *F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
+ A = Builder.CreateCall(F, {A});
+ return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
+ }
+ case X86::BI__builtin_ia32_sqrtsd_round_mask:
+ case X86::BI__builtin_ia32_sqrtss_round_mask: {
+ unsigned CC = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
+    // Only lower to generic IR if the rounding mode is 4 (AKA CUR_DIRECTION);
+    // otherwise keep the intrinsic.
+ if (CC != 4) {
+ Intrinsic::ID IID = BuiltinID == X86::BI__builtin_ia32_sqrtsd_round_mask ?
+ Intrinsic::x86_avx512_mask_sqrt_sd :
+ Intrinsic::x86_avx512_mask_sqrt_ss;
+ return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
+ }
+ Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
+ Function *F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
+ A = Builder.CreateCall(F, A);
+ Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
+ A = EmitX86ScalarSelect(*this, Ops[3], A, Src);
+ return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
+ }
+ case X86::BI__builtin_ia32_sqrtpd256:
+ case X86::BI__builtin_ia32_sqrtpd:
+ case X86::BI__builtin_ia32_sqrtps256:
+ case X86::BI__builtin_ia32_sqrtps:
+ case X86::BI__builtin_ia32_sqrtps512:
+ case X86::BI__builtin_ia32_sqrtpd512: {
+ if (Ops.size() == 2) {
+ unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
+      // Only lower to generic IR if the rounding mode is 4 (AKA
+      // CUR_DIRECTION); otherwise keep the intrinsic.
+ if (CC != 4) {
+ Intrinsic::ID IID = BuiltinID == X86::BI__builtin_ia32_sqrtps512 ?
+ Intrinsic::x86_avx512_sqrt_ps_512 :
+ Intrinsic::x86_avx512_sqrt_pd_512;
+ return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
+ }
+ }
+ Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType());
+ return Builder.CreateCall(F, Ops[0]);
}
-
case X86::BI__builtin_ia32_pabsb128:
case X86::BI__builtin_ia32_pabsw128:
case X86::BI__builtin_ia32_pabsd128:
case X86::BI__builtin_ia32_pabsb256:
case X86::BI__builtin_ia32_pabsw256:
case X86::BI__builtin_ia32_pabsd256:
- case X86::BI__builtin_ia32_pabsq128_mask:
- case X86::BI__builtin_ia32_pabsq256_mask:
- case X86::BI__builtin_ia32_pabsb512_mask:
- case X86::BI__builtin_ia32_pabsw512_mask:
- case X86::BI__builtin_ia32_pabsd512_mask:
- case X86::BI__builtin_ia32_pabsq512_mask:
+ case X86::BI__builtin_ia32_pabsq128:
+ case X86::BI__builtin_ia32_pabsq256:
+ case X86::BI__builtin_ia32_pabsb512:
+ case X86::BI__builtin_ia32_pabsw512:
+ case X86::BI__builtin_ia32_pabsd512:
+ case X86::BI__builtin_ia32_pabsq512:
return EmitX86Abs(*this, Ops);
case X86::BI__builtin_ia32_pmaxsb128:
case X86::BI__builtin_ia32_pmaxsw128:
case X86::BI__builtin_ia32_pmaxsd128:
- case X86::BI__builtin_ia32_pmaxsq128_mask:
+ case X86::BI__builtin_ia32_pmaxsq128:
case X86::BI__builtin_ia32_pmaxsb256:
case X86::BI__builtin_ia32_pmaxsw256:
case X86::BI__builtin_ia32_pmaxsd256:
- case X86::BI__builtin_ia32_pmaxsq256_mask:
- case X86::BI__builtin_ia32_pmaxsb512_mask:
- case X86::BI__builtin_ia32_pmaxsw512_mask:
- case X86::BI__builtin_ia32_pmaxsd512_mask:
- case X86::BI__builtin_ia32_pmaxsq512_mask:
+ case X86::BI__builtin_ia32_pmaxsq256:
+ case X86::BI__builtin_ia32_pmaxsb512:
+ case X86::BI__builtin_ia32_pmaxsw512:
+ case X86::BI__builtin_ia32_pmaxsd512:
+ case X86::BI__builtin_ia32_pmaxsq512:
return EmitX86MinMax(*this, ICmpInst::ICMP_SGT, Ops);
case X86::BI__builtin_ia32_pmaxub128:
case X86::BI__builtin_ia32_pmaxuw128:
case X86::BI__builtin_ia32_pmaxud128:
- case X86::BI__builtin_ia32_pmaxuq128_mask:
+ case X86::BI__builtin_ia32_pmaxuq128:
case X86::BI__builtin_ia32_pmaxub256:
case X86::BI__builtin_ia32_pmaxuw256:
case X86::BI__builtin_ia32_pmaxud256:
- case X86::BI__builtin_ia32_pmaxuq256_mask:
- case X86::BI__builtin_ia32_pmaxub512_mask:
- case X86::BI__builtin_ia32_pmaxuw512_mask:
- case X86::BI__builtin_ia32_pmaxud512_mask:
- case X86::BI__builtin_ia32_pmaxuq512_mask:
+ case X86::BI__builtin_ia32_pmaxuq256:
+ case X86::BI__builtin_ia32_pmaxub512:
+ case X86::BI__builtin_ia32_pmaxuw512:
+ case X86::BI__builtin_ia32_pmaxud512:
+ case X86::BI__builtin_ia32_pmaxuq512:
return EmitX86MinMax(*this, ICmpInst::ICMP_UGT, Ops);
case X86::BI__builtin_ia32_pminsb128:
case X86::BI__builtin_ia32_pminsw128:
case X86::BI__builtin_ia32_pminsd128:
- case X86::BI__builtin_ia32_pminsq128_mask:
+ case X86::BI__builtin_ia32_pminsq128:
case X86::BI__builtin_ia32_pminsb256:
case X86::BI__builtin_ia32_pminsw256:
case X86::BI__builtin_ia32_pminsd256:
- case X86::BI__builtin_ia32_pminsq256_mask:
- case X86::BI__builtin_ia32_pminsb512_mask:
- case X86::BI__builtin_ia32_pminsw512_mask:
- case X86::BI__builtin_ia32_pminsd512_mask:
- case X86::BI__builtin_ia32_pminsq512_mask:
+ case X86::BI__builtin_ia32_pminsq256:
+ case X86::BI__builtin_ia32_pminsb512:
+ case X86::BI__builtin_ia32_pminsw512:
+ case X86::BI__builtin_ia32_pminsd512:
+ case X86::BI__builtin_ia32_pminsq512:
return EmitX86MinMax(*this, ICmpInst::ICMP_SLT, Ops);
case X86::BI__builtin_ia32_pminub128:
case X86::BI__builtin_ia32_pminuw128:
case X86::BI__builtin_ia32_pminud128:
- case X86::BI__builtin_ia32_pminuq128_mask:
+ case X86::BI__builtin_ia32_pminuq128:
case X86::BI__builtin_ia32_pminub256:
case X86::BI__builtin_ia32_pminuw256:
case X86::BI__builtin_ia32_pminud256:
- case X86::BI__builtin_ia32_pminuq256_mask:
- case X86::BI__builtin_ia32_pminub512_mask:
- case X86::BI__builtin_ia32_pminuw512_mask:
- case X86::BI__builtin_ia32_pminud512_mask:
- case X86::BI__builtin_ia32_pminuq512_mask:
+ case X86::BI__builtin_ia32_pminuq256:
+ case X86::BI__builtin_ia32_pminub512:
+ case X86::BI__builtin_ia32_pminuw512:
+ case X86::BI__builtin_ia32_pminud512:
+ case X86::BI__builtin_ia32_pminuq512:
return EmitX86MinMax(*this, ICmpInst::ICMP_ULT, Ops);
+ case X86::BI__builtin_ia32_pmuludq128:
+ case X86::BI__builtin_ia32_pmuludq256:
+ case X86::BI__builtin_ia32_pmuludq512:
+ return EmitX86Muldq(*this, /*IsSigned*/false, Ops);
+
+ case X86::BI__builtin_ia32_pmuldq128:
+ case X86::BI__builtin_ia32_pmuldq256:
+ case X86::BI__builtin_ia32_pmuldq512:
+ return EmitX86Muldq(*this, /*IsSigned*/true, Ops);
+
+ case X86::BI__builtin_ia32_pternlogd512_mask:
+ case X86::BI__builtin_ia32_pternlogq512_mask:
+ case X86::BI__builtin_ia32_pternlogd128_mask:
+ case X86::BI__builtin_ia32_pternlogd256_mask:
+ case X86::BI__builtin_ia32_pternlogq128_mask:
+ case X86::BI__builtin_ia32_pternlogq256_mask:
+ return EmitX86Ternlog(*this, /*ZeroMask*/false, Ops);
+
+ case X86::BI__builtin_ia32_pternlogd512_maskz:
+ case X86::BI__builtin_ia32_pternlogq512_maskz:
+ case X86::BI__builtin_ia32_pternlogd128_maskz:
+ case X86::BI__builtin_ia32_pternlogd256_maskz:
+ case X86::BI__builtin_ia32_pternlogq128_maskz:
+ case X86::BI__builtin_ia32_pternlogq256_maskz:
+ return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops);
+
// 3DNow!
case X86::BI__builtin_ia32_pswapdsf:
case X86::BI__builtin_ia32_pswapdsi: {
@@ -8549,7 +10146,44 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
return Builder.CreateExtractValue(Call, 1);
}
- // SSE packed comparison intrinsics
+ case X86::BI__builtin_ia32_fpclassps128_mask:
+ case X86::BI__builtin_ia32_fpclassps256_mask:
+ case X86::BI__builtin_ia32_fpclassps512_mask:
+ case X86::BI__builtin_ia32_fpclasspd128_mask:
+ case X86::BI__builtin_ia32_fpclasspd256_mask:
+ case X86::BI__builtin_ia32_fpclasspd512_mask: {
+ unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
+ Value *MaskIn = Ops[2];
+ Ops.erase(&Ops[2]);
+
+ Intrinsic::ID ID;
+ switch (BuiltinID) {
+ default: llvm_unreachable("Unsupported intrinsic!");
+ case X86::BI__builtin_ia32_fpclassps128_mask:
+ ID = Intrinsic::x86_avx512_fpclass_ps_128;
+ break;
+ case X86::BI__builtin_ia32_fpclassps256_mask:
+ ID = Intrinsic::x86_avx512_fpclass_ps_256;
+ break;
+ case X86::BI__builtin_ia32_fpclassps512_mask:
+ ID = Intrinsic::x86_avx512_fpclass_ps_512;
+ break;
+ case X86::BI__builtin_ia32_fpclasspd128_mask:
+ ID = Intrinsic::x86_avx512_fpclass_pd_128;
+ break;
+ case X86::BI__builtin_ia32_fpclasspd256_mask:
+ ID = Intrinsic::x86_avx512_fpclass_pd_256;
+ break;
+ case X86::BI__builtin_ia32_fpclasspd512_mask:
+ ID = Intrinsic::x86_avx512_fpclass_pd_512;
+ break;
+ }
+
+ Value *Fpclass = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
+ return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn);
+ }
+
+  // Packed comparison intrinsics
case X86::BI__builtin_ia32_cmpeqps:
case X86::BI__builtin_ia32_cmpeqpd:
return getVectorFCmpIR(CmpInst::FCMP_OEQ);
@@ -8577,64 +10211,79 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_cmpps:
case X86::BI__builtin_ia32_cmpps256:
case X86::BI__builtin_ia32_cmppd:
- case X86::BI__builtin_ia32_cmppd256: {
- unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
- // If this one of the SSE immediates, we can use native IR.
- if (CC < 8) {
- FCmpInst::Predicate Pred;
- switch (CC) {
- case 0: Pred = FCmpInst::FCMP_OEQ; break;
- case 1: Pred = FCmpInst::FCMP_OLT; break;
- case 2: Pred = FCmpInst::FCMP_OLE; break;
- case 3: Pred = FCmpInst::FCMP_UNO; break;
- case 4: Pred = FCmpInst::FCMP_UNE; break;
- case 5: Pred = FCmpInst::FCMP_UGE; break;
- case 6: Pred = FCmpInst::FCMP_UGT; break;
- case 7: Pred = FCmpInst::FCMP_ORD; break;
- }
- return getVectorFCmpIR(Pred);
+ case X86::BI__builtin_ia32_cmppd256:
+ case X86::BI__builtin_ia32_cmpps128_mask:
+ case X86::BI__builtin_ia32_cmpps256_mask:
+ case X86::BI__builtin_ia32_cmpps512_mask:
+ case X86::BI__builtin_ia32_cmppd128_mask:
+ case X86::BI__builtin_ia32_cmppd256_mask:
+ case X86::BI__builtin_ia32_cmppd512_mask: {
+    // Lower vector comparisons to fcmp instructions, ignoring both the
+    // requested signaling behaviour and the requested rounding mode. This
+    // is only possible as long as FENV_ACCESS is not implemented.
+ // See also: https://reviews.llvm.org/D45616
+
+    // The third argument is the comparison condition, an integer in the
+    // range [0, 31].
+ unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f;
+
+    // Lower to an IR fcmp instruction, ignoring the requested signaling
+    // behaviour, e.g. both _CMP_GT_OS and _CMP_GT_OQ are translated to
+    // FCMP_OGT.
+ FCmpInst::Predicate Pred;
+ switch (CC) {
+ case 0x00: Pred = FCmpInst::FCMP_OEQ; break;
+ case 0x01: Pred = FCmpInst::FCMP_OLT; break;
+ case 0x02: Pred = FCmpInst::FCMP_OLE; break;
+ case 0x03: Pred = FCmpInst::FCMP_UNO; break;
+ case 0x04: Pred = FCmpInst::FCMP_UNE; break;
+ case 0x05: Pred = FCmpInst::FCMP_UGE; break;
+ case 0x06: Pred = FCmpInst::FCMP_UGT; break;
+ case 0x07: Pred = FCmpInst::FCMP_ORD; break;
+ case 0x08: Pred = FCmpInst::FCMP_UEQ; break;
+ case 0x09: Pred = FCmpInst::FCMP_ULT; break;
+ case 0x0a: Pred = FCmpInst::FCMP_ULE; break;
+ case 0x0b: Pred = FCmpInst::FCMP_FALSE; break;
+ case 0x0c: Pred = FCmpInst::FCMP_ONE; break;
+ case 0x0d: Pred = FCmpInst::FCMP_OGE; break;
+ case 0x0e: Pred = FCmpInst::FCMP_OGT; break;
+ case 0x0f: Pred = FCmpInst::FCMP_TRUE; break;
+ case 0x10: Pred = FCmpInst::FCMP_OEQ; break;
+ case 0x11: Pred = FCmpInst::FCMP_OLT; break;
+ case 0x12: Pred = FCmpInst::FCMP_OLE; break;
+ case 0x13: Pred = FCmpInst::FCMP_UNO; break;
+ case 0x14: Pred = FCmpInst::FCMP_UNE; break;
+ case 0x15: Pred = FCmpInst::FCMP_UGE; break;
+ case 0x16: Pred = FCmpInst::FCMP_UGT; break;
+ case 0x17: Pred = FCmpInst::FCMP_ORD; break;
+ case 0x18: Pred = FCmpInst::FCMP_UEQ; break;
+ case 0x19: Pred = FCmpInst::FCMP_ULT; break;
+ case 0x1a: Pred = FCmpInst::FCMP_ULE; break;
+ case 0x1b: Pred = FCmpInst::FCMP_FALSE; break;
+ case 0x1c: Pred = FCmpInst::FCMP_ONE; break;
+ case 0x1d: Pred = FCmpInst::FCMP_OGE; break;
+ case 0x1e: Pred = FCmpInst::FCMP_OGT; break;
+ case 0x1f: Pred = FCmpInst::FCMP_TRUE; break;
+ default: llvm_unreachable("Unhandled CC");
}
- // We can't handle 8-31 immediates with native IR, use the intrinsic.
- // Except for predicates that create constants.
- Intrinsic::ID ID;
+    // Builtins without the _mask suffix return a vector of integers of the
+    // same width as the input vectors.
switch (BuiltinID) {
- default: llvm_unreachable("Unsupported intrinsic!");
- case X86::BI__builtin_ia32_cmpps:
- ID = Intrinsic::x86_sse_cmp_ps;
- break;
- case X86::BI__builtin_ia32_cmpps256:
- // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector
- // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0...
- if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) {
- Value *Constant = (CC == 0xf || CC == 0x1f) ?
- llvm::Constant::getAllOnesValue(Builder.getInt32Ty()) :
- llvm::Constant::getNullValue(Builder.getInt32Ty());
- Value *Vec = Builder.CreateVectorSplat(
- Ops[0]->getType()->getVectorNumElements(), Constant);
- return Builder.CreateBitCast(Vec, Ops[0]->getType());
- }
- ID = Intrinsic::x86_avx_cmp_ps_256;
- break;
- case X86::BI__builtin_ia32_cmppd:
- ID = Intrinsic::x86_sse2_cmp_pd;
- break;
- case X86::BI__builtin_ia32_cmppd256:
- // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector
- // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0...
- if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) {
- Value *Constant = (CC == 0xf || CC == 0x1f) ?
- llvm::Constant::getAllOnesValue(Builder.getInt64Ty()) :
- llvm::Constant::getNullValue(Builder.getInt64Ty());
- Value *Vec = Builder.CreateVectorSplat(
- Ops[0]->getType()->getVectorNumElements(), Constant);
- return Builder.CreateBitCast(Vec, Ops[0]->getType());
- }
- ID = Intrinsic::x86_avx_cmp_pd_256;
- break;
+ case X86::BI__builtin_ia32_cmpps512_mask:
+ case X86::BI__builtin_ia32_cmppd512_mask:
+ case X86::BI__builtin_ia32_cmpps128_mask:
+ case X86::BI__builtin_ia32_cmpps256_mask:
+ case X86::BI__builtin_ia32_cmppd128_mask:
+ case X86::BI__builtin_ia32_cmppd256_mask: {
+ unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
+ Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
+ return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]);
+ }
+ default:
+ return getVectorFCmpIR(Pred);
}
-
- return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
}
// SSE scalar comparison intrinsics
@@ -9195,19 +10844,11 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
- // Element zero comes from the first input vector and element one comes from
- // the second. The element indices within each vector are numbered in big
- // endian order so the shuffle mask must be adjusted for this on little
- // endian platforms (i.e. index is complemented and source vector reversed).
- unsigned ElemIdx0;
- unsigned ElemIdx1;
- if (getTarget().isLittleEndian()) {
- ElemIdx0 = (~Index & 1) + 2;
- ElemIdx1 = (~Index & 2) >> 1;
- } else { // BigEndian
- ElemIdx0 = (Index & 2) >> 1;
- ElemIdx1 = 2 + (Index & 1);
- }
+  // Account for endianness by treating this as just a shuffle, so we use the
+  // same indices for both LE and BE in order to produce expected results in
+  // both cases.
+ unsigned ElemIdx0 = (Index & 2) >> 1;
+ unsigned ElemIdx1 = 2 + (Index & 1);
Constant *ShuffleElts[2] = {ConstantInt::get(Int32Ty, ElemIdx0),
ConstantInt::get(Int32Ty, ElemIdx1)};
@@ -9398,6 +11039,49 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
CI->setConvergent();
return CI;
}
+ case AMDGPU::BI__builtin_amdgcn_ds_faddf:
+ case AMDGPU::BI__builtin_amdgcn_ds_fminf:
+ case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: {
+ llvm::SmallVector<llvm::Value *, 5> Args;
+ for (unsigned I = 0; I != 5; ++I)
+ Args.push_back(EmitScalarExpr(E->getArg(I)));
+ const llvm::Type *PtrTy = Args[0]->getType();
+    // Check the pointer parameter.
+ if (!PtrTy->isPointerTy() ||
+ E->getArg(0)
+ ->getType()
+ ->getPointeeType()
+ .getQualifiers()
+ .getAddressSpace() != LangAS::opencl_local ||
+ !PtrTy->getPointerElementType()->isFloatTy()) {
+ CGM.Error(E->getArg(0)->getLocStart(),
+ "parameter should have type \"local float*\"");
+ return nullptr;
+ }
+ // check float parameter
+    // Check the float parameter.
+ CGM.Error(E->getArg(1)->getLocStart(),
+ "parameter should have type \"float\"");
+ return nullptr;
+ }
+
+ Intrinsic::ID ID;
+ switch (BuiltinID) {
+ case AMDGPU::BI__builtin_amdgcn_ds_faddf:
+ ID = Intrinsic::amdgcn_ds_fadd;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_ds_fminf:
+ ID = Intrinsic::amdgcn_ds_fmin;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
+ ID = Intrinsic::amdgcn_ds_fmax;
+ break;
+ default:
+ llvm_unreachable("Unknown BuiltinID");
+ }
+ Value *F = CGM.getIntrinsic(ID);
+ return Builder.CreateCall(F, Args);
+ }
// amdgcn workitem
case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
@@ -10028,7 +11712,15 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
case NVPTX::BI__hmma_m16n16k16_ld_a:
case NVPTX::BI__hmma_m16n16k16_ld_b:
case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
- case NVPTX::BI__hmma_m16n16k16_ld_c_f32: {
+ case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
+ case NVPTX::BI__hmma_m32n8k16_ld_a:
+ case NVPTX::BI__hmma_m32n8k16_ld_b:
+ case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
+ case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
+ case NVPTX::BI__hmma_m8n32k16_ld_a:
+ case NVPTX::BI__hmma_m8n32k16_ld_b:
+ case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
+ case NVPTX::BI__hmma_m8n32k16_ld_c_f32: {
Address Dst = EmitPointerWithAlignment(E->getArg(0));
Value *Src = EmitScalarExpr(E->getArg(1));
Value *Ldm = EmitScalarExpr(E->getArg(2));
@@ -10040,31 +11732,70 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
unsigned NumResults;
switch (BuiltinID) {
case NVPTX::BI__hmma_m16n16k16_ld_a:
- IID = isColMajor ? Intrinsic::nvvm_wmma_load_a_f16_col_stride
- : Intrinsic::nvvm_wmma_load_a_f16_row_stride;
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride
+ : Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride;
NumResults = 8;
break;
case NVPTX::BI__hmma_m16n16k16_ld_b:
- IID = isColMajor ? Intrinsic::nvvm_wmma_load_b_f16_col_stride
- : Intrinsic::nvvm_wmma_load_b_f16_row_stride;
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_col_stride
+ : Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row_stride;
NumResults = 8;
break;
case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
- IID = isColMajor ? Intrinsic::nvvm_wmma_load_c_f16_col_stride
- : Intrinsic::nvvm_wmma_load_c_f16_row_stride;
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col_stride
+ : Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row_stride;
NumResults = 4;
break;
case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
- IID = isColMajor ? Intrinsic::nvvm_wmma_load_c_f32_col_stride
- : Intrinsic::nvvm_wmma_load_c_f32_row_stride;
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_col_stride
+ : Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row_stride;
+ NumResults = 8;
+ break;
+ case NVPTX::BI__hmma_m32n8k16_ld_a:
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_col_stride
+ : Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_row_stride;
+ NumResults = 8;
+ break;
+ case NVPTX::BI__hmma_m32n8k16_ld_b:
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_col_stride
+ : Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_row_stride;
+ NumResults = 8;
+ break;
+ case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_col_stride
+ : Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_row_stride;
+ NumResults = 4;
+ break;
+ case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_col_stride
+ : Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_row_stride;
+ NumResults = 8;
+ break;
+ case NVPTX::BI__hmma_m8n32k16_ld_a:
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_col_stride
+ : Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_row_stride;
+ NumResults = 8;
+ break;
+ case NVPTX::BI__hmma_m8n32k16_ld_b:
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_col_stride
+ : Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_row_stride;
+ NumResults = 8;
+ break;
+ case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_col_stride
+ : Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_row_stride;
+ NumResults = 4;
+ break;
+ case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_col_stride
+ : Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_row_stride;
NumResults = 8;
break;
default:
llvm_unreachable("Unexpected builtin ID.");
}
Value *Result =
- Builder.CreateCall(CGM.getIntrinsic(IID),
- {Builder.CreatePointerCast(Src, VoidPtrTy), Ldm});
+ Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm});
// Save returned values.
for (unsigned i = 0; i < NumResults; ++i) {
@@ -10078,7 +11809,11 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
}
case NVPTX::BI__hmma_m16n16k16_st_c_f16:
- case NVPTX::BI__hmma_m16n16k16_st_c_f32: {
+ case NVPTX::BI__hmma_m16n16k16_st_c_f32:
+ case NVPTX::BI__hmma_m32n8k16_st_c_f16:
+ case NVPTX::BI__hmma_m32n8k16_st_c_f32:
+ case NVPTX::BI__hmma_m8n32k16_st_c_f16:
+ case NVPTX::BI__hmma_m8n32k16_st_c_f32: {
Value *Dst = EmitScalarExpr(E->getArg(0));
Address Src = EmitPointerWithAlignment(E->getArg(1));
Value *Ldm = EmitScalarExpr(E->getArg(2));
@@ -10092,21 +11827,38 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
// for some reason nvcc builtins use _c_.
switch (BuiltinID) {
case NVPTX::BI__hmma_m16n16k16_st_c_f16:
- IID = isColMajor ? Intrinsic::nvvm_wmma_store_d_f16_col_stride
- : Intrinsic::nvvm_wmma_store_d_f16_row_stride;
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col_stride
+ : Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row_stride;
NumResults = 4;
break;
case NVPTX::BI__hmma_m16n16k16_st_c_f32:
- IID = isColMajor ? Intrinsic::nvvm_wmma_store_d_f32_col_stride
- : Intrinsic::nvvm_wmma_store_d_f32_row_stride;
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col_stride
+ : Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row_stride;
+ break;
+ case NVPTX::BI__hmma_m32n8k16_st_c_f16:
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_col_stride
+ : Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_row_stride;
+ NumResults = 4;
+ break;
+ case NVPTX::BI__hmma_m32n8k16_st_c_f32:
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_col_stride
+ : Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_row_stride;
+ break;
+ case NVPTX::BI__hmma_m8n32k16_st_c_f16:
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_col_stride
+ : Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_row_stride;
+ NumResults = 4;
+ break;
+ case NVPTX::BI__hmma_m8n32k16_st_c_f32:
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_col_stride
+ : Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_row_stride;
break;
default:
llvm_unreachable("Unexpected builtin ID.");
}
- Function *Intrinsic = CGM.getIntrinsic(IID);
+ Function *Intrinsic = CGM.getIntrinsic(IID, Dst->getType());
llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1);
- SmallVector<Value *, 10> Values;
- Values.push_back(Builder.CreatePointerCast(Dst, VoidPtrTy));
+ SmallVector<Value *, 10> Values = {Dst};
for (unsigned i = 0; i < NumResults; ++i) {
Value *V = Builder.CreateAlignedLoad(
Builder.CreateGEP(Src.getPointer(), llvm::ConstantInt::get(IntTy, i)),
@@ -10118,12 +11870,20 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
return Result;
}
- // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf)
- // --> Intrinsic::nvvm_wmma_mma_sync<layout A,B><DType><CType><Satf>
+ // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) -->
+ // Intrinsic::nvvm_wmma_m16n16k16_mma_sync<layout A,B><DType><CType><Satf>
case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
- case NVPTX::BI__hmma_m16n16k16_mma_f16f32: {
+ case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
+ case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
+ case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
+ case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
+ case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
+ case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
+ case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
+ case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
+ case NVPTX::BI__hmma_m8n32k16_mma_f16f32: {
Address Dst = EmitPointerWithAlignment(E->getArg(0));
Address SrcA = EmitPointerWithAlignment(E->getArg(1));
Address SrcB = EmitPointerWithAlignment(E->getArg(2));
@@ -10140,15 +11900,15 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
bool Satf = SatfArg.getSExtValue();
// clang-format off
-#define MMA_VARIANTS(type) {{ \
- Intrinsic::nvvm_wmma_mma_sync_row_row_##type, \
- Intrinsic::nvvm_wmma_mma_sync_row_row_##type##_satfinite, \
- Intrinsic::nvvm_wmma_mma_sync_row_col_##type, \
- Intrinsic::nvvm_wmma_mma_sync_row_col_##type##_satfinite, \
- Intrinsic::nvvm_wmma_mma_sync_col_row_##type, \
- Intrinsic::nvvm_wmma_mma_sync_col_row_##type##_satfinite, \
- Intrinsic::nvvm_wmma_mma_sync_col_col_##type, \
- Intrinsic::nvvm_wmma_mma_sync_col_col_##type##_satfinite \
+#define MMA_VARIANTS(geom, type) {{ \
+ Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \
+ Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \
+ Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
+ Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
+ Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \
+ Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \
+ Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type, \
+ Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite \
}}
// clang-format on
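
For context, each MMA_VARIANTS expansion lists the eight layout/satf combinations in a fixed order, so the getMMAIntrinsic helper used below can presumably index the table as Layout * 2 + Satf. A minimal standalone sketch of that selection (names hypothetical):

#include <array>

// Minimal sketch of the table lookup, assuming the index scheme
// Layout * 2 + Satf used by the getMMAIntrinsic helper below.
static unsigned pickMMAVariant(const std::array<unsigned, 8> &Variants,
                               unsigned Layout, bool Satf) {
  // Layout encodes the A/B layout pair: 0 = row_row, 1 = row_col,
  // 2 = col_row, 3 = col_col; Satf selects the _satfinite variant.
  return Variants[Layout * 2 + (Satf ? 1 : 0)];
}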
@@ -10162,22 +11922,62 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
unsigned NumEltsD;
switch (BuiltinID) {
case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
- IID = getMMAIntrinsic(MMA_VARIANTS(f16_f16));
+ IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f16_f16));
NumEltsC = 4;
NumEltsD = 4;
break;
case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
- IID = getMMAIntrinsic(MMA_VARIANTS(f32_f16));
+ IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f32_f16));
NumEltsC = 4;
NumEltsD = 8;
break;
case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
- IID = getMMAIntrinsic(MMA_VARIANTS(f16_f32));
+ IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f16_f32));
NumEltsC = 8;
NumEltsD = 4;
break;
case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
- IID = getMMAIntrinsic(MMA_VARIANTS(f32_f32));
+ IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f32_f32));
+ NumEltsC = 8;
+ NumEltsD = 8;
+ break;
+ case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
+ IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f16_f16));
+ NumEltsC = 4;
+ NumEltsD = 4;
+ break;
+ case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
+ IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f32_f16));
+ NumEltsC = 4;
+ NumEltsD = 8;
+ break;
+ case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
+ IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f16_f32));
+ NumEltsC = 8;
+ NumEltsD = 4;
+ break;
+ case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
+ IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f32_f32));
+ NumEltsC = 8;
+ NumEltsD = 8;
+ break;
+ case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
+ IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f16_f16));
+ NumEltsC = 4;
+ NumEltsD = 4;
+ break;
+ case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
+ IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f32_f16));
+ NumEltsC = 4;
+ NumEltsD = 8;
+ break;
+ case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
+ IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f16_f32));
+ NumEltsC = 8;
+ NumEltsD = 4;
+ break;
+ case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
+ IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f32_f32));
NumEltsC = 8;
NumEltsD = 8;
break;
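
All twelve cases share the call shape of the original m16n16k16 builtins; only the geometry and the C/D fragment counts differ. A hedged device-side usage sketch (the layout/satf values and fragment packing are illustrative):

// Hypothetical CUDA device code using one of the new geometries.
// a/b are packed f16 fragments (exposed as int), c/d are f32
// accumulator fragments; 0/0 selects row_row layout without satfinite.
__device__ void tile_mma_32x8x16(float *d, const int *a, const int *b,
                                 const float *c) {
  __hmma_m32n8k16_mma_f32f32(d, a, b, c, 0, 0);
}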
@@ -10231,6 +12031,36 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
switch (BuiltinID) {
+ case WebAssembly::BI__builtin_wasm_memory_size: {
+ llvm::Type *ResultType = ConvertType(E->getType());
+ Value *I = EmitScalarExpr(E->getArg(0));
+ Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType);
+ return Builder.CreateCall(Callee, I);
+ }
+ case WebAssembly::BI__builtin_wasm_memory_grow: {
+ llvm::Type *ResultType = ConvertType(E->getType());
+ Value *Args[] = {
+ EmitScalarExpr(E->getArg(0)),
+ EmitScalarExpr(E->getArg(1))
+ };
+ Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType);
+ return Builder.CreateCall(Callee, Args);
+ }
+ case WebAssembly::BI__builtin_wasm_mem_size: {
+ llvm::Type *ResultType = ConvertType(E->getType());
+ Value *I = EmitScalarExpr(E->getArg(0));
+ Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_mem_size, ResultType);
+ return Builder.CreateCall(Callee, I);
+ }
+ case WebAssembly::BI__builtin_wasm_mem_grow: {
+ llvm::Type *ResultType = ConvertType(E->getType());
+ Value *Args[] = {
+ EmitScalarExpr(E->getArg(0)),
+ EmitScalarExpr(E->getArg(1))
+ };
+ Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_mem_grow, ResultType);
+ return Builder.CreateCall(Callee, Args);
+ }
case WebAssembly::BI__builtin_wasm_current_memory: {
llvm::Type *ResultType = ConvertType(E->getType());
Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType);
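
The first two cases implement the current names for the WebAssembly memory operators (memory.size/memory.grow); the __builtin_wasm_mem_* and current_memory/grow_memory forms below them appear to be compatibility spellings of the same operations. A usage sketch, assuming the memory index comes first (0 being the default linear memory):

// Hypothetical use when targeting wasm32; sizes are in 64KiB pages.
__SIZE_TYPE__ pages_now(void) { return __builtin_wasm_memory_size(0); }
__SIZE_TYPE__ grow_pages(__SIZE_TYPE__ delta) {
  // Returns the previous size in pages, or (size_t)-1 on failure.
  return __builtin_wasm_memory_grow(0, delta);
}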
@@ -10262,6 +12092,93 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
SmallVector<llvm::Value *, 4> Ops;
Intrinsic::ID ID = Intrinsic::not_intrinsic;
+ auto MakeCircLd = [&](unsigned IntID, bool HasImm) {
+ // The base pointer is passed by address, so it needs to be loaded.
+ Address BP = EmitPointerWithAlignment(E->getArg(0));
+ BP = Address(Builder.CreateBitCast(BP.getPointer(), Int8PtrPtrTy),
+ BP.getAlignment());
+ llvm::Value *Base = Builder.CreateLoad(BP);
+ // Operands are Base, Increment, Modifier, Start.
+ if (HasImm)
+ Ops = { Base, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)),
+ EmitScalarExpr(E->getArg(3)) };
+ else
+ Ops = { Base, EmitScalarExpr(E->getArg(1)),
+ EmitScalarExpr(E->getArg(2)) };
+
+ llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
+ llvm::Value *NewBase = Builder.CreateExtractValue(Result, 1);
+ llvm::Value *LV = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
+ NewBase->getType()->getPointerTo());
+ Address Dest = EmitPointerWithAlignment(E->getArg(0));
+ // The intrinsic generates two results. The new value for the base pointer
+ // needs to be stored.
+ Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());
+ return Builder.CreateExtractValue(Result, 0);
+ };
+
+ auto MakeCircSt = [&](unsigned IntID, bool HasImm) {
+ // The base pointer is passed by address, so it needs to be loaded.
+ Address BP = EmitPointerWithAlignment(E->getArg(0));
+ BP = Address(Builder.CreateBitCast(BP.getPointer(), Int8PtrPtrTy),
+ BP.getAlignment());
+ llvm::Value *Base = Builder.CreateLoad(BP);
+ // Operands are Base, Increment, Modifier, Value, Start.
+ if (HasImm)
+ Ops = { Base, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)),
+ EmitScalarExpr(E->getArg(3)), EmitScalarExpr(E->getArg(4)) };
+ else
+ Ops = { Base, EmitScalarExpr(E->getArg(1)),
+ EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3)) };
+
+ llvm::Value *NewBase = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
+ llvm::Value *LV = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
+ NewBase->getType()->getPointerTo());
+ Address Dest = EmitPointerWithAlignment(E->getArg(0));
+ // The intrinsic generates one result, which is the new value for the base
+ // pointer. It needs to be stored.
+ return Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());
+ };
+
+  // Handle the conversion of bit-reverse load intrinsics to bit code.
+  // The intrinsic call emitted below only reads from memory; the write to
+  // memory is handled by the store instruction.
+ auto MakeBrevLd = [&](unsigned IntID, llvm::Type *DestTy) {
+    // The intrinsic returns the loaded value and the updated base pointer.
+    // The updated base pointer becomes this builtin's return value; the
+    // loaded value is passed back by address, so it needs to be stored.
+ llvm::Value *BaseAddress =
+ Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int8PtrTy);
+
+    // Expressions like &(*pt++) have a side effect on every evaluation, and
+    // EmitPointerWithAlignment and EmitScalarExpr each evaluate their
+    // expression once per call, so each argument is evaluated exactly once.
+ Address DestAddr = EmitPointerWithAlignment(E->getArg(1));
+ DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), Int8PtrTy),
+ DestAddr.getAlignment());
+ llvm::Value *DestAddress = DestAddr.getPointer();
+
+ // Operands are Base, Dest, Modifier.
+ // The intrinsic format in LLVM IR is defined as
+ // { ValueType, i8* } (i8*, i32).
+ Ops = {BaseAddress, EmitScalarExpr(E->getArg(2))};
+
+ llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
+ // The value needs to be stored as the variable is passed by reference.
+ llvm::Value *DestVal = Builder.CreateExtractValue(Result, 0);
+
+    // The stored value needs to be truncated to fit the destination type.
+    // While i32 and i64 are natively supported on Hexagon, i8 and i16 need
+    // to be handled with stores of the respective destination type.
+ DestVal = Builder.CreateTrunc(DestVal, DestTy);
+
+ llvm::Value *DestForStore =
+ Builder.CreateBitCast(DestAddress, DestVal->getType()->getPointerTo());
+ Builder.CreateAlignedStore(DestVal, DestForStore, DestAddr.getAlignment());
+ // The updated value of the base pointer is returned.
+ return Builder.CreateExtractValue(Result, 1);
+ };
+
switch (BuiltinID) {
case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry:
case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B: {
@@ -10307,6 +12224,64 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
Builder.CreateAlignedStore(Vprd, Base, Dest.getAlignment());
return Builder.CreateExtractValue(Result, 0);
}
+ case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci:
+ return MakeCircLd(Intrinsic::hexagon_L2_loadrub_pci, /*HasImm*/true);
+ case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci:
+ return MakeCircLd(Intrinsic::hexagon_L2_loadrb_pci, /*HasImm*/true);
+ case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pci:
+ return MakeCircLd(Intrinsic::hexagon_L2_loadruh_pci, /*HasImm*/true);
+ case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pci:
+ return MakeCircLd(Intrinsic::hexagon_L2_loadrh_pci, /*HasImm*/true);
+ case Hexagon::BI__builtin_HEXAGON_L2_loadri_pci:
+ return MakeCircLd(Intrinsic::hexagon_L2_loadri_pci, /*HasImm*/true);
+ case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pci:
+ return MakeCircLd(Intrinsic::hexagon_L2_loadrd_pci, /*HasImm*/true);
+ case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pcr:
+ return MakeCircLd(Intrinsic::hexagon_L2_loadrub_pcr, /*HasImm*/false);
+ case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pcr:
+ return MakeCircLd(Intrinsic::hexagon_L2_loadrb_pcr, /*HasImm*/false);
+ case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pcr:
+ return MakeCircLd(Intrinsic::hexagon_L2_loadruh_pcr, /*HasImm*/false);
+ case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pcr:
+ return MakeCircLd(Intrinsic::hexagon_L2_loadrh_pcr, /*HasImm*/false);
+ case Hexagon::BI__builtin_HEXAGON_L2_loadri_pcr:
+ return MakeCircLd(Intrinsic::hexagon_L2_loadri_pcr, /*HasImm*/false);
+ case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pcr:
+ return MakeCircLd(Intrinsic::hexagon_L2_loadrd_pcr, /*HasImm*/false);
+ case Hexagon::BI__builtin_HEXAGON_S2_storerb_pci:
+ return MakeCircSt(Intrinsic::hexagon_S2_storerb_pci, /*HasImm*/true);
+ case Hexagon::BI__builtin_HEXAGON_S2_storerh_pci:
+ return MakeCircSt(Intrinsic::hexagon_S2_storerh_pci, /*HasImm*/true);
+ case Hexagon::BI__builtin_HEXAGON_S2_storerf_pci:
+ return MakeCircSt(Intrinsic::hexagon_S2_storerf_pci, /*HasImm*/true);
+ case Hexagon::BI__builtin_HEXAGON_S2_storeri_pci:
+ return MakeCircSt(Intrinsic::hexagon_S2_storeri_pci, /*HasImm*/true);
+ case Hexagon::BI__builtin_HEXAGON_S2_storerd_pci:
+ return MakeCircSt(Intrinsic::hexagon_S2_storerd_pci, /*HasImm*/true);
+ case Hexagon::BI__builtin_HEXAGON_S2_storerb_pcr:
+ return MakeCircSt(Intrinsic::hexagon_S2_storerb_pcr, /*HasImm*/false);
+ case Hexagon::BI__builtin_HEXAGON_S2_storerh_pcr:
+ return MakeCircSt(Intrinsic::hexagon_S2_storerh_pcr, /*HasImm*/false);
+ case Hexagon::BI__builtin_HEXAGON_S2_storerf_pcr:
+ return MakeCircSt(Intrinsic::hexagon_S2_storerf_pcr, /*HasImm*/false);
+ case Hexagon::BI__builtin_HEXAGON_S2_storeri_pcr:
+ return MakeCircSt(Intrinsic::hexagon_S2_storeri_pcr, /*HasImm*/false);
+ case Hexagon::BI__builtin_HEXAGON_S2_storerd_pcr:
+ return MakeCircSt(Intrinsic::hexagon_S2_storerd_pcr, /*HasImm*/false);
+ case Hexagon::BI__builtin_brev_ldub:
+ return MakeBrevLd(Intrinsic::hexagon_L2_loadrub_pbr, Int8Ty);
+ case Hexagon::BI__builtin_brev_ldb:
+ return MakeBrevLd(Intrinsic::hexagon_L2_loadrb_pbr, Int8Ty);
+ case Hexagon::BI__builtin_brev_lduh:
+ return MakeBrevLd(Intrinsic::hexagon_L2_loadruh_pbr, Int16Ty);
+ case Hexagon::BI__builtin_brev_ldh:
+ return MakeBrevLd(Intrinsic::hexagon_L2_loadrh_pbr, Int16Ty);
+ case Hexagon::BI__builtin_brev_ldw:
+ return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty);
+ case Hexagon::BI__builtin_brev_ldd:
+ return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty);
+ default:
+ break;
} // switch
return nullptr;
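
Both lambdas above make the two-result shape of the circular access intrinsics explicit: the loaded or stored element travels in one result, and the post-incremented base pointer is written back through the builtin's first argument. A hedged caller sketch (the argument types are illustrative, not a documented API):

// Hypothetical walk over a circular buffer with a post-updating load:
// each call loads *ptr and advances ptr within the buffer described by
// the modifier value Mod (and Start for the _pci immediate form).
int sum_circular(void **ptr, int Inc, int Mod, void *Start, int n) {
  int s = 0;
  for (int i = 0; i < n; ++i)
    s += __builtin_HEXAGON_L2_loadri_pci(ptr, Inc, Mod, Start);
  return s;
}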
diff --git a/lib/CodeGen/CGCUDANV.cpp b/lib/CodeGen/CGCUDANV.cpp
index d24ef0a8a974..5fcc9e011bcb 100644
--- a/lib/CodeGen/CGCUDANV.cpp
+++ b/lib/CodeGen/CGCUDANV.cpp
@@ -15,17 +15,20 @@
#include "CGCUDARuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
-#include "clang/CodeGen/ConstantInitBuilder.h"
#include "clang/AST/Decl.h"
+#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/Support/Format.h"
using namespace clang;
using namespace CodeGen;
namespace {
+constexpr unsigned CudaFatMagic = 0x466243b1;
+constexpr unsigned HIPFatMagic = 0x48495046; // "HIPF"
class CGNVCUDARuntime : public CGCUDARuntime {
@@ -41,14 +44,22 @@ private:
/// Keeps track of kernel launch stubs emitted in this module
llvm::SmallVector<llvm::Function *, 16> EmittedKernels;
llvm::SmallVector<std::pair<llvm::GlobalVariable *, unsigned>, 16> DeviceVars;
- /// Keeps track of variables containing handles of GPU binaries. Populated by
+  /// Keeps track of the variable containing the handle of the GPU binary. Populated by
/// ModuleCtorFunction() and used to create corresponding cleanup calls in
/// ModuleDtorFunction()
- llvm::SmallVector<llvm::GlobalVariable *, 16> GpuBinaryHandles;
+ llvm::GlobalVariable *GpuBinaryHandle = nullptr;
+ /// Whether we generate relocatable device code.
+ bool RelocatableDeviceCode;
llvm::Constant *getSetupArgumentFn() const;
llvm::Constant *getLaunchFn() const;
+ llvm::FunctionType *getRegisterGlobalsFnTy() const;
+ llvm::FunctionType *getCallbackFnTy() const;
+ llvm::FunctionType *getRegisterLinkedBinaryFnTy() const;
+ std::string addPrefixToName(StringRef FuncName) const;
+ std::string addUnderscoredPrefixToName(StringRef FuncName) const;
+
/// Creates a function to register all kernel stubs generated in this module.
llvm::Function *makeRegisterGlobalsFn();
@@ -64,14 +75,34 @@ private:
auto ConstStr = CGM.GetAddrOfConstantCString(Str, Name.c_str());
llvm::GlobalVariable *GV =
cast<llvm::GlobalVariable>(ConstStr.getPointer());
- if (!SectionName.empty())
+ if (!SectionName.empty()) {
GV->setSection(SectionName);
+      // Mark the address as used, which makes sure that this section isn't
+      // merged away and that we will really have it in the object file.
+ GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::None);
+ }
if (Alignment)
GV->setAlignment(Alignment);
return llvm::ConstantExpr::getGetElementPtr(ConstStr.getElementType(),
ConstStr.getPointer(), Zeros);
- }
+ }
+
+ /// Helper function that generates an empty dummy function returning void.
+ llvm::Function *makeDummyFunction(llvm::FunctionType *FnTy) {
+ assert(FnTy->getReturnType()->isVoidTy() &&
+ "Can only generate dummy functions returning void!");
+ llvm::Function *DummyFunc = llvm::Function::Create(
+ FnTy, llvm::GlobalValue::InternalLinkage, "dummy", &TheModule);
+
+ llvm::BasicBlock *DummyBlock =
+ llvm::BasicBlock::Create(Context, "", DummyFunc);
+ CGBuilderTy FuncBuilder(CGM, Context);
+ FuncBuilder.SetInsertPoint(DummyBlock);
+ FuncBuilder.CreateRetVoid();
+
+ return DummyFunc;
+ }
void emitDeviceStubBody(CodeGenFunction &CGF, FunctionArgList &Args);
@@ -91,9 +122,22 @@ public:
}
+std::string CGNVCUDARuntime::addPrefixToName(StringRef FuncName) const {
+ if (CGM.getLangOpts().HIP)
+ return ((Twine("hip") + Twine(FuncName)).str());
+ return ((Twine("cuda") + Twine(FuncName)).str());
+}
+std::string
+CGNVCUDARuntime::addUnderscoredPrefixToName(StringRef FuncName) const {
+ if (CGM.getLangOpts().HIP)
+ return ((Twine("__hip") + Twine(FuncName)).str());
+ return ((Twine("__cuda") + Twine(FuncName)).str());
+}
+
CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM)
: CGCUDARuntime(CGM), Context(CGM.getLLVMContext()),
- TheModule(CGM.getModule()) {
+ TheModule(CGM.getModule()),
+ RelocatableDeviceCode(CGM.getLangOpts().CUDARelocatableDeviceCode) {
CodeGen::CodeGenTypes &Types = CGM.getTypes();
ASTContext &Ctx = CGM.getContext();
@@ -109,15 +153,37 @@ CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM)
llvm::Constant *CGNVCUDARuntime::getSetupArgumentFn() const {
// cudaError_t cudaSetupArgument(void *, size_t, size_t)
llvm::Type *Params[] = {VoidPtrTy, SizeTy, SizeTy};
- return CGM.CreateRuntimeFunction(llvm::FunctionType::get(IntTy,
- Params, false),
- "cudaSetupArgument");
+ return CGM.CreateRuntimeFunction(
+ llvm::FunctionType::get(IntTy, Params, false),
+ addPrefixToName("SetupArgument"));
}
llvm::Constant *CGNVCUDARuntime::getLaunchFn() const {
- // cudaError_t cudaLaunch(char *)
- return CGM.CreateRuntimeFunction(
- llvm::FunctionType::get(IntTy, CharPtrTy, false), "cudaLaunch");
+ if (CGM.getLangOpts().HIP) {
+ // hipError_t hipLaunchByPtr(char *);
+ return CGM.CreateRuntimeFunction(
+ llvm::FunctionType::get(IntTy, CharPtrTy, false), "hipLaunchByPtr");
+ } else {
+ // cudaError_t cudaLaunch(char *);
+ return CGM.CreateRuntimeFunction(
+ llvm::FunctionType::get(IntTy, CharPtrTy, false), "cudaLaunch");
+ }
+}
+
+llvm::FunctionType *CGNVCUDARuntime::getRegisterGlobalsFnTy() const {
+ return llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false);
+}
+
+llvm::FunctionType *CGNVCUDARuntime::getCallbackFnTy() const {
+ return llvm::FunctionType::get(VoidTy, VoidPtrTy, false);
+}
+
+llvm::FunctionType *CGNVCUDARuntime::getRegisterLinkedBinaryFnTy() const {
+ auto CallbackFnTy = getCallbackFnTy();
+ auto RegisterGlobalsFnTy = getRegisterGlobalsFnTy();
+ llvm::Type *Params[] = {RegisterGlobalsFnTy->getPointerTo(), VoidPtrTy,
+ VoidPtrTy, CallbackFnTy->getPointerTo()};
+ return llvm::FunctionType::get(VoidTy, Params, false);
}
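
Spelled as C declarations, the three function types above model the separate-compilation registration hook; the real symbol carries a per-module suffix appended to its name. A sketch only, with illustrative parameter names:

// Sketch of the ABI these llvm::FunctionTypes describe.
typedef void (*RegisterGlobalsFn)(void **fatbinHandle);
typedef void (*CallbackFn)(void *data);
// Emitted with a module-id suffix, e.g. __cudaRegisterLinkedBinary__nv_<hex>:
void __cudaRegisterLinkedBinary(RegisterGlobalsFn regGlobals,
                                void *fatbinWrapper, void *moduleId,
                                CallbackFn onLoaded);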
void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF,
@@ -181,8 +247,8 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
return nullptr;
llvm::Function *RegisterKernelsFunc = llvm::Function::Create(
- llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false),
- llvm::GlobalValue::InternalLinkage, "__cuda_register_globals", &TheModule);
+ getRegisterGlobalsFnTy(), llvm::GlobalValue::InternalLinkage,
+ addUnderscoredPrefixToName("_register_globals"), &TheModule);
llvm::BasicBlock *EntryBB =
llvm::BasicBlock::Create(Context, "entry", RegisterKernelsFunc);
CGBuilderTy Builder(CGM, Context);
@@ -195,7 +261,7 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
VoidPtrTy, VoidPtrTy, VoidPtrTy, VoidPtrTy, IntTy->getPointerTo()};
llvm::Constant *RegisterFunc = CGM.CreateRuntimeFunction(
llvm::FunctionType::get(IntTy, RegisterFuncParams, false),
- "__cudaRegisterFunction");
+ addUnderscoredPrefixToName("RegisterFunction"));
// Extract GpuBinaryHandle passed as the first argument passed to
// __cuda_register_globals() and generate __cudaRegisterFunction() call for
@@ -219,7 +285,7 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
IntTy, IntTy};
llvm::Constant *RegisterVar = CGM.CreateRuntimeFunction(
llvm::FunctionType::get(IntTy, RegisterVarParams, false),
- "__cudaRegisterVar");
+ addUnderscoredPrefixToName("RegisterVar"));
for (auto &Pair : DeviceVars) {
llvm::GlobalVariable *Var = Pair.first;
unsigned Flags = Pair.second;
@@ -243,133 +309,307 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
}
/// Creates a global constructor function for the module:
+///
+/// For CUDA:
/// \code
/// void __cuda_module_ctor(void*) {
-/// Handle0 = __cudaRegisterFatBinary(GpuBinaryBlob0);
-/// __cuda_register_globals(Handle0);
-/// ...
-/// HandleN = __cudaRegisterFatBinary(GpuBinaryBlobN);
-/// __cuda_register_globals(HandleN);
+/// Handle = __cudaRegisterFatBinary(GpuBinaryBlob);
+/// __cuda_register_globals(Handle);
+/// }
+/// \endcode
+///
+/// For HIP:
+/// \code
+/// void __hip_module_ctor(void*) {
+/// if (__hip_gpubin_handle == 0) {
+/// __hip_gpubin_handle = __hipRegisterFatBinary(GpuBinaryBlob);
+/// __hip_register_globals(__hip_gpubin_handle);
+/// }
/// }
/// \endcode
llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
- // No need to generate ctors/dtors if there are no GPU binaries.
- if (CGM.getCodeGenOpts().CudaGpuBinaryFileNames.empty())
+ bool IsHIP = CGM.getLangOpts().HIP;
+ // No need to generate ctors/dtors if there is no GPU binary.
+ StringRef CudaGpuBinaryFileName = CGM.getCodeGenOpts().CudaGpuBinaryFileName;
+ if (CudaGpuBinaryFileName.empty() && !IsHIP)
return nullptr;
- // void __cuda_register_globals(void* handle);
+ // void __{cuda|hip}_register_globals(void* handle);
llvm::Function *RegisterGlobalsFunc = makeRegisterGlobalsFn();
- // void ** __cudaRegisterFatBinary(void *);
+ // We always need a function to pass in as callback. Create a dummy
+ // implementation if we don't need to register anything.
+ if (RelocatableDeviceCode && !RegisterGlobalsFunc)
+ RegisterGlobalsFunc = makeDummyFunction(getRegisterGlobalsFnTy());
+
+ // void ** __{cuda|hip}RegisterFatBinary(void *);
llvm::Constant *RegisterFatbinFunc = CGM.CreateRuntimeFunction(
llvm::FunctionType::get(VoidPtrPtrTy, VoidPtrTy, false),
- "__cudaRegisterFatBinary");
+ addUnderscoredPrefixToName("RegisterFatBinary"));
// struct { int magic, int version, void * gpu_binary, void * dont_care };
llvm::StructType *FatbinWrapperTy =
llvm::StructType::get(IntTy, IntTy, VoidPtrTy, VoidPtrTy);
+  // Register the GPU binary with the CUDA runtime, store the returned handle
+  // in a global variable and save a reference in GpuBinaryHandle so it can be
+  // cleaned up in the destructor on exit. Then associate all known kernels
+  // with the GPU binary handle so the CUDA runtime can figure out what to
+  // call on the GPU side.
+ std::unique_ptr<llvm::MemoryBuffer> CudaGpuBinary;
+ if (!IsHIP) {
+ llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> CudaGpuBinaryOrErr =
+ llvm::MemoryBuffer::getFileOrSTDIN(CudaGpuBinaryFileName);
+ if (std::error_code EC = CudaGpuBinaryOrErr.getError()) {
+ CGM.getDiags().Report(diag::err_cannot_open_file)
+ << CudaGpuBinaryFileName << EC.message();
+ return nullptr;
+ }
+ CudaGpuBinary = std::move(CudaGpuBinaryOrErr.get());
+ }
+
llvm::Function *ModuleCtorFunc = llvm::Function::Create(
llvm::FunctionType::get(VoidTy, VoidPtrTy, false),
- llvm::GlobalValue::InternalLinkage, "__cuda_module_ctor", &TheModule);
+ llvm::GlobalValue::InternalLinkage,
+ addUnderscoredPrefixToName("_module_ctor"), &TheModule);
llvm::BasicBlock *CtorEntryBB =
llvm::BasicBlock::Create(Context, "entry", ModuleCtorFunc);
CGBuilderTy CtorBuilder(CGM, Context);
CtorBuilder.SetInsertPoint(CtorEntryBB);
- // For each GPU binary, register it with the CUDA runtime and store returned
- // handle in a global variable and save the handle in GpuBinaryHandles vector
- // to be cleaned up in destructor on exit. Then associate all known kernels
- // with the GPU binary handle so CUDA runtime can figure out what to call on
- // the GPU side.
- for (const std::string &GpuBinaryFileName :
- CGM.getCodeGenOpts().CudaGpuBinaryFileNames) {
- llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> GpuBinaryOrErr =
- llvm::MemoryBuffer::getFileOrSTDIN(GpuBinaryFileName);
- if (std::error_code EC = GpuBinaryOrErr.getError()) {
- CGM.getDiags().Report(diag::err_cannot_open_file) << GpuBinaryFileName
- << EC.message();
- continue;
- }
-
- const char *FatbinConstantName =
- CGM.getTriple().isMacOSX() ? "__NV_CUDA,__nv_fatbin" : ".nv_fatbin";
+ const char *FatbinConstantName;
+ const char *FatbinSectionName;
+ const char *ModuleIDSectionName;
+ StringRef ModuleIDPrefix;
+ llvm::Constant *FatBinStr;
+ unsigned FatMagic;
+ if (IsHIP) {
+ FatbinConstantName = ".hip_fatbin";
+ FatbinSectionName = ".hipFatBinSegment";
+
+ ModuleIDSectionName = "__hip_module_id";
+ ModuleIDPrefix = "__hip_";
+
+ // For HIP, create an external symbol __hip_fatbin in section .hip_fatbin.
+ // The external symbol is supposed to contain the fat binary but will be
+    // populated somewhere else, e.g. by lld via a linker script.
+ FatBinStr = new llvm::GlobalVariable(
+ CGM.getModule(), CGM.Int8Ty,
+ /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, nullptr,
+ "__hip_fatbin", nullptr,
+ llvm::GlobalVariable::NotThreadLocal);
+ cast<llvm::GlobalVariable>(FatBinStr)->setSection(FatbinConstantName);
+
+ FatMagic = HIPFatMagic;
+ } else {
+ if (RelocatableDeviceCode)
+ FatbinConstantName = CGM.getTriple().isMacOSX()
+ ? "__NV_CUDA,__nv_relfatbin"
+ : "__nv_relfatbin";
+ else
+ FatbinConstantName =
+ CGM.getTriple().isMacOSX() ? "__NV_CUDA,__nv_fatbin" : ".nv_fatbin";
// NVIDIA's cuobjdump looks for fatbins in this section.
- const char *FatbinSectionName =
+ FatbinSectionName =
CGM.getTriple().isMacOSX() ? "__NV_CUDA,__fatbin" : ".nvFatBinSegment";
- // Create initialized wrapper structure that points to the loaded GPU binary
- ConstantInitBuilder Builder(CGM);
- auto Values = Builder.beginStruct(FatbinWrapperTy);
- // Fatbin wrapper magic.
- Values.addInt(IntTy, 0x466243b1);
- // Fatbin version.
- Values.addInt(IntTy, 1);
- // Data.
- Values.add(makeConstantString(GpuBinaryOrErr.get()->getBuffer(),
- "", FatbinConstantName, 8));
- // Unused in fatbin v1.
- Values.add(llvm::ConstantPointerNull::get(VoidPtrTy));
- llvm::GlobalVariable *FatbinWrapper =
- Values.finishAndCreateGlobal("__cuda_fatbin_wrapper",
- CGM.getPointerAlign(),
- /*constant*/ true);
- FatbinWrapper->setSection(FatbinSectionName);
+ ModuleIDSectionName = CGM.getTriple().isMacOSX()
+ ? "__NV_CUDA,__nv_module_id"
+ : "__nv_module_id";
+ ModuleIDPrefix = "__nv_";
+
+ // For CUDA, create a string literal containing the fat binary loaded from
+ // the given file.
+ FatBinStr = makeConstantString(CudaGpuBinary->getBuffer(), "",
+ FatbinConstantName, 8);
+ FatMagic = CudaFatMagic;
+ }
+ // Create initialized wrapper structure that points to the loaded GPU binary
+ ConstantInitBuilder Builder(CGM);
+ auto Values = Builder.beginStruct(FatbinWrapperTy);
+ // Fatbin wrapper magic.
+ Values.addInt(IntTy, FatMagic);
+ // Fatbin version.
+ Values.addInt(IntTy, 1);
+ // Data.
+ Values.add(FatBinStr);
+ // Unused in fatbin v1.
+ Values.add(llvm::ConstantPointerNull::get(VoidPtrTy));
+ llvm::GlobalVariable *FatbinWrapper = Values.finishAndCreateGlobal(
+ addUnderscoredPrefixToName("_fatbin_wrapper"), CGM.getPointerAlign(),
+ /*constant*/ true);
+ FatbinWrapper->setSection(FatbinSectionName);
+
+  // There is only one HIP fat binary per linked module; however, there are
+  // multiple constructor functions. Make sure the fat binary is registered
+  // only once. The constructor functions are executed by the dynamic loader
+  // before the program gains control. The dynamic loader cannot execute the
+  // constructor functions concurrently, since doing so would not guarantee
+  // thread safety of the loaded program. Therefore we can assume sequential
+  // execution of the constructor functions here.
+ if (IsHIP) {
+ llvm::BasicBlock *IfBlock =
+ llvm::BasicBlock::Create(Context, "if", ModuleCtorFunc);
+ llvm::BasicBlock *ExitBlock =
+ llvm::BasicBlock::Create(Context, "exit", ModuleCtorFunc);
+    // The name, size, and initialization pattern of this variable are part
+    // of the HIP ABI.
+ GpuBinaryHandle = new llvm::GlobalVariable(
+ TheModule, VoidPtrPtrTy, /*isConstant=*/false,
+ llvm::GlobalValue::LinkOnceAnyLinkage,
+ /*Initializer=*/llvm::ConstantPointerNull::get(VoidPtrPtrTy),
+ "__hip_gpubin_handle");
+ GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getQuantity());
+ Address GpuBinaryAddr(
+ GpuBinaryHandle,
+ CharUnits::fromQuantity(GpuBinaryHandle->getAlignment()));
+ {
+ auto HandleValue = CtorBuilder.CreateLoad(GpuBinaryAddr);
+ llvm::Constant *Zero =
+ llvm::Constant::getNullValue(HandleValue->getType());
+ llvm::Value *EQZero = CtorBuilder.CreateICmpEQ(HandleValue, Zero);
+ CtorBuilder.CreateCondBr(EQZero, IfBlock, ExitBlock);
+ }
+ {
+ CtorBuilder.SetInsertPoint(IfBlock);
+ // GpuBinaryHandle = __hipRegisterFatBinary(&FatbinWrapper);
+ llvm::CallInst *RegisterFatbinCall = CtorBuilder.CreateCall(
+ RegisterFatbinFunc,
+ CtorBuilder.CreateBitCast(FatbinWrapper, VoidPtrTy));
+ CtorBuilder.CreateStore(RegisterFatbinCall, GpuBinaryAddr);
+ CtorBuilder.CreateBr(ExitBlock);
+ }
+ {
+ CtorBuilder.SetInsertPoint(ExitBlock);
+ // Call __hip_register_globals(GpuBinaryHandle);
+ if (RegisterGlobalsFunc) {
+ auto HandleValue = CtorBuilder.CreateLoad(GpuBinaryAddr);
+ CtorBuilder.CreateCall(RegisterGlobalsFunc, HandleValue);
+ }
+ }
+ } else if (!RelocatableDeviceCode) {
+ // Register binary with CUDA runtime. This is substantially different in
+ // default mode vs. separate compilation!
// GpuBinaryHandle = __cudaRegisterFatBinary(&FatbinWrapper);
llvm::CallInst *RegisterFatbinCall = CtorBuilder.CreateCall(
RegisterFatbinFunc,
CtorBuilder.CreateBitCast(FatbinWrapper, VoidPtrTy));
- llvm::GlobalVariable *GpuBinaryHandle = new llvm::GlobalVariable(
+ GpuBinaryHandle = new llvm::GlobalVariable(
TheModule, VoidPtrPtrTy, false, llvm::GlobalValue::InternalLinkage,
llvm::ConstantPointerNull::get(VoidPtrPtrTy), "__cuda_gpubin_handle");
+ GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getQuantity());
CtorBuilder.CreateAlignedStore(RegisterFatbinCall, GpuBinaryHandle,
CGM.getPointerAlign());
// Call __cuda_register_globals(GpuBinaryHandle);
if (RegisterGlobalsFunc)
CtorBuilder.CreateCall(RegisterGlobalsFunc, RegisterFatbinCall);
+ } else {
+ // Generate a unique module ID.
+ SmallString<64> ModuleID;
+ llvm::raw_svector_ostream OS(ModuleID);
+ OS << ModuleIDPrefix << llvm::format("%x", FatbinWrapper->getGUID());
+ llvm::Constant *ModuleIDConstant =
+ makeConstantString(ModuleID.str(), "", ModuleIDSectionName, 32);
+
+ // Create an alias for the FatbinWrapper that nvcc will look for.
+ llvm::GlobalAlias::create(llvm::GlobalValue::ExternalLinkage,
+ Twine("__fatbinwrap") + ModuleID, FatbinWrapper);
+
+ // void __cudaRegisterLinkedBinary%ModuleID%(void (*)(void *), void *,
+ // void *, void (*)(void **))
+ SmallString<128> RegisterLinkedBinaryName("__cudaRegisterLinkedBinary");
+ RegisterLinkedBinaryName += ModuleID;
+ llvm::Constant *RegisterLinkedBinaryFunc = CGM.CreateRuntimeFunction(
+ getRegisterLinkedBinaryFnTy(), RegisterLinkedBinaryName);
+
+ assert(RegisterGlobalsFunc && "Expecting at least dummy function!");
+ llvm::Value *Args[] = {RegisterGlobalsFunc,
+ CtorBuilder.CreateBitCast(FatbinWrapper, VoidPtrTy),
+ ModuleIDConstant,
+ makeDummyFunction(getCallbackFnTy())};
+ CtorBuilder.CreateCall(RegisterLinkedBinaryFunc, Args);
+ }
- // Save GpuBinaryHandle so we can unregister it in destructor.
- GpuBinaryHandles.push_back(GpuBinaryHandle);
+  // Create the destructor and register it with atexit() the way NVCC does.
+  // Running it during the regular destructor phase worked in CUDA before 9.2
+  // but results in a double free in 9.2.
+ if (llvm::Function *CleanupFn = makeModuleDtorFunction()) {
+ // extern "C" int atexit(void (*f)(void));
+ llvm::FunctionType *AtExitTy =
+ llvm::FunctionType::get(IntTy, CleanupFn->getType(), false);
+ llvm::Constant *AtExitFunc =
+ CGM.CreateRuntimeFunction(AtExitTy, "atexit", llvm::AttributeList(),
+ /*Local=*/true);
+ CtorBuilder.CreateCall(AtExitFunc, CleanupFn);
}
CtorBuilder.CreateRetVoid();
return ModuleCtorFunc;
}
-/// Creates a global destructor function that unregisters all GPU code blobs
+/// Creates a global destructor function that unregisters the GPU code blob
/// registered by constructor.
+///
+/// For CUDA:
/// \code
/// void __cuda_module_dtor(void*) {
-/// __cudaUnregisterFatBinary(Handle0);
-/// ...
-/// __cudaUnregisterFatBinary(HandleN);
+/// __cudaUnregisterFatBinary(Handle);
+/// }
+/// \endcode
+///
+/// For HIP:
+/// \code
+/// void __hip_module_dtor(void*) {
+/// if (__hip_gpubin_handle) {
+/// __hipUnregisterFatBinary(__hip_gpubin_handle);
+/// __hip_gpubin_handle = 0;
+/// }
/// }
/// \endcode
llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() {
- // No need for destructor if we don't have handles to unregister.
- if (GpuBinaryHandles.empty())
+ // No need for destructor if we don't have a handle to unregister.
+ if (!GpuBinaryHandle)
return nullptr;
// void __cudaUnregisterFatBinary(void ** handle);
llvm::Constant *UnregisterFatbinFunc = CGM.CreateRuntimeFunction(
llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false),
- "__cudaUnregisterFatBinary");
+ addUnderscoredPrefixToName("UnregisterFatBinary"));
llvm::Function *ModuleDtorFunc = llvm::Function::Create(
llvm::FunctionType::get(VoidTy, VoidPtrTy, false),
- llvm::GlobalValue::InternalLinkage, "__cuda_module_dtor", &TheModule);
+ llvm::GlobalValue::InternalLinkage,
+ addUnderscoredPrefixToName("_module_dtor"), &TheModule);
+
llvm::BasicBlock *DtorEntryBB =
llvm::BasicBlock::Create(Context, "entry", ModuleDtorFunc);
CGBuilderTy DtorBuilder(CGM, Context);
DtorBuilder.SetInsertPoint(DtorEntryBB);
- for (llvm::GlobalVariable *GpuBinaryHandle : GpuBinaryHandles) {
- auto HandleValue =
- DtorBuilder.CreateAlignedLoad(GpuBinaryHandle, CGM.getPointerAlign());
+ Address GpuBinaryAddr(GpuBinaryHandle, CharUnits::fromQuantity(
+ GpuBinaryHandle->getAlignment()));
+ auto HandleValue = DtorBuilder.CreateLoad(GpuBinaryAddr);
+  // There is only one HIP fat binary per linked module; however, there are
+  // multiple destructor functions. Make sure the fat binary is unregistered
+  // only once.
+ if (CGM.getLangOpts().HIP) {
+ llvm::BasicBlock *IfBlock =
+ llvm::BasicBlock::Create(Context, "if", ModuleDtorFunc);
+ llvm::BasicBlock *ExitBlock =
+ llvm::BasicBlock::Create(Context, "exit", ModuleDtorFunc);
+ llvm::Constant *Zero = llvm::Constant::getNullValue(HandleValue->getType());
+ llvm::Value *NEZero = DtorBuilder.CreateICmpNE(HandleValue, Zero);
+ DtorBuilder.CreateCondBr(NEZero, IfBlock, ExitBlock);
+
+ DtorBuilder.SetInsertPoint(IfBlock);
DtorBuilder.CreateCall(UnregisterFatbinFunc, HandleValue);
- }
+ DtorBuilder.CreateStore(Zero, GpuBinaryAddr);
+ DtorBuilder.CreateBr(ExitBlock);
+ DtorBuilder.SetInsertPoint(ExitBlock);
+ } else {
+ DtorBuilder.CreateCall(UnregisterFatbinFunc, HandleValue);
+ }
DtorBuilder.CreateRetVoid();
return ModuleDtorFunc;
}
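
Taken together, the non-relocatable CUDA path of the ctor/dtor pair corresponds roughly to the host-side sketch below; registering the cleanup via atexit() instead of a global destructor is what sidesteps the CUDA 9.2 double-free noted above. This is an approximation, not the emitted IR, with declarations standing in for the runtime and the wrapper:

#include <cstdlib>
extern "C" void **__cudaRegisterFatBinary(void *fatbinWrapper);
extern "C" void __cudaUnregisterFatBinary(void **handle);
extern void __cuda_register_globals(void **handle);
extern char __cuda_fatbin_wrapper[]; // opaque here; see FatbinWrapperTy
static void **__cuda_gpubin_handle;
static void __cuda_module_dtor() { // the emitted version takes a void*
  __cudaUnregisterFatBinary(__cuda_gpubin_handle);
}
static void __cuda_module_ctor() { // likewise emitted as void(void*)
  __cuda_gpubin_handle = __cudaRegisterFatBinary(__cuda_fatbin_wrapper);
  __cuda_register_globals(__cuda_gpubin_handle);
  std::atexit(__cuda_module_dtor); // NVCC-style cleanup registration
}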
diff --git a/lib/CodeGen/CGCXX.cpp b/lib/CodeGen/CGCXX.cpp
index 5ef4dc45fba1..475f17b77d92 100644
--- a/lib/CodeGen/CGCXX.cpp
+++ b/lib/CodeGen/CGCXX.cpp
@@ -109,17 +109,8 @@ bool CodeGenModule::TryEmitBaseDestructorAsAlias(const CXXDestructorDecl *D) {
D->getType()->getAs<FunctionType>()->getCallConv())
return true;
- return TryEmitDefinitionAsAlias(GlobalDecl(D, Dtor_Base),
- GlobalDecl(BaseD, Dtor_Base));
-}
-
-/// Try to emit a definition as a global alias for another definition.
-/// If \p InEveryTU is true, we know that an equivalent alias can be produced
-/// in every translation unit.
-bool CodeGenModule::TryEmitDefinitionAsAlias(GlobalDecl AliasDecl,
- GlobalDecl TargetDecl) {
- if (!getCodeGenOpts().CXXCtorDtorAliases)
- return true;
+ GlobalDecl AliasDecl(D, Dtor_Base);
+ GlobalDecl TargetDecl(BaseD, Dtor_Base);
// The alias will use the linkage of the referent. If we can't
// support aliases with that linkage, fail.
@@ -193,6 +184,9 @@ bool CodeGenModule::TryEmitDefinitionAsAlias(GlobalDecl AliasDecl,
auto *Alias = llvm::GlobalAlias::create(AliasValueType, 0, Linkage, "",
Aliasee, &getModule());
+ // Destructors are always unnamed_addr.
+ Alias->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+
// Switch any previous uses to the alias.
if (Entry) {
assert(Entry->getType() == AliasType &&
@@ -205,7 +199,7 @@ bool CodeGenModule::TryEmitDefinitionAsAlias(GlobalDecl AliasDecl,
}
// Finally, set up the alias with its proper name and attributes.
- setAliasAttributes(cast<NamedDecl>(AliasDecl.getDecl()), Alias);
+ SetCommonAttributes(AliasDecl, Alias);
return false;
}
@@ -227,10 +221,9 @@ llvm::Function *CodeGenModule::codegenCXXStructor(const CXXMethodDecl *MD,
}
setFunctionLinkage(GD, Fn);
- setFunctionDLLStorageClass(GD, Fn);
CodeGenFunction(*this).GenerateCode(GD, Fn, FnInfo);
- setFunctionDefinitionAttributes(MD, Fn);
+ setNonAliasAttributes(GD, Fn);
SetLLVMFunctionAttributesForDefinition(MD, Fn);
return Fn;
}
@@ -243,6 +236,11 @@ llvm::Constant *CodeGenModule::getAddrOfCXXStructor(
if (auto *CD = dyn_cast<CXXConstructorDecl>(MD)) {
GD = GlobalDecl(CD, toCXXCtorType(Type));
} else {
+ // Always alias equivalent complete destructors to base destructors in the
+ // MS ABI.
+ if (getTarget().getCXXABI().isMicrosoft() &&
+ Type == StructorType::Complete && MD->getParent()->getNumVBases() == 0)
+ Type = StructorType::Base;
GD = GlobalDecl(cast<CXXDestructorDecl>(MD), toCXXDtorType(Type));
}
@@ -263,7 +261,6 @@ static CGCallee BuildAppleKextVirtualCall(CodeGenFunction &CGF,
const CXXRecordDecl *RD) {
assert(!CGF.CGM.getTarget().getCXXABI().isMicrosoft() &&
"No kext in Microsoft ABI");
- GD = GD.getCanonicalDecl();
CodeGenModule &CGM = CGF.CGM;
llvm::Value *VTable = CGM.getCXXABI().getAddrOfVTable(RD, CharUnits());
Ty = Ty->getPointerTo()->getPointerTo();
@@ -279,7 +276,7 @@ static CGCallee BuildAppleKextVirtualCall(CodeGenFunction &CGF,
CGF.Builder.CreateConstInBoundsGEP1_64(VTable, VTableIndex, "vfnkxt");
llvm::Value *VFunc =
CGF.Builder.CreateAlignedLoad(VFuncPtr, CGF.PointerAlignInBytes);
- CGCallee Callee(GD.getDecl(), VFunc);
+ CGCallee Callee(GD.getDecl()->getCanonicalDecl(), VFunc);
return Callee;
}
diff --git a/lib/CodeGen/CGCXXABI.cpp b/lib/CodeGen/CGCXXABI.cpp
index a27c3e9d27e3..0611749acf17 100644
--- a/lib/CodeGen/CGCXXABI.cpp
+++ b/lib/CodeGen/CGCXXABI.cpp
@@ -287,6 +287,20 @@ CGCXXABI::EmitCtorCompleteObjectHandler(CodeGenFunction &CGF,
return nullptr;
}
+void CGCXXABI::setCXXDestructorDLLStorage(llvm::GlobalValue *GV,
+ const CXXDestructorDecl *Dtor,
+ CXXDtorType DT) const {
+ // Assume the base C++ ABI has no special rules for destructor variants.
+ CGM.setDLLImportDLLExport(GV, Dtor);
+}
+
+llvm::GlobalValue::LinkageTypes CGCXXABI::getCXXDestructorLinkage(
+ GVALinkage Linkage, const CXXDestructorDecl *Dtor, CXXDtorType DT) const {
+ // Delegate back to CGM by default.
+ return CGM.getLLVMLinkageForDeclarator(Dtor, Linkage,
+ /*isConstantVariable=*/false);
+}
+
bool CGCXXABI::NeedsVTTParameter(GlobalDecl GD) {
return false;
}
diff --git a/lib/CodeGen/CGCXXABI.h b/lib/CodeGen/CGCXXABI.h
index 83426dc3a03c..65b50e14f436 100644
--- a/lib/CodeGen/CGCXXABI.h
+++ b/lib/CodeGen/CGCXXABI.h
@@ -40,7 +40,7 @@ class CodeGenFunction;
class CodeGenModule;
struct CatchTypeInfo;
-/// \brief Implements C++ ABI-specific code generation functions.
+/// Implements C++ ABI-specific code generation functions.
class CGCXXABI {
protected:
CodeGenModule &CGM;
@@ -222,7 +222,7 @@ protected:
/// is required.
llvm::Constant *getMemberPointerAdjustment(const CastExpr *E);
- /// \brief Computes the non-virtual adjustment needed for a member pointer
+ /// Computes the non-virtual adjustment needed for a member pointer
/// conversion along an inheritance path stored in an APValue. Unlike
/// getMemberPointerAdjustment(), the adjustment can be negative if the path
/// is from a derived type to a base type.
@@ -237,7 +237,7 @@ public:
virtual void emitThrow(CodeGenFunction &CGF, const CXXThrowExpr *E) = 0;
virtual llvm::GlobalVariable *getThrowInfo(QualType T) { return nullptr; }
- /// \brief Determine whether it's possible to emit a vtable for \p RD, even
+ /// Determine whether it's possible to emit a vtable for \p RD, even
/// though we do not know that the vtable has been marked as used by semantic
/// analysis.
virtual bool canSpeculativelyEmitVTable(const CXXRecordDecl *RD) const = 0;
@@ -319,6 +319,14 @@ public:
virtual bool useThunkForDtorVariant(const CXXDestructorDecl *Dtor,
CXXDtorType DT) const = 0;
+ virtual void setCXXDestructorDLLStorage(llvm::GlobalValue *GV,
+ const CXXDestructorDecl *Dtor,
+ CXXDtorType DT) const;
+
+ virtual llvm::GlobalValue::LinkageTypes
+ getCXXDestructorLinkage(GVALinkage Linkage, const CXXDestructorDecl *Dtor,
+ CXXDtorType DT) const;
+
/// Emit destructor variants required by this ABI.
virtual void EmitCXXDestructors(const CXXDestructorDecl *D) = 0;
@@ -414,8 +422,7 @@ public:
/// Build a virtual function pointer in the ABI-specific way.
virtual CGCallee getVirtualFunctionPointer(CodeGenFunction &CGF,
- GlobalDecl GD,
- Address This,
+ GlobalDecl GD, Address This,
llvm::Type *Ty,
SourceLocation Loc) = 0;
@@ -434,6 +441,7 @@ public:
/// base tables.
virtual void emitVirtualInheritanceTables(const CXXRecordDecl *RD) = 0;
+ virtual bool exportThunk() = 0;
virtual void setThunkLinkage(llvm::Function *Thunk, bool ForVTable,
GlobalDecl GD, bool ReturnAdjustment) = 0;
@@ -599,6 +607,17 @@ CGCXXABI *CreateItaniumCXXABI(CodeGenModule &CGM);
/// Creates a Microsoft-family ABI.
CGCXXABI *CreateMicrosoftCXXABI(CodeGenModule &CGM);
+struct CatchRetScope final : EHScopeStack::Cleanup {
+ llvm::CatchPadInst *CPI;
+
+ CatchRetScope(llvm::CatchPadInst *CPI) : CPI(CPI) {}
+
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
+ llvm::BasicBlock *BB = CGF.createBasicBlock("catchret.dest");
+ CGF.Builder.CreateCatchRet(CPI, BB);
+ CGF.EmitBlock(BB);
+ }
+};
}
}
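
CatchRetScope appears to have been hoisted into this shared header so both C++ ABIs can route control out of a catchpad funclet through a catchret. A hedged sketch of the typical call site, assuming the usual EHScopeStack cleanup API:

// Sketch: after emitting a catchpad, push the cleanup so every normal
// exit from the handler goes through a catchret to a fresh block.
static void enterCatchFunclet(clang::CodeGen::CodeGenFunction &CGF,
                              llvm::CatchPadInst *CPI) {
  CGF.EHStack.pushCleanup<clang::CodeGen::CatchRetScope>(
      clang::CodeGen::NormalCleanup, CPI);
}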
diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp
index 38d7344572d3..f066ce168588 100644
--- a/lib/CodeGen/CGCall.cpp
+++ b/lib/CodeGen/CGCall.cpp
@@ -29,15 +29,15 @@
#include "clang/CodeGen/SwiftCallingConv.h"
#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Attributes.h"
-#include "llvm/IR/CallingConv.h"
#include "llvm/IR/CallSite.h"
+#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InlineAsm.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/IR/Intrinsics.h"
using namespace clang;
using namespace CodeGen;
@@ -255,6 +255,16 @@ CodeGenTypes::arrangeCXXMethodType(const CXXRecordDecl *RD,
FTP->getCanonicalTypeUnqualified().getAs<FunctionProtoType>(), MD);
}
+/// Set calling convention for CUDA/HIP kernel.
+static void setCUDAKernelCallingConvention(CanQualType &FTy, CodeGenModule &CGM,
+ const FunctionDecl *FD) {
+ if (FD->hasAttr<CUDAGlobalAttr>()) {
+ const FunctionType *FT = FTy->getAs<FunctionType>();
+ CGM.getTargetCodeGenInfo().setCUDAKernelCallingConvention(FT);
+ FTy = FT->getCanonicalTypeUnqualified();
+ }
+}
+
/// Arrange the argument and result information for a declaration or
/// definition of the given C++ non-static member function. The
/// member function must be an ordinary function, i.e. not a
@@ -264,7 +274,9 @@ CodeGenTypes::arrangeCXXMethodDeclaration(const CXXMethodDecl *MD) {
assert(!isa<CXXConstructorDecl>(MD) && "wrong method for constructors!");
assert(!isa<CXXDestructorDecl>(MD) && "wrong method for destructors!");
- CanQual<FunctionProtoType> prototype = GetFormalType(MD);
+ CanQualType FT = GetFormalType(MD).getAs<Type>();
+ setCUDAKernelCallingConvention(FT, CGM, MD);
+ auto prototype = FT.getAs<FunctionProtoType>();
if (MD->isInstance()) {
// The abstract case is perfectly fine.
@@ -424,6 +436,7 @@ CodeGenTypes::arrangeFunctionDeclaration(const FunctionDecl *FD) {
CanQualType FTy = FD->getType()->getCanonicalTypeUnqualified();
assert(isa<FunctionType>(FTy));
+ setCUDAKernelCallingConvention(FTy, CGM, FD);
// When declaring a function without a prototype, always use a
// non-variadic type.
@@ -513,8 +526,8 @@ CodeGenTypes::arrangeGlobalDeclaration(GlobalDecl GD) {
/// correct type, and the caller will bitcast the function to the correct
/// prototype.
const CGFunctionInfo &
-CodeGenTypes::arrangeMSMemberPointerThunk(const CXXMethodDecl *MD) {
- assert(MD->isVirtual() && "only virtual memptrs have thunks");
+CodeGenTypes::arrangeUnprototypedMustTailThunk(const CXXMethodDecl *MD) {
+ assert(MD->isVirtual() && "only methods have thunks");
CanQual<FunctionProtoType> FTP = GetFormalType(MD);
CanQualType ArgTys[] = { GetThisType(Context, MD->getParent()) };
return arrangeLLVMFunctionInfo(Context.VoidTy, /*instanceMethod=*/false,
@@ -803,6 +816,7 @@ CGFunctionInfo *CGFunctionInfo::create(unsigned llvmCC,
FI->NoReturn = info.getNoReturn();
FI->ReturnsRetained = info.getProducesResult();
FI->NoCallerSavedRegs = info.getNoCallerSavedRegs();
+ FI->NoCfCheck = info.getNoCfCheck();
FI->Required = required;
FI->HasRegParm = info.getHasRegParm();
FI->RegParm = info.getRegParm();
@@ -904,8 +918,7 @@ getTypeExpansion(QualType Ty, const ASTContext &Context) {
CharUnits UnionSize = CharUnits::Zero();
for (const auto *FD : RD->fields()) {
- // Skip zero length bitfields.
- if (FD->isBitField() && FD->getBitWidthValue(Context) == 0)
+ if (FD->isZeroLengthBitField(Context))
continue;
assert(!FD->isBitField() &&
"Cannot expand structure with bit-field members.");
@@ -926,8 +939,7 @@ getTypeExpansion(QualType Ty, const ASTContext &Context) {
}
for (const auto *FD : RD->fields()) {
- // Skip zero length bitfields.
- if (FD->isBitField() && FD->getBitWidthValue(Context) == 0)
+ if (FD->isZeroLengthBitField(Context))
continue;
assert(!FD->isBitField() &&
"Cannot expand structure with bit-field members.");
@@ -1040,42 +1052,49 @@ void CodeGenFunction::ExpandTypeFromArgs(
}
void CodeGenFunction::ExpandTypeToArgs(
- QualType Ty, RValue RV, llvm::FunctionType *IRFuncTy,
+ QualType Ty, CallArg Arg, llvm::FunctionType *IRFuncTy,
SmallVectorImpl<llvm::Value *> &IRCallArgs, unsigned &IRCallArgPos) {
auto Exp = getTypeExpansion(Ty, getContext());
if (auto CAExp = dyn_cast<ConstantArrayExpansion>(Exp.get())) {
- forConstantArrayExpansion(*this, CAExp, RV.getAggregateAddress(),
- [&](Address EltAddr) {
- RValue EltRV =
- convertTempToRValue(EltAddr, CAExp->EltTy, SourceLocation());
- ExpandTypeToArgs(CAExp->EltTy, EltRV, IRFuncTy, IRCallArgs, IRCallArgPos);
- });
+ Address Addr = Arg.hasLValue() ? Arg.getKnownLValue().getAddress()
+ : Arg.getKnownRValue().getAggregateAddress();
+ forConstantArrayExpansion(
+ *this, CAExp, Addr, [&](Address EltAddr) {
+ CallArg EltArg = CallArg(
+ convertTempToRValue(EltAddr, CAExp->EltTy, SourceLocation()),
+ CAExp->EltTy);
+ ExpandTypeToArgs(CAExp->EltTy, EltArg, IRFuncTy, IRCallArgs,
+ IRCallArgPos);
+ });
} else if (auto RExp = dyn_cast<RecordExpansion>(Exp.get())) {
- Address This = RV.getAggregateAddress();
+ Address This = Arg.hasLValue() ? Arg.getKnownLValue().getAddress()
+ : Arg.getKnownRValue().getAggregateAddress();
for (const CXXBaseSpecifier *BS : RExp->Bases) {
// Perform a single step derived-to-base conversion.
Address Base =
GetAddressOfBaseClass(This, Ty->getAsCXXRecordDecl(), &BS, &BS + 1,
/*NullCheckValue=*/false, SourceLocation());
- RValue BaseRV = RValue::getAggregate(Base);
+ CallArg BaseArg = CallArg(RValue::getAggregate(Base), BS->getType());
// Recurse onto bases.
- ExpandTypeToArgs(BS->getType(), BaseRV, IRFuncTy, IRCallArgs,
+ ExpandTypeToArgs(BS->getType(), BaseArg, IRFuncTy, IRCallArgs,
IRCallArgPos);
}
LValue LV = MakeAddrLValue(This, Ty);
for (auto FD : RExp->Fields) {
- RValue FldRV = EmitRValueForField(LV, FD, SourceLocation());
- ExpandTypeToArgs(FD->getType(), FldRV, IRFuncTy, IRCallArgs,
+ CallArg FldArg =
+ CallArg(EmitRValueForField(LV, FD, SourceLocation()), FD->getType());
+ ExpandTypeToArgs(FD->getType(), FldArg, IRFuncTy, IRCallArgs,
IRCallArgPos);
}
} else if (isa<ComplexExpansion>(Exp.get())) {
- ComplexPairTy CV = RV.getComplexVal();
+ ComplexPairTy CV = Arg.getKnownRValue().getComplexVal();
IRCallArgs[IRCallArgPos++] = CV.first;
IRCallArgs[IRCallArgPos++] = CV.second;
} else {
assert(isa<NoExpansion>(Exp.get()));
+ auto RV = Arg.getKnownRValue();
assert(RV.isScalar() &&
"Unexpected non-scalar rvalue during struct expansion.");
@@ -1479,7 +1498,8 @@ void ClangToLLVMArgMapping::construct(const ASTContext &Context,
/***/
bool CodeGenModule::ReturnTypeUsesSRet(const CGFunctionInfo &FI) {
- return FI.getReturnInfo().isIndirect();
+ const auto &RI = FI.getReturnInfo();
+ return RI.isIndirect() || (RI.isInAlloca() && RI.getInAllocaSRet());
}
bool CodeGenModule::ReturnSlotInterferesWithArgs(const CGFunctionInfo &FI) {
@@ -1672,7 +1692,7 @@ static void AddAttributesFromFunctionProtoType(ASTContext &Ctx,
return;
if (!isUnresolvedExceptionSpec(FPT->getExceptionSpecType()) &&
- FPT->isNothrow(Ctx))
+ FPT->isNothrow())
FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
}
@@ -1714,12 +1734,19 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone,
FuncAttrs.addAttribute("less-precise-fpmad",
llvm::toStringRef(CodeGenOpts.LessPreciseFPMAD));
+ if (CodeGenOpts.NullPointerIsValid)
+ FuncAttrs.addAttribute("null-pointer-is-valid", "true");
if (!CodeGenOpts.FPDenormalMode.empty())
FuncAttrs.addAttribute("denormal-fp-math", CodeGenOpts.FPDenormalMode);
FuncAttrs.addAttribute("no-trapping-math",
llvm::toStringRef(CodeGenOpts.NoTrappingMath));
+ // Strict (compliant) code is the default, so only add this attribute to
+  // indicate that we are trying to work around a problem case.
+ if (!CodeGenOpts.StrictFloatCastOverflow)
+ FuncAttrs.addAttribute("strict-float-cast-overflow", "false");
+
// TODO: Are these all needed?
// unsafe/inf/nan/nsz are handled by instruction-level FastMathFlags.
FuncAttrs.addAttribute("no-infs-fp-math",
@@ -1738,6 +1765,10 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone,
"correctly-rounded-divide-sqrt-fp-math",
llvm::toStringRef(CodeGenOpts.CorrectlyRoundedDivSqrt));
+ if (getLangOpts().OpenCL)
+ FuncAttrs.addAttribute("denorms-are-zero",
+ llvm::toStringRef(CodeGenOpts.FlushDenorm));
+
// TODO: Reciprocal estimate codegen options should apply to instructions?
const std::vector<std::string> &Recips = CodeGenOpts.Reciprocals;
if (!Recips.empty())
@@ -1769,7 +1800,7 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone,
FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
// Respect -fcuda-flush-denormals-to-zero.
- if (getLangOpts().CUDADeviceFlushDenormalsToZero)
+ if (CodeGenOpts.FlushDenorm)
FuncAttrs.addAttribute("nvptx-f32ftz", "true");
}
}
@@ -1793,7 +1824,7 @@ void CodeGenModule::ConstructAttributeList(
FuncAttrs.addAttribute(llvm::Attribute::NoReturn);
// If we have information about the function prototype, we can learn
- // attributes form there.
+ // attributes from there.
AddAttributesFromFunctionProtoType(getContext(), FuncAttrs,
CalleeInfo.getCalleeFunctionProtoType());
@@ -1838,18 +1869,20 @@ void CodeGenModule::ConstructAttributeList(
}
if (TargetDecl->hasAttr<RestrictAttr>())
RetAttrs.addAttribute(llvm::Attribute::NoAlias);
- if (TargetDecl->hasAttr<ReturnsNonNullAttr>())
+ if (TargetDecl->hasAttr<ReturnsNonNullAttr>() &&
+ !CodeGenOpts.NullPointerIsValid)
RetAttrs.addAttribute(llvm::Attribute::NonNull);
if (TargetDecl->hasAttr<AnyX86NoCallerSavedRegistersAttr>())
FuncAttrs.addAttribute("no_caller_saved_registers");
+ if (TargetDecl->hasAttr<AnyX86NoCfCheckAttr>())
+ FuncAttrs.addAttribute(llvm::Attribute::NoCfCheck);
HasOptnone = TargetDecl->hasAttr<OptimizeNoneAttr>();
if (auto *AllocSize = TargetDecl->getAttr<AllocSizeAttr>()) {
Optional<unsigned> NumElemsParam;
- // alloc_size args are base-1, 0 means not present.
- if (unsigned N = AllocSize->getNumElemsParam())
- NumElemsParam = N - 1;
- FuncAttrs.addAllocSizeAttr(AllocSize->getElemSizeParam() - 1,
+ if (AllocSize->getNumElemsParam().isValid())
+ NumElemsParam = AllocSize->getNumElemsParam().getLLVMIndex();
+ FuncAttrs.addAllocSizeAttr(AllocSize->getElemSizeParam().getLLVMIndex(),
NumElemsParam);
}
}
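
The alloc_size rework replaces raw base-1 indices with the ParamIdx abstraction; at the source level the attribute arguments stay 1-based, as in this sketch (getLLVMIndex() presumably yields the 0-based position the IR attribute wants):

// Source-level view of the attribute whose indices are being mapped.
void *my_malloc(unsigned size) __attribute__((alloc_size(1)));
void *my_calloc(unsigned size, unsigned count)
    __attribute__((alloc_size(1, 2)));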
@@ -1870,53 +1903,40 @@ void CodeGenModule::ConstructAttributeList(
}
}
- if (!AttrOnCallSite) {
- bool DisableTailCalls =
- CodeGenOpts.DisableTailCalls ||
- (TargetDecl && (TargetDecl->hasAttr<DisableTailCallsAttr>() ||
- TargetDecl->hasAttr<AnyX86InterruptAttr>()));
- FuncAttrs.addAttribute("disable-tail-calls",
- llvm::toStringRef(DisableTailCalls));
-
- // Add target-cpu and target-features attributes to functions. If
- // we have a decl for the function and it has a target attribute then
- // parse that and add it to the feature set.
- StringRef TargetCPU = getTarget().getTargetOpts().CPU;
- std::vector<std::string> Features;
- const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl);
- if (FD && FD->hasAttr<TargetAttr>()) {
- llvm::StringMap<bool> FeatureMap;
- getFunctionFeatureMap(FeatureMap, FD);
-
- // Produce the canonical string for this set of features.
- for (llvm::StringMap<bool>::const_iterator it = FeatureMap.begin(),
- ie = FeatureMap.end();
- it != ie; ++it)
- Features.push_back((it->second ? "+" : "-") + it->first().str());
-
- // Now add the target-cpu and target-features to the function.
- // While we populated the feature map above, we still need to
- // get and parse the target attribute so we can get the cpu for
- // the function.
- const auto *TD = FD->getAttr<TargetAttr>();
- TargetAttr::ParsedTargetAttr ParsedAttr = TD->parse();
- if (ParsedAttr.Architecture != "" &&
- getTarget().isValidCPUName(ParsedAttr.Architecture))
- TargetCPU = ParsedAttr.Architecture;
+ if (TargetDecl && TargetDecl->hasAttr<OpenCLKernelAttr>()) {
+ if (getLangOpts().OpenCLVersion <= 120) {
+      // OpenCL v1.2: work groups are always uniform.
+ FuncAttrs.addAttribute("uniform-work-group-size", "true");
} else {
- // Otherwise just add the existing target cpu and target features to the
- // function.
- Features = getTarget().getTargetOpts().Features;
+      // In OpenCL v2.0, work groups may or may not be uniform. The
+      // '-cl-uniform-work-group-size' compile option hints to the compiler
+      // that the global work-size is a multiple of the work-group size
+      // specified to clEnqueueNDRangeKernel (i.e. that work groups are
+      // uniform).
+ FuncAttrs.addAttribute("uniform-work-group-size",
+ llvm::toStringRef(CodeGenOpts.UniformWGSize));
}
+ }
- if (TargetCPU != "")
- FuncAttrs.addAttribute("target-cpu", TargetCPU);
- if (!Features.empty()) {
- std::sort(Features.begin(), Features.end());
- FuncAttrs.addAttribute(
- "target-features",
- llvm::join(Features, ","));
+ if (!AttrOnCallSite) {
+ bool DisableTailCalls = false;
+
+ if (CodeGenOpts.DisableTailCalls)
+ DisableTailCalls = true;
+ else if (TargetDecl) {
+ if (TargetDecl->hasAttr<DisableTailCallsAttr>() ||
+ TargetDecl->hasAttr<AnyX86InterruptAttr>())
+ DisableTailCalls = true;
+ else if (CodeGenOpts.NoEscapingBlockTailCalls) {
+ if (const auto *BD = dyn_cast<BlockDecl>(TargetDecl))
+ if (!BD->doesNotEscape())
+ DisableTailCalls = true;
+ }
}
+
+ FuncAttrs.addAttribute("disable-tail-calls",
+ llvm::toStringRef(DisableTailCalls));
+ GetCPUAndFeaturesAttributes(TargetDecl, FuncAttrs);
}
ClangToLLVMArgMapping IRFunctionArgs(getContext(), FI);
@@ -1925,9 +1945,9 @@ void CodeGenModule::ConstructAttributeList(
const ABIArgInfo &RetAI = FI.getReturnInfo();
switch (RetAI.getKind()) {
case ABIArgInfo::Extend:
- if (RetTy->hasSignedIntegerRepresentation())
+ if (RetAI.isSignExt())
RetAttrs.addAttribute(llvm::Attribute::SExt);
- else if (RetTy->hasUnsignedIntegerRepresentation())
+ else
RetAttrs.addAttribute(llvm::Attribute::ZExt);
LLVM_FALLTHROUGH;
case ABIArgInfo::Direct:
@@ -1957,7 +1977,8 @@ void CodeGenModule::ConstructAttributeList(
if (!PTy->isIncompleteType() && PTy->isConstantSizeType())
RetAttrs.addDereferenceableAttr(getContext().getTypeSizeInChars(PTy)
.getQuantity());
- else if (getContext().getTargetAddressSpace(PTy) == 0)
+ else if (getContext().getTargetAddressSpace(PTy) == 0 &&
+ !CodeGenOpts.NullPointerIsValid)
RetAttrs.addAttribute(llvm::Attribute::NonNull);
}
@@ -1967,7 +1988,8 @@ void CodeGenModule::ConstructAttributeList(
// Attach attributes to sret.
if (IRFunctionArgs.hasSRetArg()) {
llvm::AttrBuilder SRETAttrs;
- SRETAttrs.addAttribute(llvm::Attribute::StructRet);
+ if (!RetAI.getSuppressSRet())
+ SRETAttrs.addAttribute(llvm::Attribute::StructRet);
hasUsedSRet = true;
if (RetAI.getInReg())
SRETAttrs.addAttribute(llvm::Attribute::InReg);
@@ -2006,14 +2028,10 @@ void CodeGenModule::ConstructAttributeList(
// sense to do it here because parameters are so messed up.
switch (AI.getKind()) {
case ABIArgInfo::Extend:
- if (ParamType->isSignedIntegerOrEnumerationType())
+ if (AI.isSignExt())
Attrs.addAttribute(llvm::Attribute::SExt);
- else if (ParamType->isUnsignedIntegerOrEnumerationType()) {
- if (getTypes().getABIInfo().shouldSignExtUnsignedType(ParamType))
- Attrs.addAttribute(llvm::Attribute::SExt);
- else
- Attrs.addAttribute(llvm::Attribute::ZExt);
- }
+ else
+ Attrs.addAttribute(llvm::Attribute::ZExt);
LLVM_FALLTHROUGH;
case ABIArgInfo::Direct:
if (ArgNo == 0 && FI.isChainCall())
@@ -2070,7 +2088,8 @@ void CodeGenModule::ConstructAttributeList(
if (!PTy->isIncompleteType() && PTy->isConstantSizeType())
Attrs.addDereferenceableAttr(getContext().getTypeSizeInChars(PTy)
.getQuantity());
- else if (getContext().getTargetAddressSpace(PTy) == 0)
+ else if (getContext().getTargetAddressSpace(PTy) == 0 &&
+ !CodeGenOpts.NullPointerIsValid)
Attrs.addAttribute(llvm::Attribute::NonNull);
}
@@ -2255,11 +2274,16 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
for (FunctionArgList::const_iterator i = Args.begin(), e = Args.end();
i != e; ++i, ++info_it, ++ArgNo) {
const VarDecl *Arg = *i;
- QualType Ty = info_it->type;
const ABIArgInfo &ArgI = info_it->info;
bool isPromoted =
isa<ParmVarDecl>(Arg) && cast<ParmVarDecl>(Arg)->isKNRPromoted();
+ // We convert from the ABIArgInfo type to the VarDecl type directly, unless
+ // the parameter is promoted. In that case we use the promoted
+ // CGFunctionInfo::ArgInfo type and demote the argument afterwards.
+ QualType Ty = isPromoted ? info_it->type : Arg->getType();
+ assert(hasScalarEvaluationKind(Ty) ==
+ hasScalarEvaluationKind(Arg->getType()));
unsigned FirstIRArg, NumIRArgs;
std::tie(FirstIRArg, NumIRArgs) = IRFunctionArgs.getIRArgs(ArgNo);
@@ -2325,7 +2349,8 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
if (const ParmVarDecl *PVD = dyn_cast<ParmVarDecl>(Arg)) {
if (getNonNullAttr(CurCodeDecl, PVD, PVD->getType(),
- PVD->getFunctionScopeIndex()))
+ PVD->getFunctionScopeIndex()) &&
+ !CGM.getCodeGenOpts().NullPointerIsValid)
AI->addAttr(llvm::Attribute::NonNull);
QualType OTy = PVD->getOriginalType();
@@ -2344,7 +2369,8 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
Attrs.addDereferenceableAttr(
getContext().getTypeSizeInChars(ETy).getQuantity()*ArrSize);
AI->addAttrs(Attrs);
- } else if (getContext().getTargetAddressSpace(ETy) == 0) {
+ } else if (getContext().getTargetAddressSpace(ETy) == 0 &&
+ !CGM.getCodeGenOpts().NullPointerIsValid) {
AI->addAttr(llvm::Attribute::NonNull);
}
}
@@ -2354,7 +2380,8 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
// we can't use the dereferenceable attribute, but in addrspace(0)
// we know that it must be nonnull.
if (ArrTy->getSizeModifier() == VariableArrayType::Static &&
- !getContext().getTargetAddressSpace(ArrTy->getElementType()))
+ !getContext().getTargetAddressSpace(ArrTy->getElementType()) &&
+ !CGM.getCodeGenOpts().NullPointerIsValid)
AI->addAttr(llvm::Attribute::NonNull);
}
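
The prolog change above substitutes the promoted CGFunctionInfo type only when the parameter really was K&R-promoted; otherwise the VarDecl's own type is authoritative. A toy model of that choice (all names invented for the illustration):

    #include <cassert>
    #include <string>

    // Use the promoted ABI type only for K&R-promoted parameters; demotion
    // back to the declared type happens afterwards in the real code.
    std::string prologType(bool isPromoted, const std::string &abiType,
                           const std::string &declType) {
      return isPromoted ? abiType : declType;
    }

    int main() {
      // 'float f(x) float x;' in K&R C: the ABI passes a double.
      assert(prologType(true, "double", "float") == "double");
      // A prototyped 'float f(float x)': both types agree.
      assert(prologType(false, "float", "float") == "float");
    }
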
@@ -3022,7 +3049,8 @@ static AggValueSlot createPlaceholderSlot(CodeGenFunction &CGF,
Ty.getQualifiers(),
AggValueSlot::IsNotDestructed,
AggValueSlot::DoesNotNeedGCBarriers,
- AggValueSlot::IsNotAliased);
+ AggValueSlot::IsNotAliased,
+ AggValueSlot::DoesNotOverlap);
}
void CodeGenFunction::EmitDelegateCallArg(CallArgList &args,
@@ -3062,6 +3090,19 @@ void CodeGenFunction::EmitDelegateCallArg(CallArgList &args,
} else {
args.add(convertTempToRValue(local, type, loc), type);
}
+
+ // Deactivate the cleanup for the callee-destructed param that was pushed.
+ if (hasAggregateEvaluationKind(type) && !CurFuncIsThunk &&
+ type->getAs<RecordType>()->getDecl()->isParamDestroyedInCallee() &&
+ type.isDestructedType()) {
+ EHScopeStack::stable_iterator cleanup =
+ CalleeDestructedParamCleanups.lookup(cast<ParmVarDecl>(param));
+ assert(cleanup.isValid() &&
+ "cleanup for callee-destructed param not recorded");
+ // This unreachable is a temporary marker which will be removed later.
+ llvm::Instruction *isActive = Builder.CreateUnreachable();
+ args.addArgCleanupDeactivation(cleanup, isActive);
+ }
}
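
The new block deactivates the EH cleanup that was pushed for a parameter whose type is destroyed in the callee, so the delegate call does not destroy it twice. A compilable illustration of the kind of type involved (names invented; under the Microsoft C++ ABI the callee destroys such by-value arguments):

    // NonTrivial has a non-trivial destructor and is passed by value, so
    // under the MS ABI the callee, not the caller, runs ~NonTrivial() on
    // the argument copy; the caller only needs an EH-only cleanup while
    // the argument is being built.
    struct NonTrivial {
      NonTrivial() {}
      NonTrivial(const NonTrivial &) {}
      ~NonTrivial() {}
    };

    void callee(NonTrivial) {}
    void caller(NonTrivial n) { callee(n); }

    int main() { caller(NonTrivial()); }
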
static bool isProvablyNull(llvm::Value *addr) {
@@ -3143,7 +3184,6 @@ static void emitWritebacks(CodeGenFunction &CGF,
static void deactivateArgCleanupsBeforeCall(CodeGenFunction &CGF,
const CallArgList &CallArgs) {
- assert(CGF.getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee());
ArrayRef<CallArgList::CallArgCleanup> Cleanups =
CallArgs.getCleanupsToDeactivate();
// Iterate in reverse to increase the likelihood of popping the cleanup.
@@ -3430,13 +3470,17 @@ void CodeGenFunction::EmitCallArgs(
assert(InitialArgSize + 1 == Args.size() &&
"The code below depends on only adding one arg per EmitCallArg");
(void)InitialArgSize;
- RValue RVArg = Args.back().RV;
- EmitNonNullArgCheck(RVArg, ArgTypes[Idx], (*Arg)->getExprLoc(), AC,
- ParamsToSkip + Idx);
- // @llvm.objectsize should never have side-effects and shouldn't need
- // destruction/cleanups, so we can safely "emit" it after its arg,
- // regardless of right-to-leftness
- MaybeEmitImplicitObjectSize(Idx, *Arg, RVArg);
+ // Since pointer arguments are never emitted as LValues, it is safe to emit
+ // the non-null argument check for r-values only.
+ if (!Args.back().hasLValue()) {
+ RValue RVArg = Args.back().getKnownRValue();
+ EmitNonNullArgCheck(RVArg, ArgTypes[Idx], (*Arg)->getExprLoc(), AC,
+ ParamsToSkip + Idx);
+ // @llvm.objectsize should never have side-effects and shouldn't need
+ // destruction/cleanups, so we can safely "emit" it after its arg,
+ // regardless of right-to-leftness
+ MaybeEmitImplicitObjectSize(Idx, *Arg, RVArg);
+ }
}
if (!LeftToRight) {
@@ -3456,10 +3500,15 @@ struct DestroyUnpassedArg final : EHScopeStack::Cleanup {
QualType Ty;
void Emit(CodeGenFunction &CGF, Flags flags) override {
- const CXXDestructorDecl *Dtor = Ty->getAsCXXRecordDecl()->getDestructor();
- assert(!Dtor->isTrivial());
- CGF.EmitCXXDestructorCall(Dtor, Dtor_Complete, /*for vbase*/ false,
- /*Delegating=*/false, Addr);
+ QualType::DestructionKind DtorKind = Ty.isDestructedType();
+ if (DtorKind == QualType::DK_cxx_destructor) {
+ const CXXDestructorDecl *Dtor = Ty->getAsCXXRecordDecl()->getDestructor();
+ assert(!Dtor->isTrivial());
+ CGF.EmitCXXDestructorCall(Dtor, Dtor_Complete, /*for vbase*/ false,
+ /*Delegating=*/false, Addr);
+ } else {
+ CGF.callCStructDestructor(CGF.MakeAddrLValue(Addr, Ty));
+ }
}
};
@@ -3478,6 +3527,33 @@ struct DisableDebugLocationUpdates {
} // end anonymous namespace
+RValue CallArg::getRValue(CodeGenFunction &CGF) const {
+ if (!HasLV)
+ return RV;
+ LValue Copy = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty), Ty);
+ CGF.EmitAggregateCopy(Copy, LV, Ty, AggValueSlot::DoesNotOverlap,
+ LV.isVolatile());
+ IsUsed = true;
+ return RValue::getAggregate(Copy.getAddress());
+}
+
+void CallArg::copyInto(CodeGenFunction &CGF, Address Addr) const {
+ LValue Dst = CGF.MakeAddrLValue(Addr, Ty);
+ if (!HasLV && RV.isScalar())
+ CGF.EmitStoreOfScalar(RV.getScalarVal(), Dst, /*init=*/true);
+ else if (!HasLV && RV.isComplex())
+ CGF.EmitStoreOfComplex(RV.getComplexVal(), Dst, /*init=*/true);
+ else {
+ auto Addr = HasLV ? LV.getAddress() : RV.getAggregateAddress();
+ LValue SrcLV = CGF.MakeAddrLValue(Addr, Ty);
+ // We assume that call args are never copied into subobjects.
+ CGF.EmitAggregateCopy(Dst, SrcLV, Ty, AggValueSlot::DoesNotOverlap,
+ HasLV ? LV.isVolatileQualified()
+ : RV.isVolatileQualified());
+ }
+ IsUsed = true;
+}
+
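
These two helpers are the heart of the new CallArg representation: an argument may now be carried as an unrealized l-value, with getRValue materializing a temporary copy only on demand and copyInto writing straight into the destination slot. A toy model of the either/or shape in standard C++ (Loaded and Place are invented stand-ins for RValue and LValue):

    #include <cassert>
    #include <string>
    #include <variant>

    struct Loaded { std::string bits; };   // plays RValue
    struct Place  { std::string *slot; };  // plays LValue

    struct Arg {
      std::variant<Loaded, Place> v;
      // Like CallArg::getRValue: copy out of the place only when asked.
      Loaded load() const {
        if (auto *p = std::get_if<Place>(&v))
          return Loaded{*p->slot};
        return std::get<Loaded>(v);
      }
      // Like CallArg::copyInto: write directly into the destination,
      // skipping the intermediate temporary.
      void copyInto(std::string &dst) const { dst = load().bits; }
    };

    int main() {
      std::string storage = "agg", out;
      Arg byPlace{Place{&storage}};
      byPlace.copyInto(out);
      assert(out == "agg");
    }
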
void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E,
QualType type) {
DisableDebugLocationUpdates Dis(*this, E);
@@ -3501,7 +3577,7 @@ void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E,
// However, we still have to push an EH-only cleanup in case we unwind before
// we make it to the call.
if (HasAggregateEvalKind &&
- CGM.getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee()) {
+ type->getAs<RecordType>()->getDecl()->isParamDestroyedInCallee()) {
// If we're using inalloca, use the argument memory. Otherwise, use a
// temporary.
AggValueSlot Slot;
@@ -3510,10 +3586,12 @@ void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E,
else
Slot = CreateAggTemp(type, "agg.tmp");
- const CXXRecordDecl *RD = type->getAsCXXRecordDecl();
- bool DestroyedInCallee =
- RD && RD->hasNonTrivialDestructor() &&
- CGM.getCXXABI().getRecordArgABI(RD) != CGCXXABI::RAA_Default;
+ bool DestroyedInCallee = true, NeedsEHCleanup = true;
+ if (const auto *RD = type->getAsCXXRecordDecl())
+ DestroyedInCallee = RD->hasNonTrivialDestructor();
+ else
+ NeedsEHCleanup = needsEHCleanup(type.isDestructedType());
+
if (DestroyedInCallee)
Slot.setExternallyDestructed();
@@ -3521,7 +3599,7 @@ void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E,
RValue RV = Slot.asRValue();
args.add(RV, type);
- if (DestroyedInCallee) {
+ if (DestroyedInCallee && NeedsEHCleanup) {
// Create a no-op GEP between the placeholder and the cleanup so we can
// RAUW it successfully. It also serves as a marker of the first
// instruction where the cleanup is active.
@@ -3538,15 +3616,7 @@ void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E,
cast<CastExpr>(E)->getCastKind() == CK_LValueToRValue) {
LValue L = EmitLValue(cast<CastExpr>(E)->getSubExpr());
assert(L.isSimple());
- if (L.getAlignment() >= getContext().getTypeAlignInChars(type)) {
- args.add(L.asAggregateRValue(), type, /*NeedsCopy*/true);
- } else {
- // We can't represent a misaligned lvalue in the CallArgList, so copy
- // to an aligned temporary now.
- Address tmp = CreateMemTemp(type);
- EmitAggregateCopy(tmp, L.getAddress(), type, L.isVolatile());
- args.add(RValue::getAggregate(tmp), type);
- }
+ args.addUncopiedAggregate(L, type);
return;
}
@@ -3608,20 +3678,21 @@ CodeGenFunction::EmitRuntimeCall(llvm::Value *callee,
// Calls which may throw must have operand bundles indicating which funclet
// they are nested within.
-static void
-getBundlesForFunclet(llvm::Value *Callee, llvm::Instruction *CurrentFuncletPad,
- SmallVectorImpl<llvm::OperandBundleDef> &BundleList) {
+SmallVector<llvm::OperandBundleDef, 1>
+CodeGenFunction::getBundlesForFunclet(llvm::Value *Callee) {
+ SmallVector<llvm::OperandBundleDef, 1> BundleList;
// There is no need for a funclet operand bundle if we aren't inside a
// funclet.
if (!CurrentFuncletPad)
- return;
+ return BundleList;
// Skip intrinsics which cannot throw.
auto *CalleeFn = dyn_cast<llvm::Function>(Callee->stripPointerCasts());
if (CalleeFn && CalleeFn->isIntrinsic() && CalleeFn->doesNotThrow())
- return;
+ return BundleList;
BundleList.emplace_back("funclet", CurrentFuncletPad);
+ return BundleList;
}
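
getBundlesForFunclet becomes a CodeGenFunction member that returns its result by value, so the call sites below can pass it inline instead of threading an out-parameter. The shape of the refactor in miniature (Bundle and FuncletState are illustrative names):

    #include <string>
    #include <vector>

    struct Bundle { std::string tag; };

    struct FuncletState {
      bool insideFunclet = false;
      // Return by value (move/RVO makes this cheap) instead of filling an
      // out-parameter; call sites compose as makeCall(..., makeBundles()).
      std::vector<Bundle> makeBundles() const {
        std::vector<Bundle> out;
        if (insideFunclet)
          out.push_back({"funclet"});
        return out;
      }
    };
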
/// Emits a simple call (never an invoke) to the given runtime function.
@@ -3629,10 +3700,8 @@ llvm::CallInst *
CodeGenFunction::EmitRuntimeCall(llvm::Value *callee,
ArrayRef<llvm::Value*> args,
const llvm::Twine &name) {
- SmallVector<llvm::OperandBundleDef, 1> BundleList;
- getBundlesForFunclet(callee, CurrentFuncletPad, BundleList);
-
- llvm::CallInst *call = Builder.CreateCall(callee, args, BundleList, name);
+ llvm::CallInst *call =
+ Builder.CreateCall(callee, args, getBundlesForFunclet(callee), name);
call->setCallingConv(getRuntimeCC());
return call;
}
@@ -3640,8 +3709,8 @@ CodeGenFunction::EmitRuntimeCall(llvm::Value *callee,
/// Emits a call or invoke to the given noreturn runtime function.
void CodeGenFunction::EmitNoreturnRuntimeCallOrInvoke(llvm::Value *callee,
ArrayRef<llvm::Value*> args) {
- SmallVector<llvm::OperandBundleDef, 1> BundleList;
- getBundlesForFunclet(callee, CurrentFuncletPad, BundleList);
+ SmallVector<llvm::OperandBundleDef, 1> BundleList =
+ getBundlesForFunclet(callee);
if (getInvokeDest()) {
llvm::InvokeInst *invoke =
@@ -3684,8 +3753,8 @@ CodeGenFunction::EmitCallOrInvoke(llvm::Value *Callee,
ArrayRef<llvm::Value *> Args,
const Twine &Name) {
llvm::BasicBlock *InvokeDest = getInvokeDest();
- SmallVector<llvm::OperandBundleDef, 1> BundleList;
- getBundlesForFunclet(Callee, CurrentFuncletPad, BundleList);
+ SmallVector<llvm::OperandBundleDef, 1> BundleList =
+ getBundlesForFunclet(Callee);
llvm::Instruction *Inst;
if (!InvokeDest)
@@ -3705,16 +3774,6 @@ CodeGenFunction::EmitCallOrInvoke(llvm::Value *Callee,
return llvm::CallSite(Inst);
}
-/// \brief Store a non-aggregate value to an address to initialize it. For
-/// initialization, a non-atomic store will be used.
-static void EmitInitStoreOfNonAggregate(CodeGenFunction &CGF, RValue Src,
- LValue Dst) {
- if (Src.isScalar())
- CGF.EmitStoreOfScalar(Src.getScalarVal(), Dst, /*init=*/true);
- else
- CGF.EmitStoreOfComplex(Src.getComplexVal(), Dst, /*init=*/true);
-}
-
void CodeGenFunction::deferPlaceholderReplacement(llvm::Instruction *Old,
llvm::Value *New) {
DeferredReplacements.push_back(std::make_pair(Old, New));
@@ -3728,7 +3787,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
SourceLocation Loc) {
// FIXME: We no longer need the types from CallArgs; lift up and simplify.
- assert(Callee.isOrdinary());
+ assert(Callee.isOrdinary() || Callee.isVirtual());
// Handle struct-return functions by passing a pointer to the
// location that we would like to return into.
@@ -3775,17 +3834,17 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// If the call returns a temporary with struct return, create a temporary
// alloca to hold the result, unless one is given to us.
Address SRetPtr = Address::invalid();
- size_t UnusedReturnSize = 0;
+ Address SRetAlloca = Address::invalid();
+ llvm::Value *UnusedReturnSizePtr = nullptr;
if (RetAI.isIndirect() || RetAI.isInAlloca() || RetAI.isCoerceAndExpand()) {
if (!ReturnValue.isNull()) {
SRetPtr = ReturnValue.getValue();
} else {
- SRetPtr = CreateMemTemp(RetTy);
+ SRetPtr = CreateMemTemp(RetTy, "tmp", &SRetAlloca);
if (HaveInsertPoint() && ReturnValue.isUnused()) {
uint64_t size =
CGM.getDataLayout().getTypeAllocSize(ConvertTypeForMem(RetTy));
- if (EmitLifetimeStart(size, SRetPtr.getPointer()))
- UnusedReturnSize = size;
+ UnusedReturnSizePtr = EmitLifetimeStart(size, SRetAlloca.getPointer());
}
}
if (IRFunctionArgs.hasSRetArg()) {
@@ -3807,7 +3866,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
for (CallArgList::const_iterator I = CallArgs.begin(), E = CallArgs.end();
I != E; ++I, ++info_it, ++ArgNo) {
const ABIArgInfo &ArgInfo = info_it->info;
- RValue RV = I->RV;
// Insert a padding argument to ensure proper alignment.
if (IRFunctionArgs.hasPaddingArg(ArgNo))
@@ -3821,13 +3879,16 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
case ABIArgInfo::InAlloca: {
assert(NumIRArgs == 0);
assert(getTarget().getTriple().getArch() == llvm::Triple::x86);
- if (RV.isAggregate()) {
+ if (I->isAggregate()) {
// Replace the placeholder with the appropriate argument slot GEP.
+ Address Addr = I->hasLValue()
+ ? I->getKnownLValue().getAddress()
+ : I->getKnownRValue().getAggregateAddress();
llvm::Instruction *Placeholder =
- cast<llvm::Instruction>(RV.getAggregatePointer());
+ cast<llvm::Instruction>(Addr.getPointer());
CGBuilderTy::InsertPoint IP = Builder.saveIP();
Builder.SetInsertPoint(Placeholder);
- Address Addr = createInAllocaStructGEP(ArgInfo.getInAllocaFieldIndex());
+ Addr = createInAllocaStructGEP(ArgInfo.getInAllocaFieldIndex());
Builder.restoreIP(IP);
deferPlaceholderReplacement(Placeholder, Addr.getPointer());
} else {
@@ -3840,22 +3901,20 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// from {}* to (%struct.foo*)*.
if (Addr.getType() != MemType)
Addr = Builder.CreateBitCast(Addr, MemType);
- LValue argLV = MakeAddrLValue(Addr, I->Ty);
- EmitInitStoreOfNonAggregate(*this, RV, argLV);
+ I->copyInto(*this, Addr);
}
break;
}
case ABIArgInfo::Indirect: {
assert(NumIRArgs == 1);
- if (RV.isScalar() || RV.isComplex()) {
+ if (!I->isAggregate()) {
// Make a temporary alloca to pass the argument.
- Address Addr = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(),
- "indirect-arg-temp", false);
+ Address Addr = CreateMemTempWithoutCast(
+ I->Ty, ArgInfo.getIndirectAlign(), "indirect-arg-temp");
IRCallArgs[FirstIRArg] = Addr.getPointer();
- LValue argLV = MakeAddrLValue(Addr, I->Ty);
- EmitInitStoreOfNonAggregate(*this, RV, argLV);
+ I->copyInto(*this, Addr);
} else {
// We want to avoid creating an unnecessary temporary+copy here;
// however, we need one in three cases:
@@ -3863,30 +3922,51 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// source. (This case doesn't occur on any common architecture.)
// 2. If the argument is byval, RV is not sufficiently aligned, and
// we cannot force it to be sufficiently aligned.
- // 3. If the argument is byval, but RV is located in an address space
- // different than that of the argument (0).
- Address Addr = RV.getAggregateAddress();
+ // 3. If the argument is byval, but RV is not located in default
+ // or alloca address space.
+ Address Addr = I->hasLValue()
+ ? I->getKnownLValue().getAddress()
+ : I->getKnownRValue().getAggregateAddress();
+ llvm::Value *V = Addr.getPointer();
CharUnits Align = ArgInfo.getIndirectAlign();
const llvm::DataLayout *TD = &CGM.getDataLayout();
- const unsigned RVAddrSpace = Addr.getType()->getAddressSpace();
- const unsigned ArgAddrSpace =
- (FirstIRArg < IRFuncTy->getNumParams()
- ? IRFuncTy->getParamType(FirstIRArg)->getPointerAddressSpace()
- : 0);
- if ((!ArgInfo.getIndirectByVal() && I->NeedsCopy) ||
- (ArgInfo.getIndirectByVal() && Addr.getAlignment() < Align &&
- llvm::getOrEnforceKnownAlignment(Addr.getPointer(),
- Align.getQuantity(), *TD)
- < Align.getQuantity()) ||
- (ArgInfo.getIndirectByVal() && (RVAddrSpace != ArgAddrSpace))) {
+
+ assert((FirstIRArg >= IRFuncTy->getNumParams() ||
+ IRFuncTy->getParamType(FirstIRArg)->getPointerAddressSpace() ==
+ TD->getAllocaAddrSpace()) &&
+ "indirect argument must be in alloca address space");
+
+ bool NeedCopy = false;
+
+ if (Addr.getAlignment() < Align &&
+ llvm::getOrEnforceKnownAlignment(V, Align.getQuantity(), *TD) <
+ Align.getQuantity()) {
+ NeedCopy = true;
+ } else if (I->hasLValue()) {
+ auto LV = I->getKnownLValue();
+ auto AS = LV.getAddressSpace();
+ if ((!ArgInfo.getIndirectByVal() &&
+ (LV.getAlignment() >=
+ getContext().getTypeAlignInChars(I->Ty))) ||
+ (ArgInfo.getIndirectByVal() &&
+ ((AS != LangAS::Default && AS != LangAS::opencl_private &&
+ AS != CGM.getASTAllocaAddressSpace())))) {
+ NeedCopy = true;
+ }
+ }
+ if (NeedCopy) {
// Create an aligned temporary, and copy to it.
- Address AI = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(),
- "byval-temp", false);
+ Address AI = CreateMemTempWithoutCast(
+ I->Ty, ArgInfo.getIndirectAlign(), "byval-temp");
IRCallArgs[FirstIRArg] = AI.getPointer();
- EmitAggregateCopy(AI, Addr, I->Ty, RV.isVolatileQualified());
+ I->copyInto(*this, AI);
} else {
// Skip the extra memcpy call.
- IRCallArgs[FirstIRArg] = Addr.getPointer();
+ auto *T = V->getType()->getPointerElementType()->getPointerTo(
+ CGM.getDataLayout().getAllocaAddrSpace());
+ IRCallArgs[FirstIRArg] = getTargetHooks().performAddrSpaceCast(
+ *this, V, LangAS::Default, CGM.getASTAllocaAddressSpace(), T,
+ true);
}
}
break;
@@ -3903,10 +3983,12 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
ArgInfo.getDirectOffset() == 0) {
assert(NumIRArgs == 1);
llvm::Value *V;
- if (RV.isScalar())
- V = RV.getScalarVal();
+ if (!I->isAggregate())
+ V = I->getKnownRValue().getScalarVal();
else
- V = Builder.CreateLoad(RV.getAggregateAddress());
+ V = Builder.CreateLoad(
+ I->hasLValue() ? I->getKnownLValue().getAddress()
+ : I->getKnownRValue().getAggregateAddress());
// Implement swifterror by copying into a new swifterror argument.
// We'll write back in the normal path out of the call.
@@ -3944,12 +4026,12 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// FIXME: Avoid the conversion through memory if possible.
Address Src = Address::invalid();
- if (RV.isScalar() || RV.isComplex()) {
+ if (!I->isAggregate()) {
Src = CreateMemTemp(I->Ty, "coerce");
- LValue SrcLV = MakeAddrLValue(Src, I->Ty);
- EmitInitStoreOfNonAggregate(*this, RV, SrcLV);
+ I->copyInto(*this, Src);
} else {
- Src = RV.getAggregateAddress();
+ Src = I->hasLValue() ? I->getKnownLValue().getAddress()
+ : I->getKnownRValue().getAggregateAddress();
}
// If the value is offset in memory, apply the offset now.
@@ -4003,22 +4085,26 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
llvm::Value *tempSize = nullptr;
Address addr = Address::invalid();
- if (RV.isAggregate()) {
- addr = RV.getAggregateAddress();
+ Address AllocaAddr = Address::invalid();
+ if (I->isAggregate()) {
+ addr = I->hasLValue() ? I->getKnownLValue().getAddress()
+ : I->getKnownRValue().getAggregateAddress();
+
} else {
+ RValue RV = I->getKnownRValue();
assert(RV.isScalar()); // complex should always just be direct
llvm::Type *scalarType = RV.getScalarVal()->getType();
auto scalarSize = CGM.getDataLayout().getTypeAllocSize(scalarType);
auto scalarAlign = CGM.getDataLayout().getPrefTypeAlignment(scalarType);
- tempSize = llvm::ConstantInt::get(CGM.Int64Ty, scalarSize);
-
// Materialize to a temporary.
addr = CreateTempAlloca(RV.getScalarVal()->getType(),
- CharUnits::fromQuantity(std::max(layout->getAlignment(),
- scalarAlign)));
- EmitLifetimeStart(scalarSize, addr.getPointer());
+ CharUnits::fromQuantity(std::max(
+ layout->getAlignment(), scalarAlign)),
+ "tmp",
+ /*ArraySize=*/nullptr, &AllocaAddr);
+ tempSize = EmitLifetimeStart(scalarSize, AllocaAddr.getPointer());
Builder.CreateStore(RV.getScalarVal(), addr);
}
@@ -4036,7 +4122,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
assert(IRArgPos == FirstIRArg + NumIRArgs);
if (tempSize) {
- EmitLifetimeEnd(tempSize, addr.getPointer());
+ EmitLifetimeEnd(tempSize, AllocaAddr.getPointer());
}
break;
@@ -4044,13 +4130,14 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
case ABIArgInfo::Expand:
unsigned IRArgPos = FirstIRArg;
- ExpandTypeToArgs(I->Ty, RV, IRFuncTy, IRCallArgs, IRArgPos);
+ ExpandTypeToArgs(I->Ty, *I, IRFuncTy, IRCallArgs, IRArgPos);
assert(IRArgPos == FirstIRArg + NumIRArgs);
break;
}
}
- llvm::Value *CalleePtr = Callee.getFunctionPointer();
+ const CGCallee &ConcreteCallee = Callee.prepareConcreteCallee(*this);
+ llvm::Value *CalleePtr = ConcreteCallee.getFunctionPointer();
// If we're using inalloca, set up that argument.
if (ArgMemory.isValid()) {
@@ -4191,10 +4278,19 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
CannotThrow = Attrs.hasAttribute(llvm::AttributeList::FunctionIndex,
llvm::Attribute::NoUnwind);
}
+
+ // If we made a temporary, be sure to clean up after ourselves. Note that we
+ // can't depend on being inside of an ExprWithCleanups, so we need to manually
+ // pop this cleanup later on. Being eager about this is OK, since this
+ // temporary is 'invisible' outside of the callee.
+ if (UnusedReturnSizePtr)
+ pushFullExprCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, SRetAlloca,
+ UnusedReturnSizePtr);
+
llvm::BasicBlock *InvokeDest = CannotThrow ? nullptr : getInvokeDest();
- SmallVector<llvm::OperandBundleDef, 1> BundleList;
- getBundlesForFunclet(CalleePtr, CurrentFuncletPad, BundleList);
+ SmallVector<llvm::OperandBundleDef, 1> BundleList =
+ getBundlesForFunclet(CalleePtr);
// Emit the actual call/invoke instruction.
llvm::CallSite CS;
@@ -4244,9 +4340,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// insertion point; this allows the rest of IRGen to discard
// unreachable code.
if (CS.doesNotReturn()) {
- if (UnusedReturnSize)
- EmitLifetimeEnd(llvm::ConstantInt::get(Int64Ty, UnusedReturnSize),
- SRetPtr.getPointer());
+ if (UnusedReturnSizePtr)
+ PopCleanupBlock();
// Strip away the noreturn attribute to better diagnose unreachable UB.
if (SanOpts.has(SanitizerKind::Unreachable)) {
@@ -4315,9 +4410,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
case ABIArgInfo::InAlloca:
case ABIArgInfo::Indirect: {
RValue ret = convertTempToRValue(SRetPtr, RetTy, SourceLocation());
- if (UnusedReturnSize)
- EmitLifetimeEnd(llvm::ConstantInt::get(Int64Ty, UnusedReturnSize),
- SRetPtr.getPointer());
+ if (UnusedReturnSizePtr)
+ PopCleanupBlock();
return ret;
}
@@ -4395,7 +4489,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
OffsetValue);
} else if (const auto *AA = TargetDecl->getAttr<AllocAlignAttr>()) {
llvm::Value *ParamVal =
- CallArgs[AA->getParamIndex() - 1].RV.getScalarVal();
+ CallArgs[AA->getParamIndex().getLLVMIndex()].getRValue(
+ *this).getScalarVal();
EmitAlignmentAssumption(Ret.getScalarVal(), ParamVal);
}
}
@@ -4403,6 +4498,17 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
return Ret;
}
+CGCallee CGCallee::prepareConcreteCallee(CodeGenFunction &CGF) const {
+ if (isVirtual()) {
+ const CallExpr *CE = getVirtualCallExpr();
+ return CGF.CGM.getCXXABI().getVirtualFunctionPointer(
+ CGF, getVirtualMethodDecl(), getThisAddress(),
+ getFunctionType(), CE ? CE->getLocStart() : SourceLocation());
+ }
+
+ return *this;
+}
+
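
prepareConcreteCallee is the counterpart of the new 'virtual' CGCallee flavor defined in the header below: the vtable load is deferred until the call is actually emitted, at which point the ABI resolves it to an ordinary function pointer. A reduced sketch of the deferred-resolution pattern (Callee here is an invented miniature, not clang's class):

    #include <functional>

    using FnPtr = void (*)();

    struct Callee {
      FnPtr known = nullptr;          // set for the "ordinary" case
      std::function<FnPtr()> resolve; // set for the "virtual" case

      // Collapse either case to a concrete function pointer, like
      // prepareConcreteCallee does via getVirtualFunctionPointer.
      Callee prepare() const {
        if (known)
          return *this;
        return Callee{resolve(), nullptr};
      }
    };
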
/* VarArg handling */
Address CodeGenFunction::EmitVAArg(VAArgExpr *VE, Address &VAListAddr) {
diff --git a/lib/CodeGen/CGCall.h b/lib/CodeGen/CGCall.h
index 7e10407fc31c..8adbe76fa6c3 100644
--- a/lib/CodeGen/CGCall.h
+++ b/lib/CodeGen/CGCall.h
@@ -18,6 +18,7 @@
#include "CGValue.h"
#include "EHScopeStack.h"
#include "clang/AST/CanonicalType.h"
+#include "clang/AST/GlobalDecl.h"
#include "clang/AST/Type.h"
#include "llvm/IR/Value.h"
@@ -42,9 +43,9 @@ namespace CodeGen {
/// Abstract information about a function or function prototype.
class CGCalleeInfo {
- /// \brief The function prototype of the callee.
+ /// The function prototype of the callee.
const FunctionProtoType *CalleeProtoTy;
- /// \brief The function declaration of the callee.
+ /// The function declaration of the callee.
const Decl *CalleeDecl;
public:
@@ -68,8 +69,9 @@ public:
Invalid,
Builtin,
PseudoDestructor,
+ Virtual,
- Last = PseudoDestructor
+ Last = Virtual
};
struct BuiltinInfoStorage {
@@ -79,12 +81,19 @@ public:
struct PseudoDestructorInfoStorage {
const CXXPseudoDestructorExpr *Expr;
};
+ struct VirtualInfoStorage {
+ const CallExpr *CE;
+ GlobalDecl MD;
+ Address Addr;
+ llvm::FunctionType *FTy;
+ };
SpecialKind KindOrFunctionPointer;
union {
CGCalleeInfo AbstractInfo;
BuiltinInfoStorage BuiltinInfo;
PseudoDestructorInfoStorage PseudoDestructorInfo;
+ VirtualInfoStorage VirtualInfo;
};
explicit CGCallee(SpecialKind kind) : KindOrFunctionPointer(kind) {}
@@ -127,6 +136,16 @@ public:
return CGCallee(abstractInfo, functionPtr);
}
+ static CGCallee forVirtual(const CallExpr *CE, GlobalDecl MD, Address Addr,
+ llvm::FunctionType *FTy) {
+ CGCallee result(SpecialKind::Virtual);
+ result.VirtualInfo.CE = CE;
+ result.VirtualInfo.MD = MD;
+ result.VirtualInfo.Addr = Addr;
+ result.VirtualInfo.FTy = FTy;
+ return result;
+ }
+
bool isBuiltin() const {
return KindOrFunctionPointer == SpecialKind::Builtin;
}
@@ -150,7 +169,9 @@ public:
bool isOrdinary() const {
return uintptr_t(KindOrFunctionPointer) > uintptr_t(SpecialKind::Last);
}
- const CGCalleeInfo &getAbstractInfo() const {
+ CGCalleeInfo getAbstractInfo() const {
+ if (isVirtual())
+ return VirtualInfo.MD.getDecl();
assert(isOrdinary());
return AbstractInfo;
}
@@ -158,29 +179,86 @@ public:
assert(isOrdinary());
return reinterpret_cast<llvm::Value*>(uintptr_t(KindOrFunctionPointer));
}
- llvm::FunctionType *getFunctionType() const {
- return cast<llvm::FunctionType>(
- getFunctionPointer()->getType()->getPointerElementType());
- }
void setFunctionPointer(llvm::Value *functionPtr) {
assert(isOrdinary());
KindOrFunctionPointer = SpecialKind(uintptr_t(functionPtr));
}
+
+ bool isVirtual() const {
+ return KindOrFunctionPointer == SpecialKind::Virtual;
+ }
+ const CallExpr *getVirtualCallExpr() const {
+ assert(isVirtual());
+ return VirtualInfo.CE;
+ }
+ GlobalDecl getVirtualMethodDecl() const {
+ assert(isVirtual());
+ return VirtualInfo.MD;
+ }
+ Address getThisAddress() const {
+ assert(isVirtual());
+ return VirtualInfo.Addr;
+ }
+
+ llvm::FunctionType *getFunctionType() const {
+ if (isVirtual())
+ return VirtualInfo.FTy;
+ return cast<llvm::FunctionType>(
+ getFunctionPointer()->getType()->getPointerElementType());
+ }
+
+ /// If this is a delayed callee computation of some sort, prepare
+ /// a concrete callee.
+ CGCallee prepareConcreteCallee(CodeGenFunction &CGF) const;
};
struct CallArg {
- RValue RV;
+ private:
+ union {
+ RValue RV;
+ LValue LV; /// The argument is semantically a load from this l-value.
+ };
+ bool HasLV;
+
+ /// A data-flow flag to make sure getRValue and/or copyInto are not
+ /// called twice, which would duplicate the emitted IR.
+ mutable bool IsUsed;
+
+ public:
QualType Ty;
- bool NeedsCopy;
- CallArg(RValue rv, QualType ty, bool needscopy)
- : RV(rv), Ty(ty), NeedsCopy(needscopy)
- { }
+ CallArg(RValue rv, QualType ty)
+ : RV(rv), HasLV(false), IsUsed(false), Ty(ty) {}
+ CallArg(LValue lv, QualType ty)
+ : LV(lv), HasLV(true), IsUsed(false), Ty(ty) {}
+ bool hasLValue() const { return HasLV; }
+ QualType getType() const { return Ty; }
+
+ /// \returns an independent RValue. If the CallArg contains an LValue,
+ /// a temporary copy is returned.
+ RValue getRValue(CodeGenFunction &CGF) const;
+
+ LValue getKnownLValue() const {
+ assert(HasLV && !IsUsed);
+ return LV;
+ }
+ RValue getKnownRValue() const {
+ assert(!HasLV && !IsUsed);
+ return RV;
+ }
+ void setRValue(RValue _RV) {
+ assert(!HasLV);
+ RV = _RV;
+ }
+
+ bool isAggregate() const { return HasLV || RV.isAggregate(); }
+
+ void copyInto(CodeGenFunction &CGF, Address A) const;
};
/// CallArgList - Type for representing both the value and type of
/// arguments in a call.
class CallArgList :
- public SmallVector<CallArg, 16> {
+ public SmallVector<CallArg, 8> {
public:
CallArgList() : StackBase(nullptr) {}
@@ -204,8 +282,10 @@ public:
llvm::Instruction *IsActiveIP;
};
- void add(RValue rvalue, QualType type, bool needscopy = false) {
- push_back(CallArg(rvalue, type, needscopy));
+ void add(RValue rvalue, QualType type) { push_back(CallArg(rvalue, type)); }
+
+ void addUncopiedAggregate(LValue LV, QualType type) {
+ push_back(CallArg(LV, type));
}
/// Add all the arguments from another CallArgList to this one. After doing
@@ -254,7 +334,7 @@ public:
llvm::Instruction *getStackBase() const { return StackBase; }
void freeArgumentMemory(CodeGenFunction &CGF) const;
- /// \brief Returns if we're using an inalloca struct to pass arguments in
+ /// Returns whether we're using an inalloca struct to pass arguments in
/// memory.
bool isUsingInAlloca() const { return StackBase; }
diff --git a/lib/CodeGen/CGClass.cpp b/lib/CodeGen/CGClass.cpp
index a6915071ec17..0b9311f7771c 100644
--- a/lib/CodeGen/CGClass.cpp
+++ b/lib/CodeGen/CGClass.cpp
@@ -406,8 +406,8 @@ CodeGenFunction::GetAddressOfDerivedClass(Address BaseAddr,
// Apply the offset.
llvm::Value *Value = Builder.CreateBitCast(BaseAddr.getPointer(), Int8PtrTy);
- Value = Builder.CreateGEP(Value, Builder.CreateNeg(NonVirtualOffset),
- "sub.ptr");
+ Value = Builder.CreateInBoundsGEP(Value, Builder.CreateNeg(NonVirtualOffset),
+ "sub.ptr");
// Just cast.
Value = Builder.CreateBitCast(Value, DerivedPtrTy);
@@ -555,10 +555,12 @@ static void EmitBaseInitializer(CodeGenFunction &CGF,
BaseClassDecl,
isBaseVirtual);
AggValueSlot AggSlot =
- AggValueSlot::forAddr(V, Qualifiers(),
- AggValueSlot::IsDestructed,
- AggValueSlot::DoesNotNeedGCBarriers,
- AggValueSlot::IsNotAliased);
+ AggValueSlot::forAddr(
+ V, Qualifiers(),
+ AggValueSlot::IsDestructed,
+ AggValueSlot::DoesNotNeedGCBarriers,
+ AggValueSlot::IsNotAliased,
+ CGF.overlapForBaseInit(ClassDecl, BaseClassDecl, isBaseVirtual));
CGF.EmitAggExpr(BaseInit->getInit(), AggSlot);
@@ -615,7 +617,14 @@ static void EmitMemberInitializer(CodeGenFunction &CGF,
llvm::Value *ThisPtr = CGF.LoadCXXThis();
QualType RecordTy = CGF.getContext().getTypeDeclType(ClassDecl);
- LValue LHS = CGF.MakeNaturalAlignAddrLValue(ThisPtr, RecordTy);
+ LValue LHS;
+
+ // If a base constructor is being emitted, create an LValue that has the
+ // non-virtual alignment.
+ if (CGF.CurGD.getCtorType() == Ctor_Base)
+ LHS = CGF.MakeNaturalAlignPointeeAddrLValue(ThisPtr, RecordTy);
+ else
+ LHS = CGF.MakeNaturalAlignAddrLValue(ThisPtr, RecordTy);
EmitLValueForAnyFieldInitialization(CGF, MemberInit, LHS);
@@ -640,7 +649,7 @@ static void EmitMemberInitializer(CodeGenFunction &CGF,
LValue Src = CGF.EmitLValueForFieldInitialization(ThisRHSLV, Field);
// Copy the aggregate.
- CGF.EmitAggregateCopy(LHS.getAddress(), Src.getAddress(), FieldType,
+ CGF.EmitAggregateCopy(LHS, Src, FieldType, CGF.overlapForFieldInit(Field),
LHS.isVolatileQualified());
// Ensure that we destroy the objects if an exception is thrown later in
// the constructor.
@@ -671,10 +680,12 @@ void CodeGenFunction::EmitInitializerForField(FieldDecl *Field, LValue LHS,
break;
case TEK_Aggregate: {
AggValueSlot Slot =
- AggValueSlot::forLValue(LHS,
- AggValueSlot::IsDestructed,
- AggValueSlot::DoesNotNeedGCBarriers,
- AggValueSlot::IsNotAliased);
+ AggValueSlot::forLValue(
+ LHS,
+ AggValueSlot::IsDestructed,
+ AggValueSlot::DoesNotNeedGCBarriers,
+ AggValueSlot::IsNotAliased,
+ overlapForFieldInit(Field));
EmitAggExpr(Init, Slot);
break;
}
@@ -905,15 +916,15 @@ namespace {
}
CharUnits getMemcpySize(uint64_t FirstByteOffset) const {
+ ASTContext &Ctx = CGF.getContext();
unsigned LastFieldSize =
- LastField->isBitField() ?
- LastField->getBitWidthValue(CGF.getContext()) :
- CGF.getContext().getTypeSize(LastField->getType());
- uint64_t MemcpySizeBits =
- LastFieldOffset + LastFieldSize - FirstByteOffset +
- CGF.getContext().getCharWidth() - 1;
- CharUnits MemcpySize =
- CGF.getContext().toCharUnitsFromBits(MemcpySizeBits);
+ LastField->isBitField()
+ ? LastField->getBitWidthValue(Ctx)
+ : Ctx.toBits(
+ Ctx.getTypeInfoDataSizeInChars(LastField->getType()).first);
+ uint64_t MemcpySizeBits = LastFieldOffset + LastFieldSize -
+ FirstByteOffset + Ctx.getCharWidth() - 1;
+ CharUnits MemcpySize = Ctx.toCharUnitsFromBits(MemcpySizeBits);
return MemcpySize;
}
@@ -1265,7 +1276,7 @@ void CodeGenFunction::EmitCtorPrologue(const CXXConstructorDecl *CD,
if (CGM.getCodeGenOpts().StrictVTablePointers &&
CGM.getCodeGenOpts().OptimizationLevel > 0 &&
isInitializerOfDynamicClass(*B))
- CXXThisValue = Builder.CreateInvariantGroupBarrier(LoadCXXThis());
+ CXXThisValue = Builder.CreateLaunderInvariantGroup(LoadCXXThis());
EmitBaseInitializer(*this, ClassDecl, *B, CtorType);
}
@@ -1282,7 +1293,7 @@ void CodeGenFunction::EmitCtorPrologue(const CXXConstructorDecl *CD,
if (CGM.getCodeGenOpts().StrictVTablePointers &&
CGM.getCodeGenOpts().OptimizationLevel > 0 &&
isInitializerOfDynamicClass(*B))
- CXXThisValue = Builder.CreateInvariantGroupBarrier(LoadCXXThis());
+ CXXThisValue = Builder.CreateLaunderInvariantGroup(LoadCXXThis());
EmitBaseInitializer(*this, ClassDecl, *B, CtorType);
}
@@ -1466,11 +1477,11 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) {
// Initialize the vtable pointers before entering the body.
if (!CanSkipVTablePointerInitialization(*this, Dtor)) {
- // Insert the llvm.invariant.group.barrier intrinsic before initializing
+ // Insert the llvm.launder.invariant.group intrinsic before initializing
// the vptrs to cancel any previous assumptions we might have made.
if (CGM.getCodeGenOpts().StrictVTablePointers &&
CGM.getCodeGenOpts().OptimizationLevel > 0)
- CXXThisValue = Builder.CreateInvariantGroupBarrier(LoadCXXThis());
+ CXXThisValue = Builder.CreateLaunderInvariantGroup(LoadCXXThis());
InitializeVTablePointers(Dtor->getParent());
}
@@ -1728,7 +1739,7 @@ namespace {
};
} // end anonymous namespace
-/// \brief Emit all code that comes at the end of class's
+/// Emit all code that comes at the end of the class's
/// destructor. This is to call destructors on members and base classes
/// in reverse order of their construction.
///
@@ -1954,7 +1965,8 @@ void CodeGenFunction::EmitCXXAggrConstructorCall(const CXXConstructorDecl *ctor,
}
EmitCXXConstructorCall(ctor, Ctor_Complete, /*ForVirtualBase=*/false,
- /*Delegating=*/false, curAddr, E);
+ /*Delegating=*/false, curAddr, E,
+ AggValueSlot::DoesNotOverlap);
}
// Go to the next element.
@@ -1989,7 +2001,8 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D,
CXXCtorType Type,
bool ForVirtualBase,
bool Delegating, Address This,
- const CXXConstructExpr *E) {
+ const CXXConstructExpr *E,
+ AggValueSlot::Overlap_t Overlap) {
CallArgList Args;
// Push the this ptr.
@@ -2002,10 +2015,10 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D,
assert(E->getNumArgs() == 1 && "unexpected argcount for trivial ctor");
const Expr *Arg = E->getArg(0);
- QualType SrcTy = Arg->getType();
- Address Src = EmitLValue(Arg).getAddress();
+ LValue Src = EmitLValue(Arg);
QualType DestTy = getContext().getTypeDeclType(D->getParent());
- EmitAggregateCopyCtor(This, Src, DestTy, SrcTy);
+ LValue Dest = MakeAddrLValue(This, DestTy);
+ EmitAggregateCopyCtor(Dest, Src, Overlap);
return;
}
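
The Overlap_t arguments threaded through this file record whether an aggregate copy may land in storage that overlaps something else, which is possible when initializing a base subobject because derived members can live in the base's tail padding. A small example of the layout that makes base copies dangerous (typical Itanium ABI behavior; offsets are illustrative):

    // Under the Itanium ABI a non-POD base's tail padding may be reused:
    // here dsize(A) can be 5 while sizeof(A) is 8, and B::c may land at
    // offset 5. Copying sizeof(A) bytes into B's A-subobject would clobber
    // B::c, which is exactly what MayOverlap guards against.
    struct A {
      A() {}
      A(const A &other) : i(other.i) {} // non-trivial copy => non-POD layout
      int i = 0;
      char tail = 0; // a standalone A has 3 bytes of padding after this
    };

    struct B : A {
      char c = 0; // typically placed inside A's tail padding
    };
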
@@ -2017,7 +2030,8 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D,
EmitCallArgs(Args, FPT, E->arguments(), E->getConstructor(),
/*ParamsToSkip*/ 0, Order);
- EmitCXXConstructorCall(D, Type, ForVirtualBase, Delegating, This, Args);
+ EmitCXXConstructorCall(D, Type, ForVirtualBase, Delegating, This, Args,
+ Overlap, E->getExprLoc());
}
static bool canEmitDelegateCallArgs(CodeGenFunction &CGF,
@@ -2049,14 +2063,15 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D,
bool ForVirtualBase,
bool Delegating,
Address This,
- CallArgList &Args) {
+ CallArgList &Args,
+ AggValueSlot::Overlap_t Overlap,
+ SourceLocation Loc) {
const CXXRecordDecl *ClassDecl = D->getParent();
// C++11 [class.mfct.non-static]p2:
// If a non-static member function of a class X is called for an object that
// is not of type X, or of a type derived from X, the behavior is undefined.
- // FIXME: Provide a source location here.
- EmitTypeCheck(CodeGenFunction::TCK_ConstructorCall, SourceLocation(),
+ EmitTypeCheck(CodeGenFunction::TCK_ConstructorCall, Loc,
This.getPointer(), getContext().getRecordType(ClassDecl));
if (D->isTrivial() && D->isDefaultConstructor()) {
@@ -2071,9 +2086,12 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D,
assert(Args.size() == 2 && "unexpected argcount for trivial ctor");
QualType SrcTy = D->getParamDecl(0)->getType().getNonReferenceType();
- Address Src(Args[1].RV.getScalarVal(), getNaturalTypeAlignment(SrcTy));
+ Address Src(Args[1].getRValue(*this).getScalarVal(),
+ getNaturalTypeAlignment(SrcTy));
+ LValue SrcLVal = MakeAddrLValue(Src, SrcTy);
QualType DestTy = getContext().getTypeDeclType(ClassDecl);
- EmitAggregateCopyCtor(This, Src, DestTy, SrcTy);
+ LValue DestLVal = MakeAddrLValue(This, DestTy);
+ EmitAggregateCopyCtor(DestLVal, SrcLVal, Overlap);
return;
}
@@ -2123,8 +2141,7 @@ void CodeGenFunction::EmitInheritedCXXConstructorCall(
const CXXConstructorDecl *D, bool ForVirtualBase, Address This,
bool InheritedFromVBase, const CXXInheritedCtorInitExpr *E) {
CallArgList Args;
- CallArg ThisArg(RValue::get(This.getPointer()), D->getThisType(getContext()),
- /*NeedsCopy=*/false);
+ CallArg ThisArg(RValue::get(This.getPointer()), D->getThisType(getContext()));
// Forward the parameters.
if (InheritedFromVBase &&
@@ -2163,7 +2180,8 @@ void CodeGenFunction::EmitInheritedCXXConstructorCall(
}
EmitCXXConstructorCall(D, Ctor_Base, ForVirtualBase, /*Delegating*/false,
- This, Args);
+ This, Args, AggValueSlot::MayOverlap,
+ E->getLocation());
}
void CodeGenFunction::EmitInlinedInheritingCXXConstructorCall(
@@ -2188,7 +2206,7 @@ void CodeGenFunction::EmitInlinedInheritingCXXConstructorCall(
assert(Args.size() >= Params.size() && "too few arguments for call");
for (unsigned I = 0, N = Args.size(); I != N; ++I) {
if (I < Params.size() && isa<ImplicitParamDecl>(Params[I])) {
- const RValue &RV = Args[I].RV;
+ const RValue &RV = Args[I].getRValue(*this);
assert(!RV.isComplex() && "complex indirect params not supported");
ParamValue Val = RV.isScalar()
? ParamValue::forDirect(RV.getScalarVal())
@@ -2259,7 +2277,8 @@ CodeGenFunction::EmitSynthesizedCXXCopyCtorCall(const CXXConstructorDecl *D,
EmitCallArgs(Args, FPT, drop_begin(E->arguments(), 1), E->getConstructor(),
/*ParamsToSkip*/ 1);
- EmitCXXConstructorCall(D, Ctor_Complete, false, false, This, Args);
+ EmitCXXConstructorCall(D, Ctor_Complete, false, false, This, Args,
+ AggValueSlot::MayOverlap, E->getExprLoc());
}
void
@@ -2294,7 +2313,8 @@ CodeGenFunction::EmitDelegateCXXConstructorCall(const CXXConstructorDecl *Ctor,
}
EmitCXXConstructorCall(Ctor, CtorType, /*ForVirtualBase=*/false,
- /*Delegating=*/true, This, DelegateArgs);
+ /*Delegating=*/true, This, DelegateArgs,
+ AggValueSlot::MayOverlap, Loc);
}
namespace {
@@ -2325,7 +2345,8 @@ CodeGenFunction::EmitDelegatingCXXConstructorCall(const CXXConstructorDecl *Ctor
AggValueSlot::forAddr(ThisPtr, Qualifiers(),
AggValueSlot::IsDestructed,
AggValueSlot::DoesNotNeedGCBarriers,
- AggValueSlot::IsNotAliased);
+ AggValueSlot::IsNotAliased,
+ AggValueSlot::MayOverlap);
EmitAggExpr(Ctor->init_begin()[0]->getInit(), AggSlot);
@@ -2667,7 +2688,9 @@ void CodeGenFunction::EmitVTablePtrCheck(const CXXRecordDecl *RD,
SSK = llvm::SanStat_CFI_UnrelatedCast;
break;
case CFITCK_ICall:
- llvm_unreachable("not expecting CFITCK_ICall");
+ case CFITCK_NVMFCall:
+ case CFITCK_VMFCall:
+ llvm_unreachable("unexpected sanitizer kind");
}
std::string TypeName = RD->getQualifiedNameAsString();
diff --git a/lib/CodeGen/CGCleanup.cpp b/lib/CodeGen/CGCleanup.cpp
index 22055b2cb902..cfd230997ed0 100644
--- a/lib/CodeGen/CGCleanup.cpp
+++ b/lib/CodeGen/CGCleanup.cpp
@@ -281,10 +281,10 @@ void EHScopeStack::popNullFixups() {
BranchFixups.pop_back();
}
-void CodeGenFunction::initFullExprCleanup() {
+Address CodeGenFunction::createCleanupActiveFlag() {
// Create a variable to decide whether the cleanup needs to be run.
- Address active = CreateTempAlloca(Builder.getInt1Ty(), CharUnits::One(),
- "cleanup.cond");
+ Address active = CreateTempAllocaWithoutCast(
+ Builder.getInt1Ty(), CharUnits::One(), "cleanup.cond");
// Initialize it to false at a site that's guaranteed to be run
// before each evaluation.
@@ -293,10 +293,14 @@ void CodeGenFunction::initFullExprCleanup() {
// Initialize it to true at the current location.
Builder.CreateStore(Builder.getTrue(), active);
+ return active;
+}
+
+void CodeGenFunction::initFullExprCleanupWithFlag(Address ActiveFlag) {
// Set that as the active flag in the cleanup.
EHCleanupScope &cleanup = cast<EHCleanupScope>(*EHStack.begin());
assert(!cleanup.hasActiveFlag() && "cleanup already has active flag?");
- cleanup.setActiveFlag(active);
+ cleanup.setActiveFlag(ActiveFlag);
if (cleanup.isNormalCleanup()) cleanup.setTestFlagInNormalCleanup();
if (cleanup.isEHCleanup()) cleanup.setTestFlagInEHCleanup();
@@ -494,6 +498,13 @@ void CodeGenFunction::PopCleanupBlocks(
&LifetimeExtendedCleanupStack[I],
Header.getSize());
I += Header.getSize();
+
+ if (Header.isConditional()) {
+ Address ActiveFlag =
+ reinterpret_cast<Address &>(LifetimeExtendedCleanupStack[I]);
+ initFullExprCleanupWithFlag(ActiveFlag);
+ I += sizeof(ActiveFlag);
+ }
}
LifetimeExtendedCleanupStack.resize(OldLifetimeExtendedSize);
}
@@ -624,7 +635,7 @@ static void destroyOptimisticNormalEntry(CodeGenFunction &CGF,
si->eraseFromParent();
// Destroy the load.
- assert(condition->getOperand(0) == CGF.NormalCleanupDest);
+ assert(condition->getOperand(0) == CGF.NormalCleanupDest.getPointer());
assert(condition->use_empty());
condition->eraseFromParent();
}
@@ -833,7 +844,7 @@ void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough) {
if (NormalCleanupDestSlot->hasOneUse()) {
NormalCleanupDestSlot->user_back()->eraseFromParent();
NormalCleanupDestSlot->eraseFromParent();
- NormalCleanupDest = nullptr;
+ NormalCleanupDest = Address::invalid();
}
llvm::BasicBlock *BranchAfter = Scope.getBranchAfterBlock(0);
@@ -971,16 +982,21 @@ void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough) {
SaveAndRestore<llvm::Instruction *> RestoreCurrentFuncletPad(
CurrentFuncletPad);
llvm::CleanupPadInst *CPI = nullptr;
- if (!EHPersonality::get(*this).usesFuncletPads()) {
- EHStack.pushTerminate();
- PushedTerminate = true;
- } else {
+
+ const EHPersonality &Personality = EHPersonality::get(*this);
+ if (Personality.usesFuncletPads()) {
llvm::Value *ParentPad = CurrentFuncletPad;
if (!ParentPad)
ParentPad = llvm::ConstantTokenNone::get(CGM.getLLVMContext());
CurrentFuncletPad = CPI = Builder.CreateCleanupPad(ParentPad);
}
+ // Non-MSVC personalities need to terminate when an EH cleanup throws.
+ if (!Personality.isMSVCPersonality()) {
+ EHStack.pushTerminate();
+ PushedTerminate = true;
+ }
+
// We only actually emit the cleanup code if the cleanup is either
// active or was used before it was deactivated.
if (EHActiveFlag.isValid() || IsActive) {
@@ -1233,8 +1249,10 @@ void CodeGenFunction::DeactivateCleanupBlock(EHScopeStack::stable_iterator C,
EHCleanupScope &Scope = cast<EHCleanupScope>(*EHStack.find(C));
assert(Scope.isActive() && "double deactivation");
- // If it's the top of the stack, just pop it.
- if (C == EHStack.stable_begin()) {
+ // If it's the top of the stack, just pop it, but do so only if it belongs
+ // to the current RunCleanupsScope.
+ if (C == EHStack.stable_begin() &&
+ CurrentCleanupScopeDepth.strictlyEncloses(C)) {
// If it's a normal cleanup, we need to pretend that the
// fallthrough is unreachable.
CGBuilderTy::InsertPoint SavedIP = Builder.saveAndClearIP();
@@ -1250,10 +1268,10 @@ void CodeGenFunction::DeactivateCleanupBlock(EHScopeStack::stable_iterator C,
}
Address CodeGenFunction::getNormalCleanupDestSlot() {
- if (!NormalCleanupDest)
+ if (!NormalCleanupDest.isValid())
NormalCleanupDest =
- CreateTempAlloca(Builder.getInt32Ty(), "cleanup.dest.slot");
- return Address(NormalCleanupDest, CharUnits::fromQuantity(4));
+ CreateDefaultAlignTempAlloca(Builder.getInt32Ty(), "cleanup.dest.slot");
+ return NormalCleanupDest;
}
/// Emits all the code to cause the given temporary to be cleaned up.
diff --git a/lib/CodeGen/CGCleanup.h b/lib/CodeGen/CGCleanup.h
index 105c5629d50c..93be3e6c1502 100644
--- a/lib/CodeGen/CGCleanup.h
+++ b/lib/CodeGen/CGCleanup.h
@@ -230,7 +230,7 @@ public:
};
/// A cleanup scope which generates the cleanup blocks lazily.
-class LLVM_ALIGNAS(/*alignof(uint64_t)*/ 8) EHCleanupScope : public EHScope {
+class alignas(8) EHCleanupScope : public EHScope {
/// The nearest normal cleanup scope enclosing this one.
EHScopeStack::stable_iterator EnclosingNormal;
@@ -627,16 +627,21 @@ struct EHPersonality {
static const EHPersonality MSVC_except_handler;
static const EHPersonality MSVC_C_specific_handler;
static const EHPersonality MSVC_CxxFrameHandler3;
+ static const EHPersonality GNU_Wasm_CPlusPlus;
/// Does this personality use landingpads or the family of pad instructions
/// designed to form funclets?
- bool usesFuncletPads() const { return isMSVCPersonality(); }
+ bool usesFuncletPads() const {
+ return isMSVCPersonality() || isWasmPersonality();
+ }
bool isMSVCPersonality() const {
return this == &MSVC_except_handler || this == &MSVC_C_specific_handler ||
this == &MSVC_CxxFrameHandler3;
}
+ bool isWasmPersonality() const { return this == &GNU_Wasm_CPlusPlus; }
+
bool isMSVCXXPersonality() const { return this == &MSVC_CxxFrameHandler3; }
};
}
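
The predicate split matters because WebAssembly's GNU_Wasm_CPlusPlus personality uses funclet pads like the MSVC ones, yet unlike them still needs a terminate scope around throwing EH cleanups (see the PopCleanupBlock hunk above). A truth-table sketch of the two predicates (enumerators are illustrative):

    enum class Personality { Itanium, MSVC_CxxFrameHandler3, GNU_Wasm_CPlusPlus };

    bool isMSVC(Personality p) { return p == Personality::MSVC_CxxFrameHandler3; }
    bool isWasm(Personality p) { return p == Personality::GNU_Wasm_CPlusPlus; }

    // Funclet pads: MSVC and Wasm. Terminate-on-throwing-cleanup: everyone
    // except the MSVC family, so Wasm gets both funclets and pushTerminate.
    bool usesFuncletPads(Personality p) { return isMSVC(p) || isWasm(p); }
    bool needsTerminateScope(Personality p) { return !isMSVC(p); }
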
diff --git a/lib/CodeGen/CGCoroutine.cpp b/lib/CodeGen/CGCoroutine.cpp
index 5842e7b3ff93..4f525c8aac85 100644
--- a/lib/CodeGen/CGCoroutine.cpp
+++ b/lib/CodeGen/CGCoroutine.cpp
@@ -44,6 +44,15 @@ struct clang::CodeGen::CGCoroData {
// A branch to this block is emitted when coroutine needs to suspend.
llvm::BasicBlock *SuspendBB = nullptr;
+ // The promise type's 'unhandled_exception' handler, if it defines one.
+ Stmt *ExceptionHandler = nullptr;
+
+ // A temporary i1 alloca that stores whether 'await_resume' threw an
+ // exception. If it did, 'true' is stored in this variable, and the coroutine
+ // body must be skipped. If the promise type does not define an exception
+ // handler, this is null.
+ llvm::Value *ResumeEHVar = nullptr;
+
// Stores the jump destination just before the coroutine memory is freed.
// This is the destination that every suspend point jumps to for the cleanup
// branch.
@@ -121,6 +130,16 @@ static SmallString<32> buildSuspendPrefixStr(CGCoroData &Coro, AwaitKind Kind) {
return Prefix;
}
+static bool memberCallExpressionCanThrow(const Expr *E) {
+ if (const auto *CE = dyn_cast<CXXMemberCallExpr>(E))
+ if (const auto *Proto =
+ CE->getMethodDecl()->getType()->getAs<FunctionProtoType>())
+ if (isNoexceptExceptionSpec(Proto->getExceptionSpecType()) &&
+ Proto->canThrow() == CT_Cannot)
+ return false;
+ return true;
+}
+
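
memberCallExpressionCanThrow is what lets the initial-suspend lowering below skip the resume-threw flag entirely. In source terms, the check keys off the awaiter's await_resume exception specification, roughly like this (a hypothetical awaiter; promise and handle details elided):

    // If the initial awaiter's await_resume is noexcept, the frontend can
    // omit the i1 'did resume throw?' flag (ResumeEHVar) and the try/catch
    // IR wrapped around the resume expression.
    struct InitAwaiter {
      bool await_ready() noexcept { return false; }
      void await_suspend(/* coroutine_handle<> */) noexcept {}
      void await_resume() noexcept {} // noexcept => no ResumeEHVar emitted
    };
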
// Emit suspend expression which roughly looks like:
//
// auto && x = CommonExpr();
@@ -208,11 +227,36 @@ static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Co
// Emit await_resume expression.
CGF.EmitBlock(ReadyBlock);
+
+ // Exception handling requires additional IR. If the 'await_resume' function
+ // is marked as 'noexcept', we avoid generating this additional IR.
+ CXXTryStmt *TryStmt = nullptr;
+ if (Coro.ExceptionHandler && Kind == AwaitKind::Init &&
+ memberCallExpressionCanThrow(S.getResumeExpr())) {
+ Coro.ResumeEHVar =
+ CGF.CreateTempAlloca(Builder.getInt1Ty(), Prefix + Twine("resume.eh"));
+ Builder.CreateFlagStore(true, Coro.ResumeEHVar);
+
+ auto Loc = S.getResumeExpr()->getExprLoc();
+ auto *Catch = new (CGF.getContext())
+ CXXCatchStmt(Loc, /*exDecl=*/nullptr, Coro.ExceptionHandler);
+ auto *TryBody =
+ CompoundStmt::Create(CGF.getContext(), S.getResumeExpr(), Loc, Loc);
+ TryStmt = CXXTryStmt::Create(CGF.getContext(), Loc, TryBody, Catch);
+ CGF.EnterCXXTryStmt(*TryStmt);
+ }
+
LValueOrRValue Res;
if (forLValue)
Res.LV = CGF.EmitLValue(S.getResumeExpr());
else
Res.RV = CGF.EmitAnyExpr(S.getResumeExpr(), aggSlot, ignoreResult);
+
+ if (TryStmt) {
+ Builder.CreateFlagStore(false, Coro.ResumeEHVar);
+ CGF.ExitCXXTryStmt(*TryStmt);
+ }
+
return Res;
}
@@ -315,7 +359,7 @@ namespace {
GetParamRef Visitor;
Visitor.Visit(const_cast<Expr*>(InitExpr));
assert(Visitor.Expr);
- auto *DREOrig = cast<DeclRefExpr>(Visitor.Expr);
+ DeclRefExpr *DREOrig = Visitor.Expr;
auto *PD = DREOrig->getDecl();
auto it = LocalDeclMap.find(PD);
@@ -588,19 +632,40 @@ void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) {
EHStack.pushCleanup<CallCoroEnd>(EHCleanup);
CurCoro.Data->CurrentAwaitKind = AwaitKind::Init;
+ CurCoro.Data->ExceptionHandler = S.getExceptionHandler();
EmitStmt(S.getInitSuspendStmt());
CurCoro.Data->FinalJD = getJumpDestInCurrentScope(FinalBB);
CurCoro.Data->CurrentAwaitKind = AwaitKind::Normal;
- if (auto *OnException = S.getExceptionHandler()) {
+ if (CurCoro.Data->ExceptionHandler) {
+ // If we generated IR to record whether an exception was thrown from
+ // 'await_resume', then use that IR to determine whether the coroutine
+ // body should be skipped.
+ // If we didn't generate the IR (perhaps because 'await_resume' was marked
+ // as 'noexcept'), then we skip this check.
+ BasicBlock *ContBB = nullptr;
+ if (CurCoro.Data->ResumeEHVar) {
+ BasicBlock *BodyBB = createBasicBlock("coro.resumed.body");
+ ContBB = createBasicBlock("coro.resumed.cont");
+ Value *SkipBody = Builder.CreateFlagLoad(CurCoro.Data->ResumeEHVar,
+ "coro.resumed.eh");
+ Builder.CreateCondBr(SkipBody, ContBB, BodyBB);
+ EmitBlock(BodyBB);
+ }
+
auto Loc = S.getLocStart();
- CXXCatchStmt Catch(Loc, /*exDecl=*/nullptr, OnException);
- auto *TryStmt = CXXTryStmt::Create(getContext(), Loc, S.getBody(), &Catch);
+ CXXCatchStmt Catch(Loc, /*exDecl=*/nullptr,
+ CurCoro.Data->ExceptionHandler);
+ auto *TryStmt =
+ CXXTryStmt::Create(getContext(), Loc, S.getBody(), &Catch);
EnterCXXTryStmt(*TryStmt);
emitBodyAndFallthrough(*this, S, TryStmt->getTryBlock());
ExitCXXTryStmt(*TryStmt);
+
+ if (ContBB)
+ EmitBlock(ContBB);
}
else {
emitBodyAndFallthrough(*this, S, S.getBody());
diff --git a/lib/CodeGen/CGDebugInfo.cpp b/lib/CodeGen/CGDebugInfo.cpp
index aeed4d658a4e..097a1e043047 100644
--- a/lib/CodeGen/CGDebugInfo.cpp
+++ b/lib/CodeGen/CGDebugInfo.cpp
@@ -289,8 +289,7 @@ StringRef CGDebugInfo::getObjCMethodName(const ObjCMethodDecl *OMD) {
<< OC->getIdentifier()->getNameStart() << ')';
}
} else if (const auto *OCD = dyn_cast<ObjCCategoryImplDecl>(DC)) {
- OS << OCD->getClassInterface()->getName() << '('
- << OCD->getName() << ')';
+ OS << OCD->getClassInterface()->getName() << '(' << OCD->getName() << ')';
} else if (isa<ObjCProtocolDecl>(DC)) {
// We can extract the type of the class from the self pointer.
if (ImplicitParamDecl *SelfDecl = OMD->getSelfDecl()) {
@@ -361,18 +360,19 @@ StringRef CGDebugInfo::getClassName(const RecordDecl *RD) {
return StringRef();
}
-llvm::DIFile::ChecksumKind
+Optional<llvm::DIFile::ChecksumKind>
CGDebugInfo::computeChecksum(FileID FID, SmallString<32> &Checksum) const {
Checksum.clear();
- if (!CGM.getCodeGenOpts().EmitCodeView)
- return llvm::DIFile::CSK_None;
+ if (!CGM.getCodeGenOpts().EmitCodeView &&
+ CGM.getCodeGenOpts().DwarfVersion < 5)
+ return None;
SourceManager &SM = CGM.getContext().getSourceManager();
bool Invalid;
llvm::MemoryBuffer *MemBuffer = SM.getBuffer(FID, &Invalid);
if (Invalid)
- return llvm::DIFile::CSK_None;
+ return None;
llvm::MD5 Hash;
llvm::MD5::MD5Result Result;
@@ -384,51 +384,62 @@ CGDebugInfo::computeChecksum(FileID FID, SmallString<32> &Checksum) const {
return llvm::DIFile::CSK_MD5;
}
+Optional<StringRef> CGDebugInfo::getSource(const SourceManager &SM,
+ FileID FID) {
+ if (!CGM.getCodeGenOpts().EmbedSource)
+ return None;
+
+ bool SourceInvalid = false;
+ StringRef Source = SM.getBufferData(FID, &SourceInvalid);
+
+ if (SourceInvalid)
+ return None;
+
+ return Source;
+}
+
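
Checksums are now optional, computed only when CodeView or DWARF 5+ can represent them, and -gembed-source may attach the buffer contents to the DIFile. A condensed model of the two gates, with plain bools standing in for the CodeGenOpts fields:

    #include <optional>
    #include <string>

    // Gate 1: only CodeView or DWARF >= 5 carry a file checksum at all.
    std::optional<std::string> checksumFor(bool emitCodeView,
                                           unsigned dwarfVersion,
                                           const std::string &md5) {
      if (!emitCodeView && dwarfVersion < 5)
        return std::nullopt;
      return md5;
    }

    // Gate 2: source text is embedded only under -gembed-source.
    std::optional<std::string> sourceFor(bool embedSource,
                                         const std::string &buffer) {
      if (!embedSource)
        return std::nullopt;
      return buffer;
    }
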
llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) {
if (!Loc.isValid())
// If Location is not valid then use main input file.
- return DBuilder.createFile(remapDIPath(TheCU->getFilename()),
- remapDIPath(TheCU->getDirectory()),
- TheCU->getFile()->getChecksumKind(),
- TheCU->getFile()->getChecksum());
+ return getOrCreateMainFile();
SourceManager &SM = CGM.getContext().getSourceManager();
PresumedLoc PLoc = SM.getPresumedLoc(Loc);
if (PLoc.isInvalid() || StringRef(PLoc.getFilename()).empty())
// If the location is not valid then use main input file.
- return DBuilder.createFile(remapDIPath(TheCU->getFilename()),
- remapDIPath(TheCU->getDirectory()),
- TheCU->getFile()->getChecksumKind(),
- TheCU->getFile()->getChecksum());
+ return getOrCreateMainFile();
// Cache the results.
const char *fname = PLoc.getFilename();
- auto it = DIFileCache.find(fname);
+ auto It = DIFileCache.find(fname);
- if (it != DIFileCache.end()) {
+ if (It != DIFileCache.end()) {
// Verify that the information still exists.
- if (llvm::Metadata *V = it->second)
+ if (llvm::Metadata *V = It->second)
return cast<llvm::DIFile>(V);
}
SmallString<32> Checksum;
- llvm::DIFile::ChecksumKind CSKind =
+ Optional<llvm::DIFile::ChecksumKind> CSKind =
computeChecksum(SM.getFileID(Loc), Checksum);
+ Optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo;
+ if (CSKind)
+ CSInfo.emplace(*CSKind, Checksum);
- llvm::DIFile *F = DBuilder.createFile(remapDIPath(PLoc.getFilename()),
- remapDIPath(getCurrentDirname()),
- CSKind, Checksum);
+ llvm::DIFile *F = DBuilder.createFile(
+ remapDIPath(PLoc.getFilename()), remapDIPath(getCurrentDirname()), CSInfo,
+ getSource(SM, SM.getFileID(Loc)));
DIFileCache[fname].reset(F);
return F;
}
llvm::DIFile *CGDebugInfo::getOrCreateMainFile() {
- return DBuilder.createFile(remapDIPath(TheCU->getFilename()),
- remapDIPath(TheCU->getDirectory()),
- TheCU->getFile()->getChecksumKind(),
- TheCU->getFile()->getChecksum());
+ return DBuilder.createFile(
+ remapDIPath(TheCU->getFilename()), remapDIPath(TheCU->getDirectory()),
+ TheCU->getFile()->getChecksum(),
+ CGM.getCodeGenOpts().EmbedSource ? TheCU->getSource() : None);
}
std::string CGDebugInfo::remapDIPath(StringRef Path) const {
@@ -472,7 +483,8 @@ StringRef CGDebugInfo::getCurrentDirname() {
void CGDebugInfo::CreateCompileUnit() {
SmallString<32> Checksum;
- llvm::DIFile::ChecksumKind CSKind = llvm::DIFile::CSK_None;
+ Optional<llvm::DIFile::ChecksumKind> CSKind;
+ Optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo;
// Should we be asking the SourceManager for the main file name, instead of
// accepting it as an argument? This just causes the main file name to
@@ -551,14 +563,19 @@ void CGDebugInfo::CreateCompileUnit() {
break;
}
+ if (CSKind)
+ CSInfo.emplace(*CSKind, Checksum);
+
// Create new compile unit.
// FIXME - Eliminate TheCU.
auto &CGOpts = CGM.getCodeGenOpts();
TheCU = DBuilder.createCompileUnit(
LangTag,
DBuilder.createFile(remapDIPath(MainFileName),
- remapDIPath(getCurrentDirname()), CSKind, Checksum),
- Producer, LO.Optimize || CGOpts.PrepareForLTO || CGOpts.EmitSummaryIndex,
+ remapDIPath(getCurrentDirname()), CSInfo,
+ getSource(SM, SM.getMainFileID())),
+ CGOpts.EmitVersionIdentMetadata ? Producer : "",
+ LO.Optimize || CGOpts.PrepareForLTO || CGOpts.PrepareForThinLTO,
CGOpts.DwarfDebugFlags, RuntimeVers,
CGOpts.EnableSplitDwarf ? "" : CGOpts.SplitDwarfFile, EmissionKind,
0 /* DWOid */, CGOpts.SplitDwarfInlining, CGOpts.DebugInfoForProfiling,
@@ -620,14 +637,13 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
return SelTy;
}
-#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
- case BuiltinType::Id: \
- return getOrCreateStructPtrType("opencl_" #ImgType "_" #Suffix "_t", \
+#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
+ case BuiltinType::Id: \
+ return getOrCreateStructPtrType("opencl_" #ImgType "_" #Suffix "_t", \
SingletonId);
#include "clang/Basic/OpenCLImageTypes.def"
case BuiltinType::OCLSampler:
- return getOrCreateStructPtrType("opencl_sampler_t",
- OCLSamplerDITy);
+ return getOrCreateStructPtrType("opencl_sampler_t", OCLSamplerDITy);
case BuiltinType::OCLEvent:
return getOrCreateStructPtrType("opencl_event_t", OCLEventDITy);
case BuiltinType::OCLClkEvent:
@@ -645,6 +661,7 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
case BuiltinType::SChar:
Encoding = llvm::dwarf::DW_ATE_signed_char;
break;
+ case BuiltinType::Char8:
case BuiltinType::Char16:
case BuiltinType::Char32:
Encoding = llvm::dwarf::DW_ATE_UTF;
@@ -681,6 +698,34 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
// floating point types of the same size.
Encoding = llvm::dwarf::DW_ATE_float;
break;
+ case BuiltinType::ShortAccum:
+ case BuiltinType::Accum:
+ case BuiltinType::LongAccum:
+ case BuiltinType::ShortFract:
+ case BuiltinType::Fract:
+ case BuiltinType::LongFract:
+ case BuiltinType::SatShortFract:
+ case BuiltinType::SatFract:
+ case BuiltinType::SatLongFract:
+ case BuiltinType::SatShortAccum:
+ case BuiltinType::SatAccum:
+ case BuiltinType::SatLongAccum:
+ Encoding = llvm::dwarf::DW_ATE_signed_fixed;
+ break;
+ case BuiltinType::UShortAccum:
+ case BuiltinType::UAccum:
+ case BuiltinType::ULongAccum:
+ case BuiltinType::UShortFract:
+ case BuiltinType::UFract:
+ case BuiltinType::ULongFract:
+ case BuiltinType::SatUShortAccum:
+ case BuiltinType::SatUAccum:
+ case BuiltinType::SatULongAccum:
+ case BuiltinType::SatUShortFract:
+ case BuiltinType::SatUFract:
+ case BuiltinType::SatULongFract:
+ Encoding = llvm::dwarf::DW_ATE_unsigned_fixed;
+ break;
}
switch (BT->getKind()) {
@@ -780,27 +825,49 @@ static bool hasCXXMangling(const TagDecl *TD, llvm::DICompileUnit *TheCU) {
}
}
-/// In C++ mode, types have linkage, so we can rely on the ODR and
-/// on their mangled names, if they're external.
-static SmallString<256> getUniqueTagTypeName(const TagType *Ty,
- CodeGenModule &CGM,
- llvm::DICompileUnit *TheCU) {
- SmallString<256> FullName;
+// Determines if the tag declaration will require a type identifier.
+static bool needsTypeIdentifier(const TagDecl *TD, CodeGenModule &CGM,
+ llvm::DICompileUnit *TheCU) {
+ // We only add a type identifier for types with C++ name mangling.
+ if (!hasCXXMangling(TD, TheCU))
+ return false;
+
+ // CodeView types with C++ mangling need a type identifier.
+ if (CGM.getCodeGenOpts().EmitCodeView)
+ return true;
+
+ // Externally visible types with C++ mangling need a type identifier.
+ if (TD->isExternallyVisible())
+ return true;
+
+ return false;
+}
+
+// When emitting CodeView debug information we need to produce a type
+// identifier for all types which have a C++ mangling. Until a GUID is added
+// to the identifier (not currently implemented), the result will not be unique
+// across compilation units.
+// When emitting DWARF debug information, we need to produce a type identifier
+// for all externally visible types with C++ name mangling. This identifier
+// should be unique across ODR-compliant compilation units.
+static SmallString<256> getTypeIdentifier(const TagType *Ty, CodeGenModule &CGM,
+ llvm::DICompileUnit *TheCU) {
+ SmallString<256> Identifier;
const TagDecl *TD = Ty->getDecl();
- if (!hasCXXMangling(TD, TheCU) || !TD->isExternallyVisible())
- return FullName;
+ if (!needsTypeIdentifier(TD, CGM, TheCU))
+ return Identifier;
// TODO: This is using the RTTI name. Is there a better way to get
// a unique string for a type?
- llvm::raw_svector_ostream Out(FullName);
+ llvm::raw_svector_ostream Out(Identifier);
CGM.getCXXABI().getMangleContext().mangleCXXRTTIName(QualType(Ty, 0), Out);
- return FullName;
+ return Identifier;
}
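// Illustrative sketch (not part of the patch above): a concrete instance of
// the RTTI-name scheme the TODO refers to. On an Itanium-ABI compiler this
// prints "N1n1SE"; the identifier stored on the DICompositeType is the
// matching type_info symbol name, "_ZTSN1n1SE".
#include <iostream>
#include <typeinfo>

namespace n { struct S {}; }

int main() { std::cout << typeid(n::S).name() << '\n'; }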
-/// \return the approproate DWARF tag for a composite type.
+/// \return the appropriate DWARF tag for a composite type.
static llvm::dwarf::Tag getTagForRecord(const RecordDecl *RD) {
- llvm::dwarf::Tag Tag;
+ llvm::dwarf::Tag Tag;
if (RD->isStruct() || RD->isInterface())
Tag = llvm::dwarf::DW_TAG_structure_type;
else if (RD->isUnion())
@@ -828,10 +895,10 @@ CGDebugInfo::getOrCreateRecordFwdDecl(const RecordType *Ty,
uint32_t Align = 0;
// Create the type.
- SmallString<256> FullName = getUniqueTagTypeName(Ty, CGM, TheCU);
+ SmallString<256> Identifier = getTypeIdentifier(Ty, CGM, TheCU);
llvm::DICompositeType *RetTy = DBuilder.createReplaceableCompositeType(
getTagForRecord(RD), RDName, Ctx, DefUnit, Line, 0, Size, Align,
- llvm::DINode::FlagFwdDecl, FullName);
+ llvm::DINode::FlagFwdDecl, Identifier);
if (CGM.getCodeGenOpts().DebugFwdTemplateParams)
if (auto *TSpecial = dyn_cast<ClassTemplateSpecializationDecl>(RD))
DBuilder.replaceArrays(RetTy, llvm::DINodeArray(),
@@ -926,9 +993,8 @@ llvm::DIType *CGDebugInfo::CreateType(const BlockPointerType *Ty,
// DW_AT_APPLE_BLOCK attribute and are an implementation detail only
// the debugger needs to know about. To allow type uniquing, emit
// them without a name or a location.
- EltTy =
- DBuilder.createStructType(Unit, "", nullptr, LineNo,
- FieldOffset, 0, Flags, nullptr, Elements);
+ EltTy = DBuilder.createStructType(Unit, "", nullptr, LineNo, FieldOffset, 0,
+ Flags, nullptr, Elements);
return DBuilder.createPointerType(EltTy, Size);
}
@@ -943,8 +1009,9 @@ llvm::DIType *CGDebugInfo::CreateType(const TemplateSpecializationType *Ty,
Ty->getTemplateName().print(OS, getPrintingPolicy(), /*qualified*/ false);
printTemplateArgumentList(OS, Ty->template_arguments(), getPrintingPolicy());
- auto *AliasDecl = cast<TypeAliasTemplateDecl>(
- Ty->getTemplateName().getAsTemplateDecl())->getTemplatedDecl();
+ auto *AliasDecl =
+ cast<TypeAliasTemplateDecl>(Ty->getTemplateName().getAsTemplateDecl())
+ ->getTemplatedDecl();
SourceLocation Loc = AliasDecl->getLocation();
return DBuilder.createTypedef(Src, OS.str(), getOrCreateFile(Loc),
@@ -981,20 +1048,28 @@ static unsigned getDwarfCC(CallingConv CC) {
return llvm::dwarf::DW_CC_LLVM_vectorcall;
case CC_X86Pascal:
return llvm::dwarf::DW_CC_BORLAND_pascal;
-
- // FIXME: Create new DW_CC_ codes for these calling conventions.
case CC_Win64:
+ return llvm::dwarf::DW_CC_LLVM_Win64;
case CC_X86_64SysV:
+ return llvm::dwarf::DW_CC_LLVM_X86_64SysV;
case CC_AAPCS:
+ return llvm::dwarf::DW_CC_LLVM_AAPCS;
case CC_AAPCS_VFP:
+ return llvm::dwarf::DW_CC_LLVM_AAPCS_VFP;
case CC_IntelOclBicc:
+ return llvm::dwarf::DW_CC_LLVM_IntelOclBicc;
case CC_SpirFunction:
+ return llvm::dwarf::DW_CC_LLVM_SpirFunction;
case CC_OpenCLKernel:
+ return llvm::dwarf::DW_CC_LLVM_OpenCLKernel;
case CC_Swift:
+ return llvm::dwarf::DW_CC_LLVM_Swift;
case CC_PreserveMost:
+ return llvm::dwarf::DW_CC_LLVM_PreserveMost;
case CC_PreserveAll:
+ return llvm::dwarf::DW_CC_LLVM_PreserveAll;
case CC_X86RegCall:
- return 0;
+ return llvm::dwarf::DW_CC_LLVM_X86RegCall;
}
return 0;
}
@@ -1102,8 +1177,8 @@ CGDebugInfo::createFieldType(StringRef name, QualType type, SourceLocation loc,
}
llvm::DINode::DIFlags flags = getAccessFlag(AS, RD);
- return DBuilder.createMemberType(scope, name, file, line, SizeInBits,
- Align, offsetInBits, flags, debugType);
+ return DBuilder.createMemberType(scope, name, file, line, SizeInBits, Align,
+ offsetInBits, flags, debugType);
}
void CGDebugInfo::CollectRecordLambdaFields(
@@ -1223,10 +1298,6 @@ void CGDebugInfo::CollectRecordFields(
else {
const ASTRecordLayout &layout = CGM.getContext().getASTRecordLayout(record);
- // Debug info for nested types is included in the member list only for
- // CodeView.
- bool IncludeNestedTypes = CGM.getCodeGenOpts().EmitCodeView;
-
// Field number for non-static fields.
unsigned fieldNo = 0;
@@ -1236,6 +1307,13 @@ void CGDebugInfo::CollectRecordFields(
if (const auto *V = dyn_cast<VarDecl>(I)) {
if (V->hasAttr<NoDebugAttr>())
continue;
+
+ // Skip variable template specializations when emitting CodeView. MSVC
+ // doesn't emit them.
+ if (CGM.getCodeGenOpts().EmitCodeView &&
+ isa<VarTemplateSpecializationDecl>(V))
+ continue;
+
// Reuse the existing static member declaration if one exists
auto MI = StaticDataMemberCache.find(V->getCanonicalDecl());
if (MI != StaticDataMemberCache.end()) {
@@ -1252,7 +1330,9 @@ void CGDebugInfo::CollectRecordFields(
// Bump field number for next field.
++fieldNo;
- } else if (IncludeNestedTypes) {
+ } else if (CGM.getCodeGenOpts().EmitCodeView) {
+ // Debug info for nested types is included in the member list only for
+ // CodeView.
if (const auto *nestedType = dyn_cast<TypeDecl>(I))
if (!nestedType->isImplicit() &&
nestedType->getDeclContext() == record)
@@ -1386,7 +1466,7 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction(
// deleting dtor.
const auto *DD = dyn_cast<CXXDestructorDecl>(Method);
GlobalDecl GD = DD ? GlobalDecl(DD, Dtor_Deleting) : GlobalDecl(Method);
- MicrosoftVTableContext::MethodVFTableLocation ML =
+ MethodVFTableLocation ML =
CGM.getMicrosoftVTableContext().getMethodVFTableLocation(GD);
VIndex = ML.Index;
@@ -1507,6 +1587,7 @@ void CGDebugInfo::CollectCXXBasesAux(
auto *BaseTy = getOrCreateType(BI.getType(), Unit);
llvm::DINode::DIFlags BFlags = StartingFlags;
uint64_t BaseOffset;
+ uint32_t VBPtrOffset = 0;
if (BI.isVirtual()) {
if (CGM.getTarget().getCXXABI().isItaniumFamily()) {
@@ -1520,6 +1601,10 @@ void CGDebugInfo::CollectCXXBasesAux(
// vbase offset offset in Itanium.
BaseOffset =
4 * CGM.getMicrosoftVTableContext().getVBTableIndex(RD, Base);
+ VBPtrOffset = CGM.getContext()
+ .getASTRecordLayout(RD)
+ .getVBPtrOffset()
+ .getQuantity();
}
BFlags |= llvm::DINode::FlagVirtual;
} else
@@ -1528,8 +1613,8 @@ void CGDebugInfo::CollectCXXBasesAux(
// BI->isVirtual() and bits when not.
BFlags |= getAccessFlag(BI.getAccessSpecifier(), RD);
- llvm::DIType *DTy =
- DBuilder.createInheritance(RecordTy, BaseTy, BaseOffset, BFlags);
+ llvm::DIType *DTy = DBuilder.createInheritance(RecordTy, BaseTy, BaseOffset,
+ VBPtrOffset, BFlags);
EltTys.push_back(DTy);
}
}
@@ -1603,8 +1688,8 @@ CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList,
V = CGM.getCXXABI().EmitNullMemberPointer(MPT);
if (!V)
V = llvm::ConstantInt::get(CGM.Int8Ty, 0);
- TemplateParams.push_back(DBuilder.createTemplateValueParameter(
- TheCU, Name, TTy, V));
+ TemplateParams.push_back(
+ DBuilder.createTemplateValueParameter(TheCU, Name, TTy, V));
} break;
case TemplateArgument::Template:
TemplateParams.push_back(DBuilder.createTemplateTemplateParameter(
@@ -1676,9 +1761,8 @@ llvm::DIType *CGDebugInfo::getOrCreateVTablePtrType(llvm::DIFile *Unit) {
Optional<unsigned> DWARFAddressSpace =
CGM.getTarget().getDWARFAddressSpace(VtblPtrAddressSpace);
- llvm::DIType *vtbl_ptr_type =
- DBuilder.createPointerType(SubTy, Size, 0, DWARFAddressSpace,
- "__vtbl_ptr_type");
+ llvm::DIType *vtbl_ptr_type = DBuilder.createPointerType(
+ SubTy, Size, 0, DWARFAddressSpace, "__vtbl_ptr_type");
VTablePtrType = DBuilder.createPointerType(vtbl_ptr_type, Size);
return VTablePtrType;
}
@@ -1722,9 +1806,8 @@ void CGDebugInfo::CollectVTableInfo(const CXXRecordDecl *RD, llvm::DIFile *Unit,
CGM.getTarget().getDWARFAddressSpace(VtblPtrAddressSpace);
// Create a very wide void* type and insert it directly in the element list.
- llvm::DIType *VTableType =
- DBuilder.createPointerType(nullptr, VTableWidth, 0, DWARFAddressSpace,
- "__vtbl_ptr_type");
+ llvm::DIType *VTableType = DBuilder.createPointerType(
+ nullptr, VTableWidth, 0, DWARFAddressSpace, "__vtbl_ptr_type");
EltTys.push_back(VTableType);
// The vptr is a pointer to this special vtable type.
@@ -1739,9 +1822,9 @@ void CGDebugInfo::CollectVTableInfo(const CXXRecordDecl *RD, llvm::DIFile *Unit,
VPtrTy = getOrCreateVTablePtrType(Unit);
unsigned Size = CGM.getContext().getTypeSize(CGM.getContext().VoidPtrTy);
- llvm::DIType *VPtrMember = DBuilder.createMemberType(
- Unit, getVTableName(RD), Unit, 0, Size, 0, 0,
- llvm::DINode::FlagArtificial, VPtrTy);
+ llvm::DIType *VPtrMember =
+ DBuilder.createMemberType(Unit, getVTableName(RD), Unit, 0, Size, 0, 0,
+ llvm::DINode::FlagArtificial, VPtrTy);
EltTys.push_back(VPtrMember);
}
@@ -2079,7 +2162,7 @@ CGDebugInfo::getOrCreateModuleRef(ExternalASTSource::ASTSourceDescriptor Mod,
llvm::raw_svector_ostream OS(ConfigMacros);
const auto &PPOpts = CGM.getPreprocessorOpts();
unsigned I = 0;
- // Translate the macro definitions back into a commmand line.
+ // Translate the macro definitions back into a command line.
for (auto &M : PPOpts.Macros) {
if (++I > 1)
OS << " ";
@@ -2088,9 +2171,14 @@ CGDebugInfo::getOrCreateModuleRef(ExternalASTSource::ASTSourceDescriptor Mod,
OS << "\"-" << (Undef ? 'U' : 'D');
for (char c : Macro)
switch (c) {
- case '\\' : OS << "\\\\"; break;
- case '"' : OS << "\\\""; break;
- default: OS << c;
+ case '\\':
+ OS << "\\\\";
+ break;
+ case '"':
+ OS << "\\\"";
+ break;
+ default:
+ OS << c;
}
OS << '\"';
}
@@ -2107,6 +2195,7 @@ CGDebugInfo::getOrCreateModuleRef(ExternalASTSource::ASTSourceDescriptor Mod,
: ~1ULL;
llvm::DIBuilder DIB(CGM.getModule());
DIB.createCompileUnit(TheCU->getSourceLanguage(),
+ // TODO: Support "Source" from external AST providers?
DIB.createFile(Mod.getModuleName(), Mod.getPath()),
TheCU->getProducer(), true, StringRef(), 0,
Mod.getASTFile(), llvm::DICompileUnit::FullDebug,
@@ -2162,7 +2251,7 @@ llvm::DIType *CGDebugInfo::CreateTypeDefinition(const ObjCInterfaceType *Ty,
if (!SClassTy)
return nullptr;
- llvm::DIType *InhTag = DBuilder.createInheritance(RealDecl, SClassTy, 0,
+ llvm::DIType *InhTag = DBuilder.createInheritance(RealDecl, SClassTy, 0, 0,
llvm::DINode::FlagZero);
EltTys.push_back(InhTag);
}
@@ -2184,7 +2273,7 @@ llvm::DIType *CGDebugInfo::CreateTypeDefinition(const ObjCInterfaceType *Ty,
EltTys.push_back(PropertyNode);
};
{
- llvm::SmallPtrSet<const IdentifierInfo*, 16> PropertySet;
+ llvm::SmallPtrSet<const IdentifierInfo *, 16> PropertySet;
for (const ObjCCategoryDecl *ClassExt : ID->known_extensions())
for (auto *PD : ClassExt->properties()) {
PropertySet.insert(PD->getIdentifier());
@@ -2265,10 +2354,12 @@ llvm::DIType *CGDebugInfo::CreateTypeDefinition(const ObjCInterfaceType *Ty,
ObjCMethodDecl *Setter = PD->getSetterMethodDecl();
PropertyNode = DBuilder.createObjCProperty(
PD->getName(), PUnit, PLine,
- hasDefaultGetterName(PD, Getter) ? "" : getSelectorName(
- PD->getGetterName()),
- hasDefaultSetterName(PD, Setter) ? "" : getSelectorName(
- PD->getSetterName()),
+ hasDefaultGetterName(PD, Getter)
+ ? ""
+ : getSelectorName(PD->getGetterName()),
+ hasDefaultSetterName(PD, Setter)
+ ? ""
+ : getSelectorName(PD->getSetterName()),
PD->getPropertyAttributes(),
getOrCreateType(PD->getType(), PUnit));
}
@@ -2291,12 +2382,14 @@ llvm::DIType *CGDebugInfo::CreateType(const VectorType *Ty,
llvm::DIFile *Unit) {
llvm::DIType *ElementTy = getOrCreateType(Ty->getElementType(), Unit);
int64_t Count = Ty->getNumElements();
- if (Count == 0)
- // If number of elements are not known then this is an unbounded array.
- // Use Count == -1 to express such arrays.
- Count = -1;
- llvm::Metadata *Subscript = DBuilder.getOrCreateSubrange(0, Count);
+ llvm::Metadata *Subscript;
+ QualType QTy(Ty, 0);
+ auto SizeExpr = SizeExprCache.find(QTy);
+ if (SizeExpr != SizeExprCache.end())
+ Subscript = DBuilder.getOrCreateSubrange(0, SizeExpr->getSecond());
+ else
+ Subscript = DBuilder.getOrCreateSubrange(0, Count ? Count : -1);
llvm::DINodeArray SubscriptArray = DBuilder.getOrCreateArray(Subscript);
uint64_t Size = CGM.getContext().getTypeSize(Ty);
@@ -2353,8 +2446,12 @@ llvm::DIType *CGDebugInfo::CreateType(const ArrayType *Ty, llvm::DIFile *Unit) {
}
}
- // FIXME: Verify this is right for VLAs.
- Subscripts.push_back(DBuilder.getOrCreateSubrange(0, Count));
+ auto SizeNode = SizeExprCache.find(EltTy);
+ if (SizeNode != SizeExprCache.end())
+ Subscripts.push_back(
+ DBuilder.getOrCreateSubrange(0, SizeNode->getSecond()));
+ else
+ Subscripts.push_back(DBuilder.getOrCreateSubrange(0, Count));
EltTy = Ty->getElementType();
}
@@ -2422,8 +2519,7 @@ llvm::DIType *CGDebugInfo::CreateType(const AtomicType *Ty, llvm::DIFile *U) {
return DBuilder.createQualifiedType(llvm::dwarf::DW_TAG_atomic_type, FromTy);
}
-llvm::DIType* CGDebugInfo::CreateType(const PipeType *Ty,
- llvm::DIFile *U) {
+llvm::DIType *CGDebugInfo::CreateType(const PipeType *Ty, llvm::DIFile *U) {
return getOrCreateType(Ty->getElementType(), U);
}
@@ -2437,7 +2533,7 @@ llvm::DIType *CGDebugInfo::CreateEnumType(const EnumType *Ty) {
Align = getDeclAlignIfRequired(ED, CGM.getContext());
}
- SmallString<256> FullName = getUniqueTagTypeName(Ty, CGM, TheCU);
+ SmallString<256> Identifier = getTypeIdentifier(Ty, CGM, TheCU);
bool isImportedFromModule =
DebugTypeExtRefs && ED->isFromASTFile() && ED->getDefinition();
@@ -2460,7 +2556,7 @@ llvm::DIType *CGDebugInfo::CreateEnumType(const EnumType *Ty) {
StringRef EDName = ED->getName();
llvm::DIType *RetTy = DBuilder.createReplaceableCompositeType(
llvm::dwarf::DW_TAG_enumeration_type, EDName, EDContext, DefUnit, Line,
- 0, Size, Align, llvm::DINode::FlagFwdDecl, FullName);
+ 0, Size, Align, llvm::DINode::FlagFwdDecl, Identifier);
ReplaceMap.emplace_back(
std::piecewise_construct, std::make_tuple(Ty),
@@ -2480,14 +2576,17 @@ llvm::DIType *CGDebugInfo::CreateTypeDefinition(const EnumType *Ty) {
Align = getDeclAlignIfRequired(ED, CGM.getContext());
}
- SmallString<256> FullName = getUniqueTagTypeName(Ty, CGM, TheCU);
+ SmallString<256> Identifier = getTypeIdentifier(Ty, CGM, TheCU);
// Create elements for each enumerator.
SmallVector<llvm::Metadata *, 16> Enumerators;
ED = ED->getDefinition();
+ bool IsSigned = ED->getIntegerType()->isSignedIntegerType();
for (const auto *Enum : ED->enumerators()) {
- Enumerators.push_back(DBuilder.createEnumerator(
- Enum->getName(), Enum->getInitVal().getSExtValue()));
+ const auto &InitVal = Enum->getInitVal();
+ auto Value = IsSigned ? InitVal.getSExtValue() : InitVal.getZExtValue();
+ Enumerators.push_back(
+ DBuilder.createEnumerator(Enum->getName(), Value, !IsSigned));
}
// Return a CompositeType for the enum itself.
@@ -2496,11 +2595,10 @@ llvm::DIType *CGDebugInfo::CreateTypeDefinition(const EnumType *Ty) {
llvm::DIFile *DefUnit = getOrCreateFile(ED->getLocation());
unsigned Line = getLineNumber(ED->getLocation());
llvm::DIScope *EnumContext = getDeclContextDescriptor(ED);
- llvm::DIType *ClassTy =
- ED->isFixed() ? getOrCreateType(ED->getIntegerType(), DefUnit) : nullptr;
+ llvm::DIType *ClassTy = getOrCreateType(ED->getIntegerType(), DefUnit);
return DBuilder.createEnumerationType(EnumContext, ED->getName(), DefUnit,
Line, Size, Align, EltArray, ClassTy,
- FullName);
+ Identifier, ED->isFixed());
}
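// Illustrative sketch (not part of the patch above): why the sext/zext split
// matters. Extracting a 32-bit unsigned enumerator with sign extension, as
// the old code always did via getSExtValue(), flips large values negative.
#include <cstdint>
#include <iostream>

enum Colors : uint32_t { Sentinel = 0xFFFFFFFFu };

int main() {
  uint32_t Bits = Sentinel;
  std::cout << static_cast<int64_t>(static_cast<int32_t>(Bits)) << '\n'; // -1
  std::cout << static_cast<int64_t>(Bits) << '\n'; // 4294967295
}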
llvm::DIMacro *CGDebugInfo::CreateMacro(llvm::DIMacroFile *Parent,
@@ -2585,10 +2683,10 @@ llvm::DIType *CGDebugInfo::getTypeOrNull(QualType Ty) {
// Unwrap the type as needed for debug information.
Ty = UnwrapTypeForDebugInfo(Ty, CGM.getContext());
- auto it = TypeCache.find(Ty.getAsOpaquePtr());
- if (it != TypeCache.end()) {
+ auto It = TypeCache.find(Ty.getAsOpaquePtr());
+ if (It != TypeCache.end()) {
// Verify that the debug info still exists.
- if (llvm::Metadata *V = it->second)
+ if (llvm::Metadata *V = It->second)
return cast<llvm::DIType>(V);
}
@@ -2623,7 +2721,7 @@ llvm::DIType *CGDebugInfo::getOrCreateType(QualType Ty, llvm::DIFile *Unit) {
return T;
llvm::DIType *Res = CreateTypeNode(Ty, Unit);
- void* TyPtr = Ty.getAsOpaquePtr();
+ void *TyPtr = Ty.getAsOpaquePtr();
// And update the type cache.
TypeCache[TyPtr].reset(Res);
@@ -2801,11 +2899,24 @@ llvm::DICompositeType *CGDebugInfo::CreateLimitedType(const RecordType *Ty) {
uint64_t Size = CGM.getContext().getTypeSize(Ty);
auto Align = getDeclAlignIfRequired(D, CGM.getContext());
- SmallString<256> FullName = getUniqueTagTypeName(Ty, CGM, TheCU);
+ SmallString<256> Identifier = getTypeIdentifier(Ty, CGM, TheCU);
+
+ // Explicitly record the calling convention for C++ records.
+ auto Flags = llvm::DINode::FlagZero;
+ if (auto CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
+ if (CGM.getCXXABI().getRecordArgABI(CXXRD) == CGCXXABI::RAA_Indirect)
+ Flags |= llvm::DINode::FlagTypePassByReference;
+ else
+ Flags |= llvm::DINode::FlagTypePassByValue;
+
+    // Record if a C++ record is a trivial type.
+ if (CXXRD->isTrivial())
+ Flags |= llvm::DINode::FlagTrivial;
+ }
llvm::DICompositeType *RealDecl = DBuilder.createReplaceableCompositeType(
getTagForRecord(RD), RDName, RDContext, DefUnit, Line, 0, Size, Align,
- llvm::DINode::FlagZero, FullName);
+ Flags, Identifier);
// Elements of composite types usually have references back to the type, creating
// uniquing cycles. Distinct nodes are more efficient.
@@ -2819,14 +2930,14 @@ llvm::DICompositeType *CGDebugInfo::CreateLimitedType(const RecordType *Ty) {
// so they don't tend to be involved in uniquing cycles and there is some
// chance of merging them when linking together two modules. Only make
// them distinct if they are ODR-uniqued.
- if (FullName.empty())
+ if (Identifier.empty())
break;
LLVM_FALLTHROUGH;
case llvm::dwarf::DW_TAG_structure_type:
case llvm::dwarf::DW_TAG_union_type:
case llvm::dwarf::DW_TAG_class_type:
- // Immediatley resolve to a distinct node.
+ // Immediately resolve to a distinct node.
RealDecl =
llvm::MDNode::replaceWithDistinct(llvm::TempDICompositeType(RealDecl));
break;
@@ -2901,10 +3012,10 @@ void CGDebugInfo::collectFunctionDeclProps(GlobalDecl GD, llvm::DIFile *Unit,
if (DebugKind >= codegenoptions::LimitedDebugInfo) {
if (const NamespaceDecl *NSDecl =
- dyn_cast_or_null<NamespaceDecl>(FD->getDeclContext()))
+ dyn_cast_or_null<NamespaceDecl>(FD->getDeclContext()))
FDContext = getOrCreateNamespace(NSDecl);
else if (const RecordDecl *RDecl =
- dyn_cast_or_null<RecordDecl>(FD->getDeclContext())) {
+ dyn_cast_or_null<RecordDecl>(FD->getDeclContext())) {
llvm::DIScope *Mod = getParentModuleOrNull(RDecl);
FDContext = getContextDescriptor(RDecl, Mod ? Mod : TheCU);
}
@@ -2931,8 +3042,8 @@ void CGDebugInfo::collectVarDeclProps(const VarDecl *VD, llvm::DIFile *&Unit,
llvm::APInt ConstVal(32, 1);
QualType ET = CGM.getContext().getAsArrayType(T)->getElementType();
- T = CGM.getContext().getConstantArrayType(ET, ConstVal,
- ArrayType::Normal, 0);
+ T = CGM.getContext().getConstantArrayType(ET, ConstVal, ArrayType::Normal,
+ 0);
}
Name = VD->getName();
@@ -2959,8 +3070,8 @@ void CGDebugInfo::collectVarDeclProps(const VarDecl *VD, llvm::DIFile *&Unit,
if (DC->isRecord())
DC = CGM.getContext().getTranslationUnitDecl();
- llvm::DIScope *Mod = getParentModuleOrNull(VD);
- VDContext = getContextDescriptor(cast<Decl>(DC), Mod ? Mod : TheCU);
+ llvm::DIScope *Mod = getParentModuleOrNull(VD);
+ VDContext = getContextDescriptor(cast<Decl>(DC), Mod ? Mod : TheCU);
}
llvm::DISubprogram *CGDebugInfo::getFunctionFwdDeclOrStub(GlobalDecl GD,
@@ -2972,8 +3083,8 @@ llvm::DISubprogram *CGDebugInfo::getFunctionFwdDeclOrStub(GlobalDecl GD,
llvm::DIFile *Unit = getOrCreateFile(Loc);
llvm::DIScope *DContext = Unit;
unsigned Line = getLineNumber(Loc);
- collectFunctionDeclProps(GD, Unit, Name, LinkageName, DContext,
- TParamsArray, Flags);
+ collectFunctionDeclProps(GD, Unit, Name, LinkageName, DContext, TParamsArray,
+ Flags);
auto *FD = dyn_cast<FunctionDecl>(GD.getDecl());
// Build function type.
@@ -2999,20 +3110,18 @@ llvm::DISubprogram *CGDebugInfo::getFunctionFwdDeclOrStub(GlobalDecl GD,
!FD->isExternallyVisible(),
/* isDefinition = */ false, 0, Flags, CGM.getLangOpts().Optimize,
TParamsArray.get(), getFunctionDeclaration(FD));
- const auto *CanonDecl = cast<FunctionDecl>(FD->getCanonicalDecl());
+ const FunctionDecl *CanonDecl = FD->getCanonicalDecl();
FwdDeclReplaceMap.emplace_back(std::piecewise_construct,
std::make_tuple(CanonDecl),
std::make_tuple(SP));
return SP;
}
-llvm::DISubprogram *
-CGDebugInfo::getFunctionForwardDeclaration(GlobalDecl GD) {
+llvm::DISubprogram *CGDebugInfo::getFunctionForwardDeclaration(GlobalDecl GD) {
return getFunctionFwdDeclOrStub(GD, /* Stub = */ false);
}
-llvm::DISubprogram *
-CGDebugInfo::getFunctionStub(GlobalDecl GD) {
+llvm::DISubprogram *CGDebugInfo::getFunctionStub(GlobalDecl GD) {
return getFunctionFwdDeclOrStub(GD, /* Stub = */ true);
}
@@ -3136,7 +3245,8 @@ llvm::DISubroutineType *CGDebugInfo::getOrCreateFunctionType(const Decl *D,
if (FPT->getNumParams() > 1)
SelfDeclTy = FPT->getParamType(0);
if (!SelfDeclTy.isNull())
- Elts.push_back(CreateSelfType(SelfDeclTy, getOrCreateType(SelfDeclTy, F)));
+ Elts.push_back(
+ CreateSelfType(SelfDeclTy, getOrCreateType(SelfDeclTy, F)));
// "_cmd" pointer is always second argument.
Elts.push_back(DBuilder.createArtificialType(
getOrCreateType(CGM.getContext().getObjCSelType(), F)));
@@ -3172,7 +3282,8 @@ llvm::DISubroutineType *CGDebugInfo::getOrCreateFunctionType(const Decl *D,
void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc,
SourceLocation ScopeLoc, QualType FnType,
- llvm::Function *Fn, CGBuilderTy &Builder) {
+ llvm::Function *Fn, bool CurFuncIsThunk,
+ CGBuilderTy &Builder) {
StringRef Name;
StringRef LinkageName;
@@ -3213,11 +3324,15 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc,
if (Name.startswith("\01"))
Name = Name.substr(1);
- if (!HasDecl || D->isImplicit()) {
+ if (!HasDecl || D->isImplicit() || D->hasAttr<ArtificialAttr>()) {
Flags |= llvm::DINode::FlagArtificial;
// Artificial functions should not silently reuse CurLoc.
CurLoc = SourceLocation();
}
+
+ if (CurFuncIsThunk)
+ Flags |= llvm::DINode::FlagThunk;
+
unsigned LineNo = getLineNumber(Loc);
unsigned ScopeLine = getLineNumber(ScopeLoc);
@@ -3238,6 +3353,27 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc,
if (HasDecl && isa<FunctionDecl>(D))
DeclCache[D->getCanonicalDecl()].reset(SP);
+ if (CGM.getCodeGenOpts().DwarfVersion >= 5) {
+ // Starting with DWARF V5 method declarations are emitted as children of
+ // the interface type.
+ if (const auto *OMD = dyn_cast_or_null<ObjCMethodDecl>(D)) {
+ const ObjCInterfaceDecl *ID = OMD->getClassInterface();
+ QualType QTy(ID->getTypeForDecl(), 0);
+ auto It = TypeCache.find(QTy.getAsOpaquePtr());
+ if (It != TypeCache.end()) {
+ llvm::DICompositeType *InterfaceDecl =
+ cast<llvm::DICompositeType>(It->second);
+ llvm::DISubprogram *FD = DBuilder.createFunction(
+ InterfaceDecl, Name, LinkageName, Unit, LineNo,
+ getOrCreateFunctionType(D, FnType, Unit), Fn->hasLocalLinkage(),
+ false /*definition*/, ScopeLine, Flags, CGM.getLangOpts().Optimize,
+ TParamsArray.get());
+ DBuilder.finalizeSubprogram(FD);
+ ObjCMethodCache[ID].push_back(FD);
+ }
+ }
+ }
+
// Push the function onto the lexical block stack.
LexicalBlockStack.emplace_back(SP);
@@ -3330,8 +3466,7 @@ void CGDebugInfo::CreateLexicalBlock(SourceLocation Loc) {
}
void CGDebugInfo::AppendAddressSpaceXDeref(
- unsigned AddressSpace,
- SmallVectorImpl<int64_t> &Expr) const {
+ unsigned AddressSpace, SmallVectorImpl<int64_t> &Expr) const {
Optional<unsigned> DWARFAddressSpace =
CGM.getTarget().getDWARFAddressSpace(AddressSpace);
if (!DWARFAddressSpace)
@@ -3463,13 +3598,14 @@ llvm::DIType *CGDebugInfo::EmitTypeForVarWithBlocksAttr(const VarDecl *VD,
nullptr, Elements);
}
-void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage,
- llvm::Optional<unsigned> ArgNo,
- CGBuilderTy &Builder) {
+llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const VarDecl *VD,
+ llvm::Value *Storage,
+ llvm::Optional<unsigned> ArgNo,
+ CGBuilderTy &Builder) {
assert(DebugKind >= codegenoptions::LimitedDebugInfo);
assert(!LexicalBlockStack.empty() && "Region stack mismatch, stack empty!");
if (VD->hasAttr<NoDebugAttr>())
- return;
+ return nullptr;
bool Unwritten =
VD->isImplicit() || (isa<Decl>(VD->getDeclContext()) &&
@@ -3487,7 +3623,7 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage,
// If there is no debug info for this type then do not emit debug info
// for this variable.
if (!Ty)
- return;
+ return nullptr;
// Get location information.
unsigned Line = 0;
@@ -3538,15 +3674,15 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage,
} else if (const auto *RT = dyn_cast<RecordType>(VD->getType())) {
// If VD is an anonymous union then Storage represents value for
// all union fields.
- const auto *RD = cast<RecordDecl>(RT->getDecl());
+ const RecordDecl *RD = RT->getDecl();
if (RD->isUnion() && RD->isAnonymousStructOrUnion()) {
// GDB has trouble finding local variables in anonymous unions, so we emit
- // artifical local variables for each of the members.
+ // artificial local variables for each of the members.
//
// FIXME: Remove this code as soon as GDB supports this.
// The debug info verifier in LLVM operates based on the assumption that a
- // variable has the same size as its storage and we had to disable the check
- // for artificial variables.
+ // variable has the same size as its storage and we had to disable the
+ // check for artificial variables.
for (const auto *Field : RD->fields()) {
llvm::DIType *FieldTy = getOrCreateType(Field->getType(), Unit);
StringRef FieldName = Field->getName();
@@ -3571,25 +3707,26 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage,
}
// Create the descriptor for the variable.
- auto *D = ArgNo
- ? DBuilder.createParameterVariable(
- Scope, Name, *ArgNo, Unit, Line, Ty,
- CGM.getLangOpts().Optimize, Flags)
- : DBuilder.createAutoVariable(Scope, Name, Unit, Line, Ty,
- CGM.getLangOpts().Optimize, Flags,
- Align);
+ auto *D = ArgNo ? DBuilder.createParameterVariable(
+ Scope, Name, *ArgNo, Unit, Line, Ty,
+ CGM.getLangOpts().Optimize, Flags)
+ : DBuilder.createAutoVariable(Scope, Name, Unit, Line, Ty,
+ CGM.getLangOpts().Optimize,
+ Flags, Align);
// Insert an llvm.dbg.declare into the current block.
DBuilder.insertDeclare(Storage, D, DBuilder.createExpression(Expr),
llvm::DebugLoc::get(Line, Column, Scope, CurInlinedAt),
Builder.GetInsertBlock());
+
+ return D;
}
-void CGDebugInfo::EmitDeclareOfAutoVariable(const VarDecl *VD,
- llvm::Value *Storage,
- CGBuilderTy &Builder) {
+llvm::DILocalVariable *
+CGDebugInfo::EmitDeclareOfAutoVariable(const VarDecl *VD, llvm::Value *Storage,
+ CGBuilderTy &Builder) {
assert(DebugKind >= codegenoptions::LimitedDebugInfo);
- EmitDeclare(VD, Storage, llvm::None, Builder);
+ return EmitDeclare(VD, Storage, llvm::None, Builder);
}
llvm::DIType *CGDebugInfo::CreateSelfType(const QualType &QualTy,
@@ -3686,7 +3823,7 @@ struct BlockLayoutChunk {
bool operator<(const BlockLayoutChunk &l, const BlockLayoutChunk &r) {
return l.OffsetInBits < r.OffsetInBits;
}
-}
+} // namespace
void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block,
StringRef Name,
@@ -3725,9 +3862,10 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block,
blockLayout->getElementOffsetInBits(3),
tunit, tunit));
fields.push_back(createFieldType(
- "__descriptor", C.getPointerType(block.NeedsCopyDispose
- ? C.getBlockDescriptorExtendedType()
- : C.getBlockDescriptorType()),
+ "__descriptor",
+ C.getPointerType(block.NeedsCopyDispose
+ ? C.getBlockDescriptorExtendedType()
+ : C.getBlockDescriptorType()),
loc, AS_public, blockLayout->getElementOffsetInBits(4), tunit, tunit));
// We want to sort the captures by offset, not because DWARF
@@ -3806,8 +3944,8 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block,
}
SmallString<36> typeName;
- llvm::raw_svector_ostream(typeName) << "__block_literal_"
- << CGM.getUniqueBlockCount();
+ llvm::raw_svector_ostream(typeName)
+ << "__block_literal_" << CGM.getUniqueBlockCount();
llvm::DINodeArray fieldsArray = DBuilder.getOrCreateArray(fields);
@@ -3823,8 +3961,7 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block,
// Create the descriptor for the parameter.
auto *debugVar = DBuilder.createParameterVariable(
- scope, Name, ArgNo, tunit, line, type,
- CGM.getLangOpts().Optimize, flags);
+ scope, Name, ArgNo, tunit, line, type, CGM.getLangOpts().Optimize, flags);
// Insert an llvm.dbg.declare into the current block.
DBuilder.insertDeclare(Alloca, debugVar, DBuilder.createExpression(),
@@ -3863,7 +4000,7 @@ llvm::DIGlobalVariableExpression *CGDebugInfo::CollectAnonRecordDecls(
if (FieldName.empty()) {
if (const auto *RT = dyn_cast<RecordType>(Field->getType()))
GVE = CollectAnonRecordDecls(RT->getDecl(), Unit, LineNo, LinkageName,
- Var, DContext);
+ Var, DContext);
continue;
}
// Use VarDecl's Tag, Scope and Line number.
@@ -4090,7 +4227,6 @@ void CGDebugInfo::setDwoId(uint64_t Signature) {
TheCU->setDWOId(Signature);
}
-
void CGDebugInfo::finalize() {
// Creating types might create further types - invalidating the current
// element and the size(), so don't cache/reference them.
@@ -4102,32 +4238,55 @@ void CGDebugInfo::finalize() {
DBuilder.replaceTemporary(llvm::TempDIType(E.Decl), Ty);
}
- for (auto p : ReplaceMap) {
- assert(p.second);
- auto *Ty = cast<llvm::DIType>(p.second);
+ if (CGM.getCodeGenOpts().DwarfVersion >= 5) {
+ // Add methods to interface.
+ for (const auto &P : ObjCMethodCache) {
+ if (P.second.empty())
+ continue;
+
+ QualType QTy(P.first->getTypeForDecl(), 0);
+ auto It = TypeCache.find(QTy.getAsOpaquePtr());
+ assert(It != TypeCache.end());
+
+ llvm::DICompositeType *InterfaceDecl =
+ cast<llvm::DICompositeType>(It->second);
+
+ SmallVector<llvm::Metadata *, 16> EltTys;
+      auto CurrentElts = InterfaceDecl->getElements();
+      EltTys.append(CurrentElts.begin(), CurrentElts.end());
+ for (auto &MD : P.second)
+ EltTys.push_back(MD);
+ llvm::DINodeArray Elements = DBuilder.getOrCreateArray(EltTys);
+ DBuilder.replaceArrays(InterfaceDecl, Elements);
+ }
+ }
+
+ for (const auto &P : ReplaceMap) {
+ assert(P.second);
+ auto *Ty = cast<llvm::DIType>(P.second);
assert(Ty->isForwardDecl());
- auto it = TypeCache.find(p.first);
- assert(it != TypeCache.end());
- assert(it->second);
+ auto It = TypeCache.find(P.first);
+ assert(It != TypeCache.end());
+ assert(It->second);
DBuilder.replaceTemporary(llvm::TempDIType(Ty),
- cast<llvm::DIType>(it->second));
+ cast<llvm::DIType>(It->second));
}
- for (const auto &p : FwdDeclReplaceMap) {
- assert(p.second);
- llvm::TempMDNode FwdDecl(cast<llvm::MDNode>(p.second));
+ for (const auto &P : FwdDeclReplaceMap) {
+ assert(P.second);
+ llvm::TempMDNode FwdDecl(cast<llvm::MDNode>(P.second));
llvm::Metadata *Repl;
- auto it = DeclCache.find(p.first);
+ auto It = DeclCache.find(P.first);
// If there has been no definition for the declaration, call RAUW
// with ourselves; that will destroy the temporary MDNode and
// replace it with a standard one, avoiding a memory leak.
- if (it == DeclCache.end())
- Repl = p.second;
+ if (It == DeclCache.end())
+ Repl = P.second;
else
- Repl = it->second;
+ Repl = It->second;
if (auto *GVE = dyn_cast_or_null<llvm::DIGlobalVariableExpression>(Repl))
Repl = GVE->getVariable();
@@ -4157,6 +4316,5 @@ llvm::DebugLoc CGDebugInfo::SourceLocToDebugLoc(SourceLocation Loc) {
return llvm::DebugLoc();
llvm::MDNode *Scope = LexicalBlockStack.back();
- return llvm::DebugLoc::get(
- getLineNumber(Loc), getColumnNumber(Loc), Scope);
+ return llvm::DebugLoc::get(getLineNumber(Loc), getColumnNumber(Loc), Scope);
}
diff --git a/lib/CodeGen/CGDebugInfo.h b/lib/CodeGen/CGDebugInfo.h
index 4f7b7f2a0d9c..e632806138f0 100644
--- a/lib/CodeGen/CGDebugInfo.h
+++ b/lib/CodeGen/CGDebugInfo.h
@@ -19,6 +19,7 @@
#include "clang/AST/Expr.h"
#include "clang/AST/ExternalASTSource.h"
#include "clang/AST/Type.h"
+#include "clang/AST/TypeOrdering.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/ADT/DenseMap.h"
@@ -66,7 +67,7 @@ class CGDebugInfo {
llvm::DIType *ClassTy = nullptr;
llvm::DICompositeType *ObjTy = nullptr;
llvm::DIType *SelTy = nullptr;
-#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
+#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
llvm::DIType *SingletonId = nullptr;
#include "clang/Basic/OpenCLImageTypes.def"
llvm::DIType *OCLSamplerDITy = nullptr;
@@ -81,6 +82,10 @@ class CGDebugInfo {
llvm::SmallDenseMap<llvm::StringRef, llvm::StringRef> DebugPrefixMap;
+  /// Cache that maps each VLA type to its size expression,
+  /// represented by an instantiated Metadata node.
+ llvm::SmallDenseMap<QualType, llvm::Metadata *> SizeExprCache;
+
struct ObjCInterfaceCacheEntry {
const ObjCInterfaceType *Type;
llvm::DIType *Decl;
@@ -93,6 +98,10 @@ class CGDebugInfo {
/// Cache of previously constructed interfaces which may change.
llvm::SmallVector<ObjCInterfaceCacheEntry, 32> ObjCInterfaceCache;
+ /// Cache of forward declarations for methods belonging to the interface.
+ llvm::DenseMap<const ObjCInterfaceDecl *, std::vector<llvm::DISubprogram *>>
+ ObjCMethodCache;
+
/// Cache of references to clang modules and precompiled headers.
llvm::DenseMap<const Module *, llvm::TrackingMDRef> ModuleCache;
@@ -223,12 +232,12 @@ class CGDebugInfo {
/// Helper function for CollectCXXBases.
/// Adds debug info entries for types in Bases that are not in SeenTypes.
- void CollectCXXBasesAux(const CXXRecordDecl *RD, llvm::DIFile *Unit,
- SmallVectorImpl<llvm::Metadata *> &EltTys,
- llvm::DIType *RecordTy,
- const CXXRecordDecl::base_class_const_range &Bases,
- llvm::DenseSet<CanonicalDeclPtr<const CXXRecordDecl>> &SeenTypes,
- llvm::DINode::DIFlags StartingFlags);
+ void CollectCXXBasesAux(
+ const CXXRecordDecl *RD, llvm::DIFile *Unit,
+ SmallVectorImpl<llvm::Metadata *> &EltTys, llvm::DIType *RecordTy,
+ const CXXRecordDecl::base_class_const_range &Bases,
+ llvm::DenseSet<CanonicalDeclPtr<const CXXRecordDecl>> &SeenTypes,
+ llvm::DINode::DIFlags StartingFlags);
/// A helper function to collect template parameters.
llvm::DINodeArray CollectTemplateParams(const TemplateParameterList *TPList,
@@ -247,8 +256,7 @@ class CGDebugInfo {
llvm::DIType *createFieldType(StringRef name, QualType type,
SourceLocation loc, AccessSpecifier AS,
- uint64_t offsetInBits,
- uint32_t AlignInBits,
+ uint64_t offsetInBits, uint32_t AlignInBits,
llvm::DIFile *tunit, llvm::DIScope *scope,
const RecordDecl *RD = nullptr);
@@ -309,6 +317,11 @@ public:
void finalize();
+ /// Register VLA size expression debug node with the qualified type.
+ void registerVLASizeExpression(QualType Ty, llvm::Metadata *SizeExpr) {
+ SizeExprCache[Ty] = SizeExpr;
+ }
+
/// Module debugging: Support for building PCMs.
/// @{
/// Set the main CU's DwoId field to \p Signature.
@@ -356,7 +369,8 @@ public:
/// \param ScopeLoc The location of the function body.
void EmitFunctionStart(GlobalDecl GD, SourceLocation Loc,
SourceLocation ScopeLoc, QualType FnType,
- llvm::Function *Fn, CGBuilderTy &Builder);
+ llvm::Function *Fn, bool CurFnIsThunk,
+ CGBuilderTy &Builder);
/// Start a new scope for an inlined function.
void EmitInlineFunctionStart(CGBuilderTy &Builder, GlobalDecl GD);
@@ -379,16 +393,17 @@ public:
/// Emit call to \c llvm.dbg.declare for an automatic variable
/// declaration.
- void EmitDeclareOfAutoVariable(const VarDecl *Decl, llvm::Value *AI,
- CGBuilderTy &Builder);
+ /// Returns a pointer to the DILocalVariable associated with the
+  /// llvm.dbg.declare, or nullptr if no variable was emitted.
+ llvm::DILocalVariable *EmitDeclareOfAutoVariable(const VarDecl *Decl,
+ llvm::Value *AI,
+ CGBuilderTy &Builder);
/// Emit call to \c llvm.dbg.declare for an imported variable
/// declaration in a block.
- void EmitDeclareOfBlockDeclRefVariable(const VarDecl *variable,
- llvm::Value *storage,
- CGBuilderTy &Builder,
- const CGBlockInfo &blockInfo,
- llvm::Instruction *InsertPoint = nullptr);
+ void EmitDeclareOfBlockDeclRefVariable(
+ const VarDecl *variable, llvm::Value *storage, CGBuilderTy &Builder,
+ const CGBlockInfo &blockInfo, llvm::Instruction *InsertPoint = nullptr);
/// Emit call to \c llvm.dbg.declare for an argument variable
/// declaration.
@@ -451,10 +466,14 @@ public:
llvm::DIMacroFile *CreateTempMacroFile(llvm::DIMacroFile *Parent,
SourceLocation LineLoc,
SourceLocation FileLoc);
+
private:
/// Emit call to llvm.dbg.declare for a variable declaration.
- void EmitDeclare(const VarDecl *decl, llvm::Value *AI,
- llvm::Optional<unsigned> ArgNo, CGBuilderTy &Builder);
+ /// Returns a pointer to the DILocalVariable associated with the
+  /// llvm.dbg.declare, or nullptr if no variable was emitted.
+ llvm::DILocalVariable *EmitDeclare(const VarDecl *decl, llvm::Value *AI,
+ llvm::Optional<unsigned> ArgNo,
+ CGBuilderTy &Builder);
/// Build up structure info for the byref. See \a BuildByRefType.
llvm::DIType *EmitTypeForVarWithBlocksAttr(const VarDecl *VD,
@@ -482,8 +501,11 @@ private:
std::string remapDIPath(StringRef) const;
/// Compute the file checksum debug info for input file ID.
- llvm::DIFile::ChecksumKind computeChecksum(FileID FID,
- SmallString<32> &Checksum) const;
+ Optional<llvm::DIFile::ChecksumKind>
+ computeChecksum(FileID FID, SmallString<32> &Checksum) const;
+
+ /// Get the source of the given file ID.
+ Optional<StringRef> getSource(const SourceManager &SM, FileID FID);
/// Get the file debug info descriptor for the input location.
llvm::DIFile *getOrCreateFile(SourceLocation Loc);
@@ -637,7 +659,7 @@ public:
~ApplyDebugLocation();
- /// \brief Apply TemporaryLocation if it is valid. Otherwise switch
+ /// Apply TemporaryLocation if it is valid. Otherwise switch
/// to an artificial debug location that has a valid scope, but no
/// line information.
///
@@ -651,7 +673,7 @@ public:
static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF) {
return ApplyDebugLocation(CGF, false, SourceLocation());
}
- /// \brief Apply TemporaryLocation if it is valid. Otherwise switch
+ /// Apply TemporaryLocation if it is valid. Otherwise switch
/// to an artificial debug location that has a valid scope, but no
/// line information.
static ApplyDebugLocation
@@ -668,7 +690,6 @@ public:
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF) {
return ApplyDebugLocation(CGF, true, SourceLocation());
}
-
};
/// A scoped helper to set the current debug location to an inlined location.
diff --git a/lib/CodeGen/CGDecl.cpp b/lib/CodeGen/CGDecl.cpp
index 04585a8afbb6..57b2fbadbeec 100644
--- a/lib/CodeGen/CGDecl.cpp
+++ b/lib/CodeGen/CGDecl.cpp
@@ -229,18 +229,19 @@ llvm::Constant *CodeGenModule::getOrCreateStaticVarDecl(
LangAS AS = GetGlobalVarAddressSpace(&D);
unsigned TargetAS = getContext().getTargetAddressSpace(AS);
- // Local address space cannot have an initializer.
+ // OpenCL variables in local address space and CUDA shared
+ // variables cannot have an initializer.
llvm::Constant *Init = nullptr;
- if (Ty.getAddressSpace() != LangAS::opencl_local)
- Init = EmitNullConstant(Ty);
- else
+ if (Ty.getAddressSpace() == LangAS::opencl_local ||
+ D.hasAttr<CUDASharedAttr>())
Init = llvm::UndefValue::get(LTy);
+ else
+ Init = EmitNullConstant(Ty);
llvm::GlobalVariable *GV = new llvm::GlobalVariable(
getModule(), LTy, Ty.isConstant(getContext()), Linkage, Init, Name,
nullptr, llvm::GlobalVariable::NotThreadLocal, TargetAS);
GV->setAlignment(getContext().getDeclAlign(&D).getQuantity());
- setGlobalVisibility(GV, &D, ForDefinition);
if (supportsCOMDAT() && GV->isWeakForLinker())
GV->setComdat(TheModule.getOrInsertComdat(GV->getName()));
@@ -248,12 +249,7 @@ llvm::Constant *CodeGenModule::getOrCreateStaticVarDecl(
if (D.getTLSKind())
setTLSMode(GV, D);
- if (D.isExternallyVisible()) {
- if (D.hasAttr<DLLImportAttr>())
- GV->setDLLStorageClass(llvm::GlobalVariable::DLLImportStorageClass);
- else if (D.hasAttr<DLLExportAttr>())
- GV->setDLLStorageClass(llvm::GlobalVariable::DLLExportStorageClass);
- }
+ setGVProperties(GV, &D);
// Make sure the result is of the correct type.
LangAS ExpectedAS = Ty.getAddressSpace();
@@ -291,8 +287,11 @@ llvm::Constant *CodeGenModule::getOrCreateStaticVarDecl(
// never defer them.
assert(isa<ObjCMethodDecl>(DC) && "unexpected parent code decl");
}
- if (GD.getDecl())
+ if (GD.getDecl()) {
+ // Disable emission of the parent function for the OpenMP device codegen.
+ CGOpenMPRuntime::DisableAutoDeclareTargetRAII NoDeclTarget(*this);
(void)GetAddrOfGlobal(GD);
+ }
return Addr;
}
@@ -344,6 +343,7 @@ CodeGenFunction::AddInitializerToStaticVarDecl(const VarDecl &D,
OldGV->getThreadLocalMode(),
CGM.getContext().getTargetAddressSpace(D.getType()));
GV->setVisibility(OldGV->getVisibility());
+ GV->setDSOLocal(OldGV->isDSOLocal());
GV->setComdat(OldGV->getComdat());
// Steal the name of the old global
@@ -469,13 +469,11 @@ namespace {
}
};
- struct DestroyNRVOVariable final : EHScopeStack::Cleanup {
- DestroyNRVOVariable(Address addr,
- const CXXDestructorDecl *Dtor,
- llvm::Value *NRVOFlag)
- : Dtor(Dtor), NRVOFlag(NRVOFlag), Loc(addr) {}
+ template <class Derived>
+ struct DestroyNRVOVariable : EHScopeStack::Cleanup {
+ DestroyNRVOVariable(Address addr, llvm::Value *NRVOFlag)
+ : NRVOFlag(NRVOFlag), Loc(addr) {}
- const CXXDestructorDecl *Dtor;
llvm::Value *NRVOFlag;
Address Loc;
@@ -494,12 +492,39 @@ namespace {
CGF.EmitBlock(RunDtorBB);
}
+ static_cast<Derived *>(this)->emitDestructorCall(CGF);
+
+ if (NRVO) CGF.EmitBlock(SkipDtorBB);
+ }
+
+ virtual ~DestroyNRVOVariable() = default;
+ };
+
+ struct DestroyNRVOVariableCXX final
+ : DestroyNRVOVariable<DestroyNRVOVariableCXX> {
+ DestroyNRVOVariableCXX(Address addr, const CXXDestructorDecl *Dtor,
+ llvm::Value *NRVOFlag)
+ : DestroyNRVOVariable<DestroyNRVOVariableCXX>(addr, NRVOFlag),
+ Dtor(Dtor) {}
+
+ const CXXDestructorDecl *Dtor;
+
+ void emitDestructorCall(CodeGenFunction &CGF) {
CGF.EmitCXXDestructorCall(Dtor, Dtor_Complete,
/*ForVirtualBase=*/false,
- /*Delegating=*/false,
- Loc);
+ /*Delegating=*/false, Loc);
+ }
+ };
- if (NRVO) CGF.EmitBlock(SkipDtorBB);
+ struct DestroyNRVOVariableC final
+ : DestroyNRVOVariable<DestroyNRVOVariableC> {
+ DestroyNRVOVariableC(Address addr, llvm::Value *NRVOFlag, QualType Ty)
+ : DestroyNRVOVariable<DestroyNRVOVariableC>(addr, NRVOFlag), Ty(Ty) {}
+
+ QualType Ty;
+
+ void emitDestructorCall(CodeGenFunction &CGF) {
+ CGF.destroyNonTrivialCStruct(CGF, Loc, Ty);
}
};
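// Illustrative sketch (not part of the patch above): the CRTP dispatch
// introduced here, reduced to its essence. The base cleanup calls the derived
// class's hook without a virtual call at the dispatch site. Names are
// hypothetical.
#include <iostream>

template <class Derived> struct CleanupBase {
  void Emit() { static_cast<Derived *>(this)->emitDestructorCall(); }
};

struct CXXCleanup : CleanupBase<CXXCleanup> {
  void emitDestructorCall() { std::cout << "run C++ destructor\n"; }
};

struct CCleanup : CleanupBase<CCleanup> {
  void emitDestructorCall() { std::cout << "destroy non-trivial C struct\n"; }
};

int main() {
  CXXCleanup().Emit();
  CCleanup().Emit();
}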
@@ -821,11 +846,10 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D,
EmitStoreOfScalar(value, lvalue, /* isInitialization */ true);
}
-/// canEmitInitWithFewStoresAfterMemset - Decide whether we can emit the
-/// non-zero parts of the specified initializer with equal or fewer than
-/// NumStores scalar stores.
-static bool canEmitInitWithFewStoresAfterMemset(llvm::Constant *Init,
- unsigned &NumStores) {
+/// Decide whether we can emit the non-zero parts of the specified initializer
+/// with equal or fewer than NumStores scalar stores.
+static bool canEmitInitWithFewStoresAfterBZero(llvm::Constant *Init,
+ unsigned &NumStores) {
// Zero and Undef never requires any extra stores.
if (isa<llvm::ConstantAggregateZero>(Init) ||
isa<llvm::ConstantPointerNull>(Init) ||
@@ -840,7 +864,7 @@ static bool canEmitInitWithFewStoresAfterMemset(llvm::Constant *Init,
if (isa<llvm::ConstantArray>(Init) || isa<llvm::ConstantStruct>(Init)) {
for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) {
llvm::Constant *Elt = cast<llvm::Constant>(Init->getOperand(i));
- if (!canEmitInitWithFewStoresAfterMemset(Elt, NumStores))
+ if (!canEmitInitWithFewStoresAfterBZero(Elt, NumStores))
return false;
}
return true;
@@ -850,7 +874,7 @@ static bool canEmitInitWithFewStoresAfterMemset(llvm::Constant *Init,
dyn_cast<llvm::ConstantDataSequential>(Init)) {
for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
llvm::Constant *Elt = CDS->getElementAsConstant(i);
- if (!canEmitInitWithFewStoresAfterMemset(Elt, NumStores))
+ if (!canEmitInitWithFewStoresAfterBZero(Elt, NumStores))
return false;
}
return true;
@@ -860,18 +884,18 @@ static bool canEmitInitWithFewStoresAfterMemset(llvm::Constant *Init,
return false;
}
-/// emitStoresForInitAfterMemset - For inits that
-/// canEmitInitWithFewStoresAfterMemset returned true for, emit the scalar
-/// stores that would be required.
-static void emitStoresForInitAfterMemset(llvm::Constant *Init, llvm::Value *Loc,
- bool isVolatile, CGBuilderTy &Builder) {
+/// For inits that canEmitInitWithFewStoresAfterBZero returned true for, emit
+/// the scalar stores that would be required.
+static void emitStoresForInitAfterBZero(CodeGenModule &CGM,
+ llvm::Constant *Init, Address Loc,
+ bool isVolatile, CGBuilderTy &Builder) {
assert(!Init->isNullValue() && !isa<llvm::UndefValue>(Init) &&
- "called emitStoresForInitAfterMemset for zero or undef value.");
+ "called emitStoresForInitAfterBZero for zero or undef value.");
if (isa<llvm::ConstantInt>(Init) || isa<llvm::ConstantFP>(Init) ||
isa<llvm::ConstantVector>(Init) || isa<llvm::BlockAddress>(Init) ||
isa<llvm::ConstantExpr>(Init)) {
- Builder.CreateDefaultAlignedStore(Init, Loc, isVolatile);
+ Builder.CreateStore(Init, Loc, isVolatile);
return;
}
@@ -882,8 +906,9 @@ static void emitStoresForInitAfterMemset(llvm::Constant *Init, llvm::Value *Loc,
// If necessary, get a pointer to the element and emit it.
if (!Elt->isNullValue() && !isa<llvm::UndefValue>(Elt))
- emitStoresForInitAfterMemset(
- Elt, Builder.CreateConstGEP2_32(Init->getType(), Loc, 0, i),
+ emitStoresForInitAfterBZero(
+ CGM, Elt,
+ Builder.CreateConstInBoundsGEP2_32(Loc, 0, i, CGM.getDataLayout()),
isVolatile, Builder);
}
return;
@@ -897,19 +922,19 @@ static void emitStoresForInitAfterMemset(llvm::Constant *Init, llvm::Value *Loc,
// If necessary, get a pointer to the element and emit it.
if (!Elt->isNullValue() && !isa<llvm::UndefValue>(Elt))
- emitStoresForInitAfterMemset(
- Elt, Builder.CreateConstGEP2_32(Init->getType(), Loc, 0, i),
+ emitStoresForInitAfterBZero(
+ CGM, Elt,
+ Builder.CreateConstInBoundsGEP2_32(Loc, 0, i, CGM.getDataLayout()),
isVolatile, Builder);
}
}
-/// shouldUseMemSetPlusStoresToInitialize - Decide whether we should use memset
-/// plus some stores to initialize a local variable instead of using a memcpy
-/// from a constant global. It is beneficial to use memset if the global is all
-/// zeros, or mostly zeros and large.
-static bool shouldUseMemSetPlusStoresToInitialize(llvm::Constant *Init,
- uint64_t GlobalSize) {
- // If a global is all zeros, always use a memset.
+/// Decide whether we should use bzero plus some stores to initialize a local
+/// variable instead of using a memcpy from a constant global. It is beneficial
+/// to use bzero if the global is all zeros, or mostly zeros and large.
+static bool shouldUseBZeroPlusStoresToInitialize(llvm::Constant *Init,
+ uint64_t GlobalSize) {
+ // If a global is all zeros, always use a bzero.
if (isa<llvm::ConstantAggregateZero>(Init)) return true;
// If a non-zero global is <= 32 bytes, always use a memcpy. If it is large,
@@ -920,7 +945,114 @@ static bool shouldUseMemSetPlusStoresToInitialize(llvm::Constant *Init,
uint64_t SizeLimit = 32;
return GlobalSize > SizeLimit &&
- canEmitInitWithFewStoresAfterMemset(Init, StoreBudget);
+ canEmitInitWithFewStoresAfterBZero(Init, StoreBudget);
+}
+
+/// A byte pattern.
+///
+/// Can be "any" pattern if the value was padding or known to be undef.
+/// Can be "none" pattern if a sequence doesn't exist.
+class BytePattern {
+ uint8_t Val;
+ enum class ValueType : uint8_t { Specific, Any, None } Type;
+ BytePattern(ValueType Type) : Type(Type) {}
+
+public:
+ BytePattern(uint8_t Value) : Val(Value), Type(ValueType::Specific) {}
+ static BytePattern Any() { return BytePattern(ValueType::Any); }
+ static BytePattern None() { return BytePattern(ValueType::None); }
+ bool isAny() const { return Type == ValueType::Any; }
+ bool isNone() const { return Type == ValueType::None; }
+ bool isValued() const { return Type == ValueType::Specific; }
+ uint8_t getValue() const {
+ assert(isValued());
+ return Val;
+ }
+ BytePattern merge(const BytePattern Other) const {
+ if (isNone() || Other.isNone())
+ return None();
+ if (isAny())
+ return Other;
+ if (Other.isAny())
+ return *this;
+ if (getValue() == Other.getValue())
+ return *this;
+ return None();
+ }
+};
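// Illustrative sketch (not part of the patch above): a compact, runnable
// restatement of the merge lattice, to make the semantics concrete. Any is
// the identity element, None is absorbing, and two concrete bytes merge only
// when they are equal.
#include <cassert>
#include <cstdint>

enum class Pat : uint8_t { Specific, Any, None };
struct BP { uint8_t Val; Pat Type; };

static BP merge(BP A, BP B) {
  if (A.Type == Pat::None || B.Type == Pat::None) return {0, Pat::None};
  if (A.Type == Pat::Any) return B;
  if (B.Type == Pat::Any) return A;
  return A.Val == B.Val ? A : BP{0, Pat::None};
}

int main() {
  BP Zero{0x00, Pat::Specific}, AnyB{0, Pat::Any}, One{0x01, Pat::Specific};
  assert(merge(Zero, AnyB).Type == Pat::Specific); // padding adopts 0x00
  assert(merge(Zero, One).Type == Pat::None);      // mixed bytes: no memset
}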
+
+/// Figures out whether the constant can be initialized with memset.
+static BytePattern constantIsRepeatedBytePattern(llvm::Constant *C) {
+ if (isa<llvm::ConstantAggregateZero>(C) || isa<llvm::ConstantPointerNull>(C))
+ return BytePattern(0x00);
+ if (isa<llvm::UndefValue>(C))
+ return BytePattern::Any();
+
+ if (isa<llvm::ConstantInt>(C)) {
+ auto *Int = cast<llvm::ConstantInt>(C);
+ if (Int->getBitWidth() % 8 != 0)
+ return BytePattern::None();
+ const llvm::APInt &Value = Int->getValue();
+ if (Value.isSplat(8))
+ return BytePattern(Value.getLoBits(8).getLimitedValue());
+ return BytePattern::None();
+ }
+
+ if (isa<llvm::ConstantFP>(C)) {
+ auto *FP = cast<llvm::ConstantFP>(C);
+ llvm::APInt Bits = FP->getValueAPF().bitcastToAPInt();
+ if (Bits.getBitWidth() % 8 != 0)
+ return BytePattern::None();
+ if (!Bits.isSplat(8))
+ return BytePattern::None();
+ return BytePattern(Bits.getLimitedValue() & 0xFF);
+ }
+
+ if (isa<llvm::ConstantVector>(C)) {
+ llvm::Constant *Splat = cast<llvm::ConstantVector>(C)->getSplatValue();
+ if (Splat)
+ return constantIsRepeatedBytePattern(Splat);
+ return BytePattern::None();
+ }
+
+ if (isa<llvm::ConstantArray>(C) || isa<llvm::ConstantStruct>(C)) {
+ BytePattern Pattern(BytePattern::Any());
+ for (unsigned I = 0, E = C->getNumOperands(); I != E; ++I) {
+ llvm::Constant *Elt = cast<llvm::Constant>(C->getOperand(I));
+ Pattern = Pattern.merge(constantIsRepeatedBytePattern(Elt));
+ if (Pattern.isNone())
+ return Pattern;
+ }
+ return Pattern;
+ }
+
+ if (llvm::ConstantDataSequential *CDS =
+ dyn_cast<llvm::ConstantDataSequential>(C)) {
+ BytePattern Pattern(BytePattern::Any());
+ for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
+ llvm::Constant *Elt = CDS->getElementAsConstant(I);
+ Pattern = Pattern.merge(constantIsRepeatedBytePattern(Elt));
+ if (Pattern.isNone())
+ return Pattern;
+ }
+ return Pattern;
+ }
+
+ // BlockAddress, ConstantExpr, and everything else is scary.
+ return BytePattern::None();
+}
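// Illustrative sketch (not part of the patch above): the integer case reduces
// to APInt::isSplat(8), i.e. every byte of the value equals the low byte. A
// standalone equivalent for a fixed 64-bit width:
#include <cstdint>
#include <cstdio>

static bool isByteSplat(uint64_t V, uint8_t &Byte) {
  Byte = V & 0xFF;
  return V == 0x0101010101010101ULL * Byte;
}

int main() {
  uint8_t B;
  std::printf("%d\n", isByteSplat(0xABABABABABABABABULL, B)); // 1, B == 0xAB
  std::printf("%d\n", isByteSplat(0x00000000000000FFULL, B)); // 0
}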
+
+/// Decide whether we should use memset to initialize a local variable instead
+/// of using a memcpy from a constant global. Assumes we've already decided to
+/// not use bzero.
+/// FIXME: We could be more clever, as we are for bzero above, and generate
+/// memset followed by stores. It's unclear that's worth the effort.
+static BytePattern shouldUseMemSetToInitialize(llvm::Constant *Init,
+ uint64_t GlobalSize) {
+ uint64_t SizeLimit = 32;
+ if (GlobalSize <= SizeLimit)
+ return BytePattern::None();
+ return constantIsRepeatedBytePattern(Init);
}
/// EmitAutoVarDecl - Emit code and set up an entry in LocalDeclMap for a
@@ -940,6 +1072,9 @@ llvm::Value *CodeGenFunction::EmitLifetimeStart(uint64_t Size,
if (!ShouldEmitLifetimeMarkers)
return nullptr;
+ assert(Addr->getType()->getPointerAddressSpace() ==
+ CGM.getDataLayout().getAllocaAddrSpace() &&
+ "Pointer should be in alloca address space");
llvm::Value *SizeV = llvm::ConstantInt::get(Int64Ty, Size);
Addr = Builder.CreateBitCast(Addr, AllocaInt8PtrTy);
llvm::CallInst *C =
@@ -949,12 +1084,68 @@ llvm::Value *CodeGenFunction::EmitLifetimeStart(uint64_t Size,
}
void CodeGenFunction::EmitLifetimeEnd(llvm::Value *Size, llvm::Value *Addr) {
+ assert(Addr->getType()->getPointerAddressSpace() ==
+ CGM.getDataLayout().getAllocaAddrSpace() &&
+ "Pointer should be in alloca address space");
Addr = Builder.CreateBitCast(Addr, AllocaInt8PtrTy);
llvm::CallInst *C =
Builder.CreateCall(CGM.getLLVMLifetimeEndFn(), {Size, Addr});
C->setDoesNotThrow();
}
+void CodeGenFunction::EmitAndRegisterVariableArrayDimensions(
+ CGDebugInfo *DI, const VarDecl &D, bool EmitDebugInfo) {
+  // Stores, for each dimension, its QualType and the corresponding
+  // size-expression Value.
+ SmallVector<CodeGenFunction::VlaSizePair, 4> Dimensions;
+
+ // Break down the array into individual dimensions.
+ QualType Type1D = D.getType();
+ while (getContext().getAsVariableArrayType(Type1D)) {
+ auto VlaSize = getVLAElements1D(Type1D);
+ if (auto *C = dyn_cast<llvm::ConstantInt>(VlaSize.NumElts))
+ Dimensions.emplace_back(C, Type1D.getUnqualifiedType());
+ else {
+ auto SizeExprAddr = CreateDefaultAlignTempAlloca(
+ VlaSize.NumElts->getType(), "__vla_expr");
+ Builder.CreateStore(VlaSize.NumElts, SizeExprAddr);
+ Dimensions.emplace_back(SizeExprAddr.getPointer(),
+ Type1D.getUnqualifiedType());
+ }
+ Type1D = VlaSize.Type;
+ }
+
+ if (!EmitDebugInfo)
+ return;
+
+ // Register each dimension's size-expression with a DILocalVariable,
+ // so that it can be used by CGDebugInfo when instantiating a DISubrange
+ // to describe this array.
+ for (auto &VlaSize : Dimensions) {
+ llvm::Metadata *MD;
+ if (auto *C = dyn_cast<llvm::ConstantInt>(VlaSize.NumElts))
+ MD = llvm::ConstantAsMetadata::get(C);
+ else {
+ // Create an artificial VarDecl to generate debug info for.
+ IdentifierInfo &NameIdent = getContext().Idents.getOwn(
+ cast<llvm::AllocaInst>(VlaSize.NumElts)->getName());
+ auto VlaExprTy = VlaSize.NumElts->getType()->getPointerElementType();
+ auto QT = getContext().getIntTypeForBitwidth(
+ VlaExprTy->getScalarSizeInBits(), false);
+ auto *ArtificialDecl = VarDecl::Create(
+ getContext(), const_cast<DeclContext *>(D.getDeclContext()),
+ D.getLocation(), D.getLocation(), &NameIdent, QT,
+ getContext().CreateTypeSourceInfo(QT), SC_Auto);
+ ArtificialDecl->setImplicit();
+
+ MD = DI->EmitDeclareOfAutoVariable(ArtificialDecl, VlaSize.NumElts,
+ Builder);
+ }
+ assert(MD && "No Size expression debug node created");
+ DI->registerVLASizeExpression(VlaSize.Type, MD);
+ }
+}
+
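A source-level sketch of the case handled above (illustrative; VLAs are a
GNU extension in C++): with debug info enabled, each dimension's size is
stored into its own __vla_expr alloca and registered for the DISubrange
that describes 'a'.

    void f(int m, int n) {
      int a[m][n]; // two dimensions -> two size expressions registered
      a[0][0] = 0; // keep 'a' alive so the alloca is emitted
    }
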
/// EmitAutoVarAlloca - Emit the alloca and debug information for a
/// local variable. Does not emit initialization or destruction.
CodeGenFunction::AutoVarEmission
@@ -975,7 +1166,12 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
if (Ty->isVariablyModifiedType())
EmitVariablyModifiedType(Ty);
+ auto *DI = getDebugInfo();
+ bool EmitDebugInfo = DI && CGM.getCodeGenOpts().getDebugInfo() >=
+ codegenoptions::LimitedDebugInfo;
+
Address address = Address::invalid();
+ Address AllocaAddr = Address::invalid();
if (Ty->isConstantSizeType()) {
bool NRVO = getLangOpts().ElideConstructors &&
D.isNRVOVariable();
@@ -1016,16 +1212,27 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
}
// A normal fixed sized variable becomes an alloca in the entry block,
- // unless it's an NRVO variable.
-
- if (NRVO) {
+ // unless:
+ // - it's an NRVO variable.
+ // - we are compiling OpenMP and it's an OpenMP local variable.
+
+ Address OpenMPLocalAddr =
+ getLangOpts().OpenMP
+ ? CGM.getOpenMPRuntime().getAddressOfLocalVariable(*this, &D)
+ : Address::invalid();
+ if (getLangOpts().OpenMP && OpenMPLocalAddr.isValid()) {
+ address = OpenMPLocalAddr;
+ } else if (NRVO) {
// The named return value optimization: allocate this variable in the
// return slot, so that we can elide the copy when returning this
// variable (C++0x [class.copy]p34).
address = ReturnValue;
if (const RecordType *RecordTy = Ty->getAs<RecordType>()) {
- if (!cast<CXXRecordDecl>(RecordTy->getDecl())->hasTrivialDestructor()) {
+ const auto *RD = RecordTy->getDecl();
+ const auto *CXXRD = dyn_cast<CXXRecordDecl>(RD);
+ if ((CXXRD && !CXXRD->hasTrivialDestructor()) ||
+ RD->isNonTrivialToPrimitiveDestroy()) {
// Create a flag that is used to indicate when the NRVO was applied
// to this variable. Set it to zero to indicate that NRVO was not
// applied.
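
A minimal sketch of the NRVO path referenced above (illustrative only):
'b' is constructed directly in the caller's return slot, and the flag
created below guards the destructor when NRVO applies on some paths only.

    struct Big { int data[64]; ~Big(); };
    Big make() {
      Big b;    // NRVO variable: built in the return slot
      return b; // no copy or move; the NRVO flag suppresses ~Big() here
    }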
@@ -1055,7 +1262,8 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
// Create the alloca. Note that we set the name separately from
// building the instruction so that it's there even in no-asserts
// builds.
- address = CreateTempAlloca(allocaTy, allocaAlignment, D.getName());
+ address = CreateTempAlloca(allocaTy, allocaAlignment, D.getName(),
+ /*ArraySize=*/nullptr, &AllocaAddr);
// Don't emit lifetime markers for MSVC catch parameters. The lifetime of
// the catch parameter starts in the catchpad instruction, and we can't
@@ -1083,7 +1291,7 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
!(!getLangOpts().CPlusPlus && hasLabelBeenSeenInCurrentScope())) {
uint64_t size = CGM.getDataLayout().getTypeAllocSize(allocaTy);
emission.SizeForLifetimeMarkers =
- EmitLifetimeStart(size, address.getPointer());
+ EmitLifetimeStart(size, AllocaAddr.getPointer());
}
} else {
assert(!emission.useLifetimeMarkers());
@@ -1108,28 +1316,28 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
pushStackRestore(NormalCleanup, Stack);
}
- llvm::Value *elementCount;
- QualType elementType;
- std::tie(elementCount, elementType) = getVLASize(Ty);
-
- llvm::Type *llvmTy = ConvertTypeForMem(elementType);
+ auto VlaSize = getVLASize(Ty);
+ llvm::Type *llvmTy = ConvertTypeForMem(VlaSize.Type);
// Allocate memory for the array.
- address = CreateTempAlloca(llvmTy, alignment, "vla", elementCount);
+ address = CreateTempAlloca(llvmTy, alignment, "vla", VlaSize.NumElts,
+ &AllocaAddr);
+
+ // If we have debug info enabled, properly describe the VLA dimensions for
+ // this type by registering the vla size expression for each of the
+ // dimensions.
+ EmitAndRegisterVariableArrayDimensions(DI, D, EmitDebugInfo);
}
setAddrOfLocalVar(&D, address);
emission.Addr = address;
+ emission.AllocaAddr = AllocaAddr;
// Emit debug info for local var declaration.
- if (HaveInsertPoint())
- if (CGDebugInfo *DI = getDebugInfo()) {
- if (CGM.getCodeGenOpts().getDebugInfo() >=
- codegenoptions::LimitedDebugInfo) {
- DI->setLocation(D.getLocation());
- DI->EmitDeclareOfAutoVariable(&D, address.getPointer(), Builder);
- }
- }
+ if (EmitDebugInfo && HaveInsertPoint()) {
+ DI->setLocation(D.getLocation());
+ (void)DI->EmitDeclareOfAutoVariable(&D, address.getPointer(), Builder);
+ }
if (D.hasAttr<AnnotateAttr>())
EmitVarAnnotations(&D, address.getPointer());
@@ -1137,23 +1345,36 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
// Make sure we call @llvm.lifetime.end.
if (emission.useLifetimeMarkers())
EHStack.pushCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker,
- emission.getAllocatedAddress(),
+ emission.getOriginalAllocatedAddress(),
emission.getSizeForLifetimeMarkers());
return emission;
}
+static bool isCapturedBy(const VarDecl &, const Expr *);
+
+/// Determines whether the given __block variable is potentially
+/// captured by the given statement.
+static bool isCapturedBy(const VarDecl &Var, const Stmt *S) {
+ if (const Expr *E = dyn_cast<Expr>(S))
+ return isCapturedBy(Var, E);
+ for (const Stmt *SubStmt : S->children())
+ if (isCapturedBy(Var, SubStmt))
+ return true;
+ return false;
+}
+
/// Determines whether the given __block variable is potentially
/// captured by the given expression.
-static bool isCapturedBy(const VarDecl &var, const Expr *e) {
+static bool isCapturedBy(const VarDecl &Var, const Expr *E) {
// Skip the most common kinds of expressions that make
// hierarchy-walking expensive.
- e = e->IgnoreParenCasts();
+ E = E->IgnoreParenCasts();
- if (const BlockExpr *be = dyn_cast<BlockExpr>(e)) {
- const BlockDecl *block = be->getBlockDecl();
- for (const auto &I : block->captures()) {
- if (I.getVariable() == &var)
+ if (const BlockExpr *BE = dyn_cast<BlockExpr>(E)) {
+ const BlockDecl *Block = BE->getBlockDecl();
+ for (const auto &I : Block->captures()) {
+ if (I.getVariable() == &Var)
return true;
}
@@ -1161,19 +1382,19 @@ static bool isCapturedBy(const VarDecl &var, const Expr *e) {
return false;
}
- if (const StmtExpr *SE = dyn_cast<StmtExpr>(e)) {
+ if (const StmtExpr *SE = dyn_cast<StmtExpr>(E)) {
const CompoundStmt *CS = SE->getSubStmt();
for (const auto *BI : CS->body())
- if (const auto *E = dyn_cast<Expr>(BI)) {
- if (isCapturedBy(var, E))
- return true;
+ if (const auto *BIE = dyn_cast<Expr>(BI)) {
+ if (isCapturedBy(Var, BIE))
+ return true;
}
else if (const auto *DS = dyn_cast<DeclStmt>(BI)) {
// special case declarations
for (const auto *I : DS->decls()) {
if (const auto *VD = dyn_cast<VarDecl>((I))) {
const Expr *Init = VD->getInit();
- if (Init && isCapturedBy(var, Init))
+ if (Init && isCapturedBy(Var, Init))
return true;
}
}
@@ -1185,14 +1406,14 @@ static bool isCapturedBy(const VarDecl &var, const Expr *e) {
return false;
}
- for (const Stmt *SubStmt : e->children())
- if (isCapturedBy(var, cast<Expr>(SubStmt)))
+ for (const Stmt *SubStmt : E->children())
+ if (isCapturedBy(Var, SubStmt))
return true;
return false;
}
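
A sketch of the query above (illustrative; requires the clang blocks
extension, -fblocks): isCapturedBy() reports whether the __block variable
is captured somewhere inside its own initializer.

    void test() {
      __block int x = 1;                // initializer does not capture 'x'
      __block int y = ^{ return y; }(); // initializer's block captures 'y',
                                        // so the byref structure for 'y'
                                        // must be set up first
      (void)x; (void)y;
    }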
-/// \brief Determine whether the given initializer is trivial in the sense
+/// Determine whether the given initializer is trivial in the sense
/// that it requires no code to be generated.
bool CodeGenFunction::isTrivialInitializer(const Expr *Init) {
if (!Init)
@@ -1232,6 +1453,19 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) {
if (emission.IsByRef)
emitByrefStructureInit(emission);
+  // Initialize the variable here if it doesn't have an initializer and it is a
+ // C struct that is non-trivial to initialize or an array containing such a
+ // struct.
+ if (!Init &&
+ type.isNonTrivialToPrimitiveDefaultInitialize() ==
+ QualType::PDIK_Struct) {
+ LValue Dst = MakeAddrLValue(emission.getAllocatedAddress(), type);
+ if (emission.IsByRef)
+ drillIntoBlockVariable(*this, Dst, &D);
+ defaultInitNonTrivialCStructVar(Dst);
+ return;
+ }
+
if (isTrivialInitializer(Init))
return;
@@ -1270,58 +1504,66 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) {
llvm::ConstantInt::get(IntPtrTy,
getContext().getTypeSizeInChars(type).getQuantity());
- llvm::Type *BP = AllocaInt8PtrTy;
+ llvm::Type *BP = CGM.Int8Ty->getPointerTo(Loc.getAddressSpace());
if (Loc.getType() != BP)
Loc = Builder.CreateBitCast(Loc, BP);
- // If the initializer is all or mostly zeros, codegen with memset then do
- // a few stores afterward.
- if (shouldUseMemSetPlusStoresToInitialize(constant,
- CGM.getDataLayout().getTypeAllocSize(constant->getType()))) {
+ // If the initializer is all or mostly the same, codegen with bzero / memset
+ // then do a few stores afterward.
+ uint64_t ConstantSize =
+ CGM.getDataLayout().getTypeAllocSize(constant->getType());
+ if (shouldUseBZeroPlusStoresToInitialize(constant, ConstantSize)) {
Builder.CreateMemSet(Loc, llvm::ConstantInt::get(Int8Ty, 0), SizeVal,
isVolatile);
    // Zero and undef don't require any stores.
if (!constant->isNullValue() && !isa<llvm::UndefValue>(constant)) {
- Loc = Builder.CreateBitCast(Loc, constant->getType()->getPointerTo());
- emitStoresForInitAfterMemset(constant, Loc.getPointer(),
- isVolatile, Builder);
- }
- } else {
- // Otherwise, create a temporary global with the initializer then
- // memcpy from the global to the alloca.
- std::string Name = getStaticDeclName(CGM, D);
- unsigned AS = 0;
- if (getLangOpts().OpenCL) {
- AS = CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant);
- BP = llvm::PointerType::getInt8PtrTy(getLLVMContext(), AS);
+ Loc = Builder.CreateBitCast(Loc,
+ constant->getType()->getPointerTo(Loc.getAddressSpace()));
+ emitStoresForInitAfterBZero(CGM, constant, Loc, isVolatile, Builder);
}
- llvm::GlobalVariable *GV =
- new llvm::GlobalVariable(CGM.getModule(), constant->getType(), true,
- llvm::GlobalValue::PrivateLinkage,
- constant, Name, nullptr,
- llvm::GlobalValue::NotThreadLocal, AS);
- GV->setAlignment(Loc.getAlignment().getQuantity());
- GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
-
- Address SrcPtr = Address(GV, Loc.getAlignment());
- if (SrcPtr.getType() != BP)
- SrcPtr = Builder.CreateBitCast(SrcPtr, BP);
+ return;
+ }
- Builder.CreateMemCpy(Loc, SrcPtr, SizeVal, isVolatile);
+ BytePattern Pattern = shouldUseMemSetToInitialize(constant, ConstantSize);
+ if (!Pattern.isNone()) {
+ uint8_t Value = Pattern.isAny() ? 0x00 : Pattern.getValue();
+ Builder.CreateMemSet(Loc, llvm::ConstantInt::get(Int8Ty, Value), SizeVal,
+ isVolatile);
+ return;
}
+
+ // Otherwise, create a temporary global with the initializer then
+ // memcpy from the global to the alloca.
+ std::string Name = getStaticDeclName(CGM, D);
+ unsigned AS = CGM.getContext().getTargetAddressSpace(
+ CGM.getStringLiteralAddressSpace());
+ BP = llvm::PointerType::getInt8PtrTy(getLLVMContext(), AS);
+
+ llvm::GlobalVariable *GV = new llvm::GlobalVariable(
+ CGM.getModule(), constant->getType(), true,
+ llvm::GlobalValue::PrivateLinkage, constant, Name, nullptr,
+ llvm::GlobalValue::NotThreadLocal, AS);
+ GV->setAlignment(Loc.getAlignment().getQuantity());
+ GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+
+ Address SrcPtr = Address(GV, Loc.getAlignment());
+ if (SrcPtr.getType() != BP)
+ SrcPtr = Builder.CreateBitCast(SrcPtr, BP);
+
+ Builder.CreateMemCpy(Loc, SrcPtr, SizeVal, isVolatile);
}
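
A source-level sketch of the three strategies selected above (illustrative
locals; use() is an assumed external function): all-zero initializers keep
the bzero path, a repeated-byte initializer larger than 32 bytes now takes
the new memset path, and everything else still memcpys from a private
constant global.

    extern void use(unsigned *);
    void h() {
      unsigned z[10] = {};                       // -> memset of 0
      unsigned s[10] = {0x2a2a2a2a, 0x2a2a2a2a, 0x2a2a2a2a, 0x2a2a2a2a,
                        0x2a2a2a2a, 0x2a2a2a2a, 0x2a2a2a2a, 0x2a2a2a2a,
                        0x2a2a2a2a, 0x2a2a2a2a}; // -> memset of 0x2a (40 > 32)
      unsigned m[10] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; // -> memcpy from global
      use(z); use(s); use(m);
    }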
-/// Emit an expression as an initializer for a variable at the given
-/// location. The expression is not necessarily the normal
-/// initializer for the variable, and the address is not necessarily
+/// Emit an expression as an initializer for an object (variable, field, etc.)
+/// at the given location. The expression is not necessarily the normal
+/// initializer for the object, and the address is not necessarily
/// its normal location.
///
/// \param init the initializing expression
-/// \param var the variable to act as if we're initializing
+/// \param D the object to act as if we're initializing
/// \param loc the address to initialize; its type is a pointer
-/// to the LLVM mapping of the variable's type
+/// to the LLVM mapping of the object's type
/// \param alignment the alignment of the address
-/// \param capturedByInit true if the variable is a __block variable
+/// \param capturedByInit true if \p D is a __block variable
/// whose address is potentially changed by the initializer
void CodeGenFunction::EmitExprAsInit(const Expr *init, const ValueDecl *D,
LValue lvalue, bool capturedByInit) {
@@ -1349,11 +1591,17 @@ void CodeGenFunction::EmitExprAsInit(const Expr *init, const ValueDecl *D,
if (type->isAtomicType()) {
EmitAtomicInit(const_cast<Expr*>(init), lvalue);
} else {
+ AggValueSlot::Overlap_t Overlap = AggValueSlot::MayOverlap;
+ if (isa<VarDecl>(D))
+ Overlap = AggValueSlot::DoesNotOverlap;
+ else if (auto *FD = dyn_cast<FieldDecl>(D))
+ Overlap = overlapForFieldInit(FD);
// TODO: how can we delay here if D is captured by its initializer?
EmitAggExpr(init, AggValueSlot::forLValue(lvalue,
AggValueSlot::IsDestructed,
AggValueSlot::DoesNotNeedGCBarriers,
- AggValueSlot::IsNotAliased));
+ AggValueSlot::IsNotAliased,
+ Overlap));
}
return;
}
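
The overlap distinction above matters when a subobject's storage can share
bytes with a neighboring member; the classic case is tail-padding reuse
(illustrative sketch, analogous to the field case handled here):

    struct A { int i; char c; }; // data size 5, sizeof(A) likely 8
    struct B : A { char d; };    // 'd' may be placed in A's tail padding
    // Initializing the A subobject of a B must not be widened over 'd',
    // so such stores use MayOverlap; a complete local variable never
    // shares bytes and gets DoesNotOverlap.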
@@ -1386,8 +1634,8 @@ void CodeGenFunction::emitAutoVarTypeCleanup(
if (emission.NRVOFlag) {
assert(!type->isArrayType());
CXXDestructorDecl *dtor = type->getAsCXXRecordDecl()->getDestructor();
- EHStack.pushCleanup<DestroyNRVOVariable>(cleanupKind, addr,
- dtor, emission.NRVOFlag);
+ EHStack.pushCleanup<DestroyNRVOVariableCXX>(cleanupKind, addr, dtor,
+ emission.NRVOFlag);
return;
}
break;
@@ -1406,6 +1654,16 @@ void CodeGenFunction::emitAutoVarTypeCleanup(
case QualType::DK_objc_weak_lifetime:
break;
+
+ case QualType::DK_nontrivial_c_struct:
+ destroyer = CodeGenFunction::destroyNonTrivialCStruct;
+ if (emission.NRVOFlag) {
+ assert(!type->isArrayType());
+ EHStack.pushCleanup<DestroyNRVOVariableC>(cleanupKind, addr,
+ emission.NRVOFlag, type);
+ return;
+ }
+ break;
}
// If we haven't chosen a more specific destroyer, use the default.
@@ -1452,9 +1710,15 @@ void CodeGenFunction::EmitAutoVarCleanups(const AutoVarEmission &emission) {
}
// If this is a block variable, call _Block_object_destroy
- // (on the unforwarded address).
- if (emission.IsByRef)
- enterByrefCleanup(emission);
+ // (on the unforwarded address). Don't enter this cleanup if we're in pure-GC
+ // mode.
+ if (emission.IsByRef && CGM.getLangOpts().getGC() != LangOptions::GCOnly) {
+ BlockFieldFlags Flags = BLOCK_FIELD_IS_BYREF;
+ if (emission.Variable->getType().isObjCGCWeak())
+ Flags |= BLOCK_FIELD_IS_WEAK;
+ enterByrefCleanup(NormalAndEHCleanup, emission.Addr, Flags,
+ /*LoadBlockVarAddr*/ false);
+ }
}
CodeGenFunction::Destroyer *
@@ -1467,6 +1731,8 @@ CodeGenFunction::getDestroyer(QualType::DestructionKind kind) {
return destroyARCStrongPrecise;
case QualType::DK_objc_weak_lifetime:
return destroyARCWeak;
+ case QualType::DK_nontrivial_c_struct:
+ return destroyNonTrivialCStruct;
}
llvm_unreachable("Unknown DestructionKind");
}
@@ -1506,9 +1772,6 @@ void CodeGenFunction::pushStackRestore(CleanupKind Kind, Address SPMem) {
void CodeGenFunction::pushLifetimeExtendedDestroy(
CleanupKind cleanupKind, Address addr, QualType type,
Destroyer *destroyer, bool useEHCleanupForArray) {
- assert(!isInConditionalBranch() &&
- "performing lifetime extension from within conditional");
-
// Push an EH-only cleanup for the object now.
// FIXME: When popping normal cleanups, we need to keep this EH cleanup
// around in case a temporary's destructor throws an exception.
@@ -1791,9 +2054,12 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg,
// Use better IR generation for certain implicit parameters.
if (auto IPD = dyn_cast<ImplicitParamDecl>(&D)) {
// The only implicit argument a block has is its literal.
- // We assume this is always passed directly.
+ // This may be passed as an inalloca'ed value on Windows x86.
if (BlockInfo) {
- setBlockContextParameter(IPD, ArgNo, Arg.getDirectValue());
+ llvm::Value *V = Arg.isIndirect()
+ ? Builder.CreateLoad(Arg.getIndirectAddress())
+ : Arg.getDirectValue();
+ setBlockContextParameter(IPD, ArgNo, V);
return;
}
}
@@ -1809,20 +2075,50 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg,
llvm::Type *IRTy = ConvertTypeForMem(Ty)->getPointerTo(AS);
if (DeclPtr.getType() != IRTy)
DeclPtr = Builder.CreateBitCast(DeclPtr, IRTy, D.getName());
+ // Indirect argument is in alloca address space, which may be different
+ // from the default address space.
+ auto AllocaAS = CGM.getASTAllocaAddressSpace();
+ auto *V = DeclPtr.getPointer();
+ auto SrcLangAS = getLangOpts().OpenCL ? LangAS::opencl_private : AllocaAS;
+ auto DestLangAS =
+ getLangOpts().OpenCL ? LangAS::opencl_private : LangAS::Default;
+ if (SrcLangAS != DestLangAS) {
+ assert(getContext().getTargetAddressSpace(SrcLangAS) ==
+ CGM.getDataLayout().getAllocaAddrSpace());
+ auto DestAS = getContext().getTargetAddressSpace(DestLangAS);
+ auto *T = V->getType()->getPointerElementType()->getPointerTo(DestAS);
+ DeclPtr = Address(getTargetHooks().performAddrSpaceCast(
+ *this, V, SrcLangAS, DestLangAS, T, true),
+ DeclPtr.getAlignment());
+ }
// Push a destructor cleanup for this parameter if the ABI requires it.
// Don't push a cleanup in a thunk for a method that will also emit a
// cleanup.
- if (!IsScalar && !CurFuncIsThunk &&
- getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee()) {
- const CXXRecordDecl *RD = Ty->getAsCXXRecordDecl();
- if (RD && RD->hasNonTrivialDestructor())
- pushDestroy(QualType::DK_cxx_destructor, DeclPtr, Ty);
+ if (hasAggregateEvaluationKind(Ty) && !CurFuncIsThunk &&
+ Ty->getAs<RecordType>()->getDecl()->isParamDestroyedInCallee()) {
+ if (QualType::DestructionKind DtorKind = Ty.isDestructedType()) {
+ assert((DtorKind == QualType::DK_cxx_destructor ||
+ DtorKind == QualType::DK_nontrivial_c_struct) &&
+ "unexpected destructor type");
+ pushDestroy(DtorKind, DeclPtr, Ty);
+ CalleeDestructedParamCleanups[cast<ParmVarDecl>(&D)] =
+ EHStack.stable_begin();
+ }
}
} else {
- // Otherwise, create a temporary to hold the value.
- DeclPtr = CreateMemTemp(Ty, getContext().getDeclAlign(&D),
- D.getName() + ".addr");
+ // Check if the parameter address is controlled by OpenMP runtime.
+ Address OpenMPLocalAddr =
+ getLangOpts().OpenMP
+ ? CGM.getOpenMPRuntime().getAddressOfLocalVariable(*this, &D)
+ : Address::invalid();
+ if (getLangOpts().OpenMP && OpenMPLocalAddr.isValid()) {
+ DeclPtr = OpenMPLocalAddr;
+ } else {
+ // Otherwise, create a temporary to hold the value.
+ DeclPtr = CreateMemTemp(Ty, getContext().getDeclAlign(&D),
+ D.getName() + ".addr");
+ }
DoStore = true;
}
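
A minimal sketch of the address-space adjustment above using the public
IRBuilder API (hypothetical helper; the real path goes through the target's
performAddrSpaceCast hook, which targets may override):

    #include "llvm/IR/IRBuilder.h"

    // Cast V from the alloca address space to DefaultAS when they differ.
    static llvm::Value *castToDefaultAS(llvm::IRBuilder<> &B, llvm::Value *V,
                                        unsigned DefaultAS) {
      auto *SrcTy = llvm::cast<llvm::PointerType>(V->getType());
      if (SrcTy->getAddressSpace() == DefaultAS)
        return V;
      auto *DstTy = llvm::PointerType::get(SrcTy->getElementType(), DefaultAS);
      return B.CreateAddrSpaceCast(V, DstTy);
    }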
diff --git a/lib/CodeGen/CGDeclCXX.cpp b/lib/CodeGen/CGDeclCXX.cpp
index 042997831702..5e237d7e0b69 100644
--- a/lib/CodeGen/CGDeclCXX.cpp
+++ b/lib/CodeGen/CGDeclCXX.cpp
@@ -53,7 +53,8 @@ static void EmitDeclInit(CodeGenFunction &CGF, const VarDecl &D,
case TEK_Aggregate:
CGF.EmitAggExpr(Init, AggValueSlot::forLValue(lv,AggValueSlot::IsDestructed,
AggValueSlot::DoesNotNeedGCBarriers,
- AggValueSlot::IsNotAliased));
+ AggValueSlot::IsNotAliased,
+ AggValueSlot::DoesNotOverlap));
return;
}
llvm_unreachable("bad evaluation kind");
@@ -79,6 +80,7 @@ static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D,
case QualType::DK_objc_strong_lifetime:
case QualType::DK_objc_weak_lifetime:
+ case QualType::DK_nontrivial_c_struct:
// We don't care about releasing objects during process teardown.
assert(!D.getTLSKind() && "should have rejected this");
return;
@@ -173,10 +175,12 @@ void CodeGenFunction::EmitCXXGlobalVarDeclInit(const VarDecl &D,
ConstantAddress DeclAddr(DeclPtr, getContext().getDeclAlign(&D));
if (!T->isReferenceType()) {
- if (getLangOpts().OpenMP && D.hasAttr<OMPThreadPrivateDeclAttr>())
+ if (getLangOpts().OpenMP && !getLangOpts().OpenMPSimd &&
+ D.hasAttr<OMPThreadPrivateDeclAttr>()) {
(void)CGM.getOpenMPRuntime().emitThreadPrivateVarDefinition(
&D, DeclAddr, D.getAttr<OMPThreadPrivateDeclAttr>()->getLocation(),
PerformInit, this);
+ }
if (PerformInit)
EmitDeclInit(*this, D, DeclAddr);
if (CGM.isTypeConstant(D.getType(), true))
@@ -232,7 +236,10 @@ void CodeGenFunction::registerGlobalDtorWithAtExit(const VarDecl &VD,
llvm::Constant *addr) {
// Create a function which calls the destructor.
llvm::Constant *dtorStub = createAtExitStub(VD, dtor, addr);
+ registerGlobalDtorWithAtExit(dtorStub);
+}
+void CodeGenFunction::registerGlobalDtorWithAtExit(llvm::Constant *dtorStub) {
// extern "C" int atexit(void (*f)(void));
llvm::FunctionType *atexitTy =
llvm::FunctionType::get(IntTy, dtorStub->getType(), false);
@@ -309,7 +316,7 @@ llvm::Function *CodeGenModule::CreateGlobalInitOrDestructFunction(
Fn->setSection(Section);
}
- SetInternalFunctionAttributes(nullptr, Fn, FI);
+ SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);
Fn->setCallingConv(getRuntimeCC());
@@ -328,6 +335,10 @@ llvm::Function *CodeGenModule::CreateGlobalInitOrDestructFunction(
!isInSanitizerBlacklist(SanitizerKind::HWAddress, Fn, Loc))
Fn->addFnAttr(llvm::Attribute::SanitizeHWAddress);
+ if (getLangOpts().Sanitize.has(SanitizerKind::KernelHWAddress) &&
+ !isInSanitizerBlacklist(SanitizerKind::KernelHWAddress, Fn, Loc))
+ Fn->addFnAttr(llvm::Attribute::SanitizeHWAddress);
+
if (getLangOpts().Sanitize.has(SanitizerKind::Thread) &&
!isInSanitizerBlacklist(SanitizerKind::Thread, Fn, Loc))
Fn->addFnAttr(llvm::Attribute::SanitizeThread);
@@ -340,6 +351,10 @@ llvm::Function *CodeGenModule::CreateGlobalInitOrDestructFunction(
!isInSanitizerBlacklist(SanitizerKind::SafeStack, Fn, Loc))
Fn->addFnAttr(llvm::Attribute::SafeStack);
+ if (getLangOpts().Sanitize.has(SanitizerKind::ShadowCallStack) &&
+ !isInSanitizerBlacklist(SanitizerKind::ShadowCallStack, Fn, Loc))
+ Fn->addFnAttr(llvm::Attribute::ShadowCallStack);
+
return Fn;
}
@@ -376,6 +391,10 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D,
D->hasAttr<CUDASharedAttr>()))
return;
+ if (getLangOpts().OpenMP &&
+ getOpenMPRuntime().emitDeclareTargetVarDefinition(D, Addr, PerformInit))
+ return;
+
// Check if we've already initialized this decl.
auto I = DelayedCXXInitPosition.find(D);
if (I != DelayedCXXInitPosition.end() && I->second == ~0U)
diff --git a/lib/CodeGen/CGException.cpp b/lib/CodeGen/CGException.cpp
index 1ec084ff3f5b..c9820c242554 100644
--- a/lib/CodeGen/CGException.cpp
+++ b/lib/CodeGen/CGException.cpp
@@ -65,7 +65,7 @@ llvm::Constant *CodeGenModule::getTerminateFn() {
if (getLangOpts().isCompatibleWithMSVC(LangOptions::MSVC2015))
name = "__std_terminate";
else
- name = "\01?terminate@@YAXXZ";
+ name = "?terminate@@YAXXZ";
} else if (getLangOpts().ObjC1 &&
getLangOpts().ObjCRuntime.hasTerminate())
name = "objc_terminate";
@@ -111,21 +111,32 @@ const EHPersonality
EHPersonality::MSVC_C_specific_handler = { "__C_specific_handler", nullptr };
const EHPersonality
EHPersonality::MSVC_CxxFrameHandler3 = { "__CxxFrameHandler3", nullptr };
+const EHPersonality
+EHPersonality::GNU_Wasm_CPlusPlus = { "__gxx_wasm_personality_v0", nullptr };
-static const EHPersonality &getCPersonality(const llvm::Triple &T,
+static const EHPersonality &getCPersonality(const TargetInfo &Target,
const LangOptions &L) {
+ const llvm::Triple &T = Target.getTriple();
+ if (T.isWindowsMSVCEnvironment())
+ return EHPersonality::MSVC_CxxFrameHandler3;
if (L.SjLjExceptions)
return EHPersonality::GNU_C_SJLJ;
+ if (L.DWARFExceptions)
+ return EHPersonality::GNU_C;
if (L.SEHExceptions)
return EHPersonality::GNU_C_SEH;
return EHPersonality::GNU_C;
}
-static const EHPersonality &getObjCPersonality(const llvm::Triple &T,
+static const EHPersonality &getObjCPersonality(const TargetInfo &Target,
const LangOptions &L) {
+ const llvm::Triple &T = Target.getTriple();
+ if (T.isWindowsMSVCEnvironment())
+ return EHPersonality::MSVC_CxxFrameHandler3;
+
switch (L.ObjCRuntime.getKind()) {
case ObjCRuntime::FragileMacOSX:
- return getCPersonality(T, L);
+ return getCPersonality(Target, L);
case ObjCRuntime::MacOSX:
case ObjCRuntime::iOS:
case ObjCRuntime::WatchOS:
@@ -145,24 +156,37 @@ static const EHPersonality &getObjCPersonality(const llvm::Triple &T,
llvm_unreachable("bad runtime kind");
}
-static const EHPersonality &getCXXPersonality(const llvm::Triple &T,
+static const EHPersonality &getCXXPersonality(const TargetInfo &Target,
const LangOptions &L) {
+ const llvm::Triple &T = Target.getTriple();
+ if (T.isWindowsMSVCEnvironment())
+ return EHPersonality::MSVC_CxxFrameHandler3;
if (L.SjLjExceptions)
return EHPersonality::GNU_CPlusPlus_SJLJ;
+ if (L.DWARFExceptions)
+ return EHPersonality::GNU_CPlusPlus;
if (L.SEHExceptions)
return EHPersonality::GNU_CPlusPlus_SEH;
+ // Wasm EH is a non-MVP feature for now.
+ if (Target.hasFeature("exception-handling") &&
+ (T.getArch() == llvm::Triple::wasm32 ||
+ T.getArch() == llvm::Triple::wasm64))
+ return EHPersonality::GNU_Wasm_CPlusPlus;
return EHPersonality::GNU_CPlusPlus;
}
/// Determines the personality function to use when both C++
/// and Objective-C exceptions are being caught.
-static const EHPersonality &getObjCXXPersonality(const llvm::Triple &T,
+static const EHPersonality &getObjCXXPersonality(const TargetInfo &Target,
const LangOptions &L) {
+ if (Target.getTriple().isWindowsMSVCEnvironment())
+ return EHPersonality::MSVC_CxxFrameHandler3;
+
switch (L.ObjCRuntime.getKind()) {
// In the fragile ABI, just use C++ exception handling and hope
// they're not doing crazy exception mixing.
case ObjCRuntime::FragileMacOSX:
- return getCXXPersonality(T, L);
+ return getCXXPersonality(Target, L);
// The ObjC personality defers to the C++ personality for non-ObjC
// handlers. Unlike the C++ case, we use the same personality
@@ -170,7 +194,7 @@ static const EHPersonality &getObjCXXPersonality(const llvm::Triple &T,
case ObjCRuntime::MacOSX:
case ObjCRuntime::iOS:
case ObjCRuntime::WatchOS:
- return getObjCPersonality(T, L);
+ return getObjCPersonality(Target, L);
case ObjCRuntime::GNUstep:
return EHPersonality::GNU_ObjCXX;
@@ -179,7 +203,7 @@ static const EHPersonality &getObjCXXPersonality(const llvm::Triple &T,
// mixed EH. Use the ObjC personality just to avoid returning null.
case ObjCRuntime::GCC:
case ObjCRuntime::ObjFW:
- return getObjCPersonality(T, L);
+ return getObjCPersonality(Target, L);
}
llvm_unreachable("bad runtime kind");
}
@@ -194,30 +218,17 @@ const EHPersonality &EHPersonality::get(CodeGenModule &CGM,
const FunctionDecl *FD) {
const llvm::Triple &T = CGM.getTarget().getTriple();
const LangOptions &L = CGM.getLangOpts();
+ const TargetInfo &Target = CGM.getTarget();
// Functions using SEH get an SEH personality.
if (FD && FD->usesSEHTry())
return getSEHPersonalityMSVC(T);
- // Try to pick a personality function that is compatible with MSVC if we're
- // not compiling Obj-C. Obj-C users better have an Obj-C runtime that supports
- // the GCC-style personality function.
- if (T.isWindowsMSVCEnvironment() && !L.ObjC1) {
- if (L.SjLjExceptions)
- return EHPersonality::GNU_CPlusPlus_SJLJ;
- if (L.DWARFExceptions)
- return EHPersonality::GNU_CPlusPlus;
- return EHPersonality::MSVC_CxxFrameHandler3;
- }
-
- if (L.CPlusPlus && L.ObjC1)
- return getObjCXXPersonality(T, L);
- else if (L.CPlusPlus)
- return getCXXPersonality(T, L);
- else if (L.ObjC1)
- return getObjCPersonality(T, L);
- else
- return getCPersonality(T, L);
+ if (L.ObjC1)
+ return L.CPlusPlus ? getObjCXXPersonality(Target, L)
+ : getObjCPersonality(Target, L);
+ return L.CPlusPlus ? getCXXPersonality(Target, L)
+ : getCPersonality(Target, L);
}
const EHPersonality &EHPersonality::get(CodeGenFunction &CGF) {
@@ -313,8 +324,7 @@ void CodeGenModule::SimplifyPersonality() {
return;
const EHPersonality &ObjCXX = EHPersonality::get(*this, /*FD=*/nullptr);
- const EHPersonality &CXX =
- getCXXPersonality(getTarget().getTriple(), LangOpts);
+ const EHPersonality &CXX = getCXXPersonality(getTarget(), LangOpts);
if (&ObjCXX == &CXX)
return;
@@ -448,11 +458,9 @@ void CodeGenFunction::EmitStartEHSpec(const Decl *D) {
return;
ExceptionSpecificationType EST = Proto->getExceptionSpecType();
- if (isNoexceptExceptionSpec(EST)) {
- if (Proto->getNoexceptSpec(getContext()) == FunctionProtoType::NR_Nothrow) {
- // noexcept functions are simple terminate scopes.
- EHStack.pushTerminate();
- }
+ if (isNoexceptExceptionSpec(EST) && Proto->canThrow() == CT_Cannot) {
+ // noexcept functions are simple terminate scopes.
+ EHStack.pushTerminate();
} else if (EST == EST_Dynamic || EST == EST_DynamicNone) {
// TODO: Revisit exception specifications for the MS ABI. There is a way to
// encode these in an object file but MSVC doesn't do anything with it.
@@ -527,10 +535,8 @@ void CodeGenFunction::EmitEndEHSpec(const Decl *D) {
return;
ExceptionSpecificationType EST = Proto->getExceptionSpecType();
- if (isNoexceptExceptionSpec(EST)) {
- if (Proto->getNoexceptSpec(getContext()) == FunctionProtoType::NR_Nothrow) {
- EHStack.popTerminate();
- }
+ if (isNoexceptExceptionSpec(EST) && Proto->canThrow() == CT_Cannot) {
+ EHStack.popTerminate();
} else if (EST == EST_Dynamic || EST == EST_DynamicNone) {
// TODO: Revisit exception specifications for the MS ABI. There is a way to
// encode these in an object file but MSVC doesn't do anything with it.
@@ -584,7 +590,7 @@ void CodeGenFunction::EnterCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) {
llvm::BasicBlock *
CodeGenFunction::getEHDispatchBlock(EHScopeStack::stable_iterator si) {
if (EHPersonality::get(*this).usesFuncletPads())
- return getMSVCDispatchBlock(si);
+ return getFuncletEHDispatchBlock(si);
// The dispatch block for the end of the scope chain is a block that
// just resumes unwinding.
@@ -632,7 +638,7 @@ CodeGenFunction::getEHDispatchBlock(EHScopeStack::stable_iterator si) {
}
llvm::BasicBlock *
-CodeGenFunction::getMSVCDispatchBlock(EHScopeStack::stable_iterator SI) {
+CodeGenFunction::getFuncletEHDispatchBlock(EHScopeStack::stable_iterator SI) {
// Returning nullptr indicates that the previous dispatch block should unwind
// to caller.
if (SI == EHStack.stable_end())
@@ -646,7 +652,7 @@ CodeGenFunction::getMSVCDispatchBlock(EHScopeStack::stable_iterator SI) {
return DispatchBlock;
if (EHS.getKind() == EHScope::Terminate)
- DispatchBlock = getTerminateHandler();
+ DispatchBlock = getTerminateFunclet();
else
DispatchBlock = createBasicBlock();
CGBuilderTy Builder(*this, DispatchBlock);
@@ -926,10 +932,121 @@ static void emitCatchPadBlock(CodeGenFunction &CGF, EHCatchScope &CatchScope) {
CGF.Builder.restoreIP(SavedIP);
}
+// Wasm uses Windows-style EH instructions, but it merges all catch clauses into
+// one big catchpad, within which we use Itanium's landingpad-style selector
+// comparison instructions.
+static void emitWasmCatchPadBlock(CodeGenFunction &CGF,
+ EHCatchScope &CatchScope) {
+ llvm::BasicBlock *DispatchBlock = CatchScope.getCachedEHDispatchBlock();
+ assert(DispatchBlock);
+
+ CGBuilderTy::InsertPoint SavedIP = CGF.Builder.saveIP();
+ CGF.EmitBlockAfterUses(DispatchBlock);
+
+ llvm::Value *ParentPad = CGF.CurrentFuncletPad;
+ if (!ParentPad)
+ ParentPad = llvm::ConstantTokenNone::get(CGF.getLLVMContext());
+ llvm::BasicBlock *UnwindBB =
+ CGF.getEHDispatchBlock(CatchScope.getEnclosingEHScope());
+
+ unsigned NumHandlers = CatchScope.getNumHandlers();
+ llvm::CatchSwitchInst *CatchSwitch =
+ CGF.Builder.CreateCatchSwitch(ParentPad, UnwindBB, NumHandlers);
+
+ // We don't use a landingpad instruction, so generate intrinsic calls to
+ // provide exception and selector values.
+ llvm::BasicBlock *WasmCatchStartBlock = CGF.createBasicBlock("catch.start");
+ CatchSwitch->addHandler(WasmCatchStartBlock);
+ CGF.EmitBlockAfterUses(WasmCatchStartBlock);
+
+ // Create a catchpad instruction.
+ SmallVector<llvm::Value *, 4> CatchTypes;
+ for (unsigned I = 0, E = NumHandlers; I < E; ++I) {
+ const EHCatchScope::Handler &Handler = CatchScope.getHandler(I);
+ CatchTypeInfo TypeInfo = Handler.Type;
+ if (!TypeInfo.RTTI)
+ TypeInfo.RTTI = llvm::Constant::getNullValue(CGF.VoidPtrTy);
+ CatchTypes.push_back(TypeInfo.RTTI);
+ }
+ auto *CPI = CGF.Builder.CreateCatchPad(CatchSwitch, CatchTypes);
+
+  // Create calls to the wasm.get.exception and wasm.get.ehselector
+  // intrinsics. Until they are lowered appropriately later, they provide
+  // the values for the exception and selector.
+ llvm::Value *GetExnFn =
+ CGF.CGM.getIntrinsic(llvm::Intrinsic::wasm_get_exception);
+ llvm::Value *GetSelectorFn =
+ CGF.CGM.getIntrinsic(llvm::Intrinsic::wasm_get_ehselector);
+ llvm::CallInst *Exn = CGF.Builder.CreateCall(GetExnFn, CPI);
+ CGF.Builder.CreateStore(Exn, CGF.getExceptionSlot());
+ llvm::CallInst *Selector = CGF.Builder.CreateCall(GetSelectorFn, CPI);
+
+ llvm::Value *TypeIDFn = CGF.CGM.getIntrinsic(llvm::Intrinsic::eh_typeid_for);
+
+ // If there's only a single catch-all, branch directly to its handler.
+ if (CatchScope.getNumHandlers() == 1 &&
+ CatchScope.getHandler(0).isCatchAll()) {
+ CGF.Builder.CreateBr(CatchScope.getHandler(0).Block);
+ CGF.Builder.restoreIP(SavedIP);
+ return;
+ }
+
+ // Test against each of the exception types we claim to catch.
+ for (unsigned I = 0, E = NumHandlers;; ++I) {
+ assert(I < E && "ran off end of handlers!");
+ const EHCatchScope::Handler &Handler = CatchScope.getHandler(I);
+ CatchTypeInfo TypeInfo = Handler.Type;
+ if (!TypeInfo.RTTI)
+ TypeInfo.RTTI = llvm::Constant::getNullValue(CGF.VoidPtrTy);
+
+ // Figure out the next block.
+ llvm::BasicBlock *NextBlock;
+
+ bool EmitNextBlock = false, NextIsEnd = false;
+
+ // If this is the last handler, we're at the end, and the next block is a
+ // block that contains a call to the rethrow function, so we can unwind to
+ // the enclosing EH scope. The call itself will be generated later.
+ if (I + 1 == E) {
+ NextBlock = CGF.createBasicBlock("rethrow");
+ EmitNextBlock = true;
+ NextIsEnd = true;
+
+ // If the next handler is a catch-all, we're at the end, and the
+ // next block is that handler.
+ } else if (CatchScope.getHandler(I + 1).isCatchAll()) {
+ NextBlock = CatchScope.getHandler(I + 1).Block;
+ NextIsEnd = true;
+
+ // Otherwise, we're not at the end and we need a new block.
+ } else {
+ NextBlock = CGF.createBasicBlock("catch.fallthrough");
+ EmitNextBlock = true;
+ }
+
+ // Figure out the catch type's index in the LSDA's type table.
+ llvm::CallInst *TypeIndex = CGF.Builder.CreateCall(TypeIDFn, TypeInfo.RTTI);
+ TypeIndex->setDoesNotThrow();
+
+ llvm::Value *MatchesTypeIndex =
+ CGF.Builder.CreateICmpEQ(Selector, TypeIndex, "matches");
+ CGF.Builder.CreateCondBr(MatchesTypeIndex, Handler.Block, NextBlock);
+
+ if (EmitNextBlock)
+ CGF.EmitBlock(NextBlock);
+ if (NextIsEnd)
+ break;
+ }
+
+ CGF.Builder.restoreIP(SavedIP);
+}
+
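A source-level sketch of what lands in the single catchpad built above
(illustrative): both handlers share one catch.start block, and the selector
comparisons emitted below choose between them.

    extern void mayThrow();
    void f() {
      try {
        mayThrow();
      } catch (int) {          // selector tested against int's type index
      } catch (const char *) { // then against const char*'s; else rethrow
      }
    }
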
/// Emit the structure of the dispatch block for the given catch scope.
/// It is an invariant that the dispatch block already exists.
static void emitCatchDispatchBlock(CodeGenFunction &CGF,
EHCatchScope &catchScope) {
+ if (EHPersonality::get(CGF).isWasmPersonality())
+ return emitWasmCatchPadBlock(CGF, catchScope);
if (EHPersonality::get(CGF).usesFuncletPads())
return emitCatchPadBlock(CGF, catchScope);
@@ -1017,6 +1134,7 @@ void CodeGenFunction::ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) {
unsigned NumHandlers = S.getNumHandlers();
EHCatchScope &CatchScope = cast<EHCatchScope>(*EHStack.begin());
assert(CatchScope.getNumHandlers() == NumHandlers);
+ llvm::BasicBlock *DispatchBlock = CatchScope.getCachedEHDispatchBlock();
// If the catch was not required, bail out now.
if (!CatchScope.hasEHBranches()) {
@@ -1049,6 +1167,22 @@ void CodeGenFunction::ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) {
doImplicitRethrow = isa<CXXDestructorDecl>(CurCodeDecl) ||
isa<CXXConstructorDecl>(CurCodeDecl);
+ // Wasm uses Windows-style EH instructions, but merges all catch clauses into
+ // one big catchpad. So we save the old funclet pad here before we traverse
+ // each catch handler.
+ SaveAndRestore<llvm::Instruction *> RestoreCurrentFuncletPad(
+ CurrentFuncletPad);
+ llvm::BasicBlock *WasmCatchStartBlock = nullptr;
+ if (EHPersonality::get(*this).isWasmPersonality()) {
+ auto *CatchSwitch =
+ cast<llvm::CatchSwitchInst>(DispatchBlock->getFirstNonPHI());
+ WasmCatchStartBlock = CatchSwitch->hasUnwindDest()
+ ? CatchSwitch->getSuccessor(1)
+ : CatchSwitch->getSuccessor(0);
+ auto *CPI = cast<llvm::CatchPadInst>(WasmCatchStartBlock->getFirstNonPHI());
+ CurrentFuncletPad = CPI;
+ }
+
// Perversely, we emit the handlers backwards precisely because we
// want them to appear in source order. In all of these cases, the
// catch block will have exactly one predecessor, which will be a
@@ -1056,7 +1190,9 @@ void CodeGenFunction::ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) {
// a catch-all, one of the dispatch blocks will branch to two
// different handlers, and EmitBlockAfterUses will cause the second
// handler to be moved before the first.
+ bool HasCatchAll = false;
for (unsigned I = NumHandlers; I != 0; --I) {
+ HasCatchAll |= Handlers[I - 1].isCatchAll();
llvm::BasicBlock *CatchBlock = Handlers[I-1].Block;
EmitBlockAfterUses(CatchBlock);
@@ -1101,6 +1237,27 @@ void CodeGenFunction::ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) {
Builder.CreateBr(ContBB);
}
+  // Because in wasm we merge all catch clauses into one big catchpad, if none
+  // of the types in the catch handlers matches after we test against each of
+  // them, we should unwind to the next enclosing EH scope. We generate a call
+  // to the rethrow function here to do that.
+ if (EHPersonality::get(*this).isWasmPersonality() && !HasCatchAll) {
+ assert(WasmCatchStartBlock);
+    // Navigate to the "rethrow" block we created in emitWasmCatchPadBlock().
+ // Wasm uses landingpad-style conditional branches to compare selectors, so
+ // we follow the false destination for each of the cond branches to reach
+ // the rethrow block.
+ llvm::BasicBlock *RethrowBlock = WasmCatchStartBlock;
+ while (llvm::TerminatorInst *TI = RethrowBlock->getTerminator()) {
+ auto *BI = cast<llvm::BranchInst>(TI);
+ assert(BI->isConditional());
+ RethrowBlock = BI->getSuccessor(1);
+ }
+ assert(RethrowBlock != WasmCatchStartBlock && RethrowBlock->empty());
+ Builder.SetInsertPoint(RethrowBlock);
+ CGM.getCXXABI().emitRethrow(*this, /*isNoReturn=*/true);
+ }
+
EmitBlock(ContBB);
incrementProfileCounter(&S);
}
@@ -1334,23 +1491,59 @@ llvm::BasicBlock *CodeGenFunction::getTerminateHandler() {
if (TerminateHandler)
return TerminateHandler;
- CGBuilderTy::InsertPoint SavedIP = Builder.saveAndClearIP();
-
// Set up the terminate handler. This block is inserted at the very
// end of the function by FinishFunction.
TerminateHandler = createBasicBlock("terminate.handler");
+ CGBuilderTy::InsertPoint SavedIP = Builder.saveAndClearIP();
Builder.SetInsertPoint(TerminateHandler);
+
llvm::Value *Exn = nullptr;
+ if (getLangOpts().CPlusPlus)
+ Exn = getExceptionFromSlot();
+ llvm::CallInst *terminateCall =
+ CGM.getCXXABI().emitTerminateForUnexpectedException(*this, Exn);
+ terminateCall->setDoesNotReturn();
+ Builder.CreateUnreachable();
+
+ // Restore the saved insertion state.
+ Builder.restoreIP(SavedIP);
+
+ return TerminateHandler;
+}
+
+llvm::BasicBlock *CodeGenFunction::getTerminateFunclet() {
+ assert(EHPersonality::get(*this).usesFuncletPads() &&
+ "use getTerminateLandingPad for non-funclet EH");
+
+ llvm::BasicBlock *&TerminateFunclet = TerminateFunclets[CurrentFuncletPad];
+ if (TerminateFunclet)
+ return TerminateFunclet;
+
+ CGBuilderTy::InsertPoint SavedIP = Builder.saveAndClearIP();
+
+ // Set up the terminate handler. This block is inserted at the very
+ // end of the function by FinishFunction.
+ TerminateFunclet = createBasicBlock("terminate.handler");
+ Builder.SetInsertPoint(TerminateFunclet);
+
+ // Create the cleanuppad using the current parent pad as its token. Use 'none'
+ // if this is a top-level terminate scope, which is the common case.
SaveAndRestore<llvm::Instruction *> RestoreCurrentFuncletPad(
CurrentFuncletPad);
- if (EHPersonality::get(*this).usesFuncletPads()) {
- llvm::Value *ParentPad = CurrentFuncletPad;
- if (!ParentPad)
- ParentPad = llvm::ConstantTokenNone::get(CGM.getLLVMContext());
- CurrentFuncletPad = Builder.CreateCleanupPad(ParentPad);
- } else {
- if (getLangOpts().CPlusPlus)
- Exn = getExceptionFromSlot();
+ llvm::Value *ParentPad = CurrentFuncletPad;
+ if (!ParentPad)
+ ParentPad = llvm::ConstantTokenNone::get(CGM.getLLVMContext());
+ CurrentFuncletPad = Builder.CreateCleanupPad(ParentPad);
+
+ // Emit the __std_terminate call.
+ llvm::Value *Exn = nullptr;
+ // In case of wasm personality, we need to pass the exception value to
+ // __clang_call_terminate function.
+ if (getLangOpts().CPlusPlus &&
+ EHPersonality::get(*this).isWasmPersonality()) {
+ llvm::Value *GetExnFn =
+ CGM.getIntrinsic(llvm::Intrinsic::wasm_get_exception);
+ Exn = Builder.CreateCall(GetExnFn, CurrentFuncletPad);
}
llvm::CallInst *terminateCall =
CGM.getCXXABI().emitTerminateForUnexpectedException(*this, Exn);
@@ -1360,7 +1553,7 @@ llvm::BasicBlock *CodeGenFunction::getTerminateHandler() {
// Restore the saved insertion state.
Builder.restoreIP(SavedIP);
- return TerminateHandler;
+ return TerminateFunclet;
}
llvm::BasicBlock *CodeGenFunction::getEHResumeBlock(bool isCleanup) {
diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp
index c7dc8337e19e..3097caacb31c 100644
--- a/lib/CodeGen/CGExpr.cpp
+++ b/lib/CodeGen/CGExpr.cpp
@@ -61,18 +61,30 @@ llvm::Value *CodeGenFunction::EmitCastToVoidPtr(llvm::Value *value) {
/// CreateTempAlloca - This creates an alloca and inserts it into the entry
/// block.
+Address CodeGenFunction::CreateTempAllocaWithoutCast(llvm::Type *Ty,
+ CharUnits Align,
+ const Twine &Name,
+ llvm::Value *ArraySize) {
+ auto Alloca = CreateTempAlloca(Ty, Name, ArraySize);
+ Alloca->setAlignment(Align.getQuantity());
+ return Address(Alloca, Align);
+}
+
+/// CreateTempAlloca - This creates an alloca and inserts it into the entry
+/// block. The alloca is cast to the default address space if necessary.
Address CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align,
const Twine &Name,
llvm::Value *ArraySize,
- bool CastToDefaultAddrSpace) {
- auto Alloca = CreateTempAlloca(Ty, Name, ArraySize);
- Alloca->setAlignment(Align.getQuantity());
- llvm::Value *V = Alloca;
+ Address *AllocaAddr) {
+ auto Alloca = CreateTempAllocaWithoutCast(Ty, Align, Name, ArraySize);
+ if (AllocaAddr)
+ *AllocaAddr = Alloca;
+ llvm::Value *V = Alloca.getPointer();
  // Alloca always returns a pointer in the alloca address space, which may
  // differ from the address space expected by the language. For example,
  // in C++ automatic variables live in the default address space. Therefore,
  // cast the alloca to the default address space when necessary.
- if (CastToDefaultAddrSpace && getASTAllocaAddressSpace() != LangAS::Default) {
+ if (getASTAllocaAddressSpace() != LangAS::Default) {
auto DestAddrSpace = getContext().getTargetAddressSpace(LangAS::Default);
llvm::IRBuilderBase::InsertPointGuard IPG(Builder);
// When ArraySize is nullptr, alloca is inserted at AllocaInsertPt,
@@ -125,17 +137,26 @@ Address CodeGenFunction::CreateIRTemp(QualType Ty, const Twine &Name) {
}
Address CodeGenFunction::CreateMemTemp(QualType Ty, const Twine &Name,
- bool CastToDefaultAddrSpace) {
+ Address *Alloca) {
// FIXME: Should we prefer the preferred type alignment here?
- return CreateMemTemp(Ty, getContext().getTypeAlignInChars(Ty), Name,
- CastToDefaultAddrSpace);
+ return CreateMemTemp(Ty, getContext().getTypeAlignInChars(Ty), Name, Alloca);
}
Address CodeGenFunction::CreateMemTemp(QualType Ty, CharUnits Align,
- const Twine &Name,
- bool CastToDefaultAddrSpace) {
- return CreateTempAlloca(ConvertTypeForMem(Ty), Align, Name, nullptr,
- CastToDefaultAddrSpace);
+ const Twine &Name, Address *Alloca) {
+ return CreateTempAlloca(ConvertTypeForMem(Ty), Align, Name,
+ /*ArraySize=*/nullptr, Alloca);
+}
+
+Address CodeGenFunction::CreateMemTempWithoutCast(QualType Ty, CharUnits Align,
+ const Twine &Name) {
+ return CreateTempAllocaWithoutCast(ConvertTypeForMem(Ty), Align, Name);
+}
+
+Address CodeGenFunction::CreateMemTempWithoutCast(QualType Ty,
+ const Twine &Name) {
+ return CreateMemTempWithoutCast(Ty, getContext().getTypeAlignInChars(Ty),
+ Name);
}
/// EvaluateExprAsBool - Perform the usual unary conversions on the specified
@@ -187,7 +208,7 @@ RValue CodeGenFunction::EmitAnyExpr(const Expr *E,
llvm_unreachable("bad evaluation kind");
}
-/// EmitAnyExprToTemp - Similary to EmitAnyExpr(), however, the result will
+/// EmitAnyExprToTemp - Similar to EmitAnyExpr(), however, the result will
/// always be accessible even if no aggregate location is provided.
RValue CodeGenFunction::EmitAnyExprToTemp(const Expr *E) {
AggValueSlot AggSlot = AggValueSlot::ignored();
@@ -214,7 +235,8 @@ void CodeGenFunction::EmitAnyExprToMem(const Expr *E,
EmitAggExpr(E, AggValueSlot::forAddr(Location, Quals,
AggValueSlot::IsDestructed_t(IsInit),
AggValueSlot::DoesNotNeedGCBarriers,
- AggValueSlot::IsAliased_t(!IsInit)));
+ AggValueSlot::IsAliased_t(!IsInit),
+ AggValueSlot::MayOverlap));
return;
}
@@ -347,7 +369,8 @@ pushTemporaryCleanup(CodeGenFunction &CGF, const MaterializeTemporaryExpr *M,
static Address createReferenceTemporary(CodeGenFunction &CGF,
const MaterializeTemporaryExpr *M,
- const Expr *Inner) {
+ const Expr *Inner,
+ Address *Alloca = nullptr) {
auto &TCG = CGF.getTargetHooks();
switch (M->getStorageDuration()) {
case SD_FullExpression:
@@ -380,7 +403,7 @@ static Address createReferenceTemporary(CodeGenFunction &CGF,
return Address(C, alignment);
}
}
- return CGF.CreateMemTemp(Ty, "ref.tmp");
+ return CGF.CreateMemTemp(Ty, "ref.tmp", Alloca);
}
case SD_Thread:
case SD_Static:
@@ -432,7 +455,8 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) {
E->getType().getQualifiers(),
AggValueSlot::IsDestructed,
AggValueSlot::DoesNotNeedGCBarriers,
- AggValueSlot::IsNotAliased));
+ AggValueSlot::IsNotAliased,
+ AggValueSlot::DoesNotOverlap));
break;
}
}
@@ -456,7 +480,8 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) {
}
// Create and initialize the reference temporary.
- Address Object = createReferenceTemporary(*this, M, E);
+ Address Alloca = Address::invalid();
+ Address Object = createReferenceTemporary(*this, M, E, &Alloca);
if (auto *Var = dyn_cast<llvm::GlobalVariable>(
Object.getPointer()->stripPointerCasts())) {
Object = Address(llvm::ConstantExpr::getBitCast(
@@ -475,13 +500,13 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) {
case SD_Automatic:
case SD_FullExpression:
if (auto *Size = EmitLifetimeStart(
- CGM.getDataLayout().getTypeAllocSize(Object.getElementType()),
- Object.getPointer())) {
+ CGM.getDataLayout().getTypeAllocSize(Alloca.getElementType()),
+ Alloca.getPointer())) {
if (M->getStorageDuration() == SD_Automatic)
pushCleanupAfterFullExpr<CallLifetimeEnd>(NormalEHLifetimeMarker,
- Object, Size);
+ Alloca, Size);
else
- pushFullExprCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, Object,
+ pushFullExprCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, Alloca,
Size);
}
break;
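
A sketch of the case the Alloca/Object split above addresses (illustrative):
the lifetime intrinsics must be handed the raw alloca, not the potentially
address-space-cast Object pointer.

    struct T { int v; };
    int f() {
      const T &r = T{42}; // materialized temporary; lifetime markers wrap
      return r.v;         // the underlying alloca, ended after the scope
    }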
@@ -873,7 +898,7 @@ static llvm::Value *getArrayIndexingBound(
if (const auto *CAT = dyn_cast<ConstantArrayType>(AT))
return CGF.Builder.getInt(CAT->getSize());
else if (const auto *VAT = dyn_cast<VariableArrayType>(AT))
- return CGF.getVLASize(VAT).first;
+ return CGF.getVLASize(VAT).NumElts;
// Ignore pass_object_size here. It's not applicable on decayed pointers.
}
}
@@ -1034,8 +1059,12 @@ Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E,
// Derived-to-base conversions.
case CK_UncheckedDerivedToBase:
case CK_DerivedToBase: {
- Address Addr = EmitPointerWithAlignment(CE->getSubExpr(), BaseInfo,
- TBAAInfo);
+ // TODO: Support accesses to members of base classes in TBAA. For now, we
+ // conservatively pretend that the complete object is of the base class
+ // type.
+ if (TBAAInfo)
+ *TBAAInfo = CGM.getTBAAAccessInfo(E->getType());
+ Address Addr = EmitPointerWithAlignment(CE->getSubExpr(), BaseInfo);
auto Derived = CE->getSubExpr()->getType()->getPointeeCXXRecordDecl();
return GetAddressOfBaseClass(Addr, Derived,
CE->path_begin(), CE->path_end(),
@@ -1785,7 +1814,7 @@ RValue CodeGenFunction::EmitLoadOfExtVectorElementLValue(LValue LV) {
return RValue::get(Vec);
}
-/// @brief Generates lvalue for partial ext_vector access.
+/// Generates lvalue for partial ext_vector access.
Address CodeGenFunction::EmitExtVectorElementLValue(LValue LV) {
Address VectorAddress = LV.getExtVectorAddress();
const VectorType *ExprVT = LV.getType()->getAs<VectorType>();
@@ -1807,7 +1836,7 @@ Address CodeGenFunction::EmitExtVectorElementLValue(LValue LV) {
return VectorBasePtrPlusIx;
}
-/// @brief Load of global gamed gegisters are always calls to intrinsics.
+/// Loads of global named registers are always calls to intrinsics.
RValue CodeGenFunction::EmitLoadOfGlobalRegLValue(LValue LV) {
assert((LV.getType()->isIntegerType() || LV.getType()->isPointerType()) &&
"Bad type for register variable");
@@ -2067,7 +2096,7 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src,
Dst.isVolatileQualified());
}
-/// @brief Store of global named registers are always calls to intrinsics.
+/// Stores of global named registers are always calls to intrinsics.
void CodeGenFunction::EmitStoreThroughGlobalRegLValue(RValue Src, LValue Dst) {
assert((Dst.getType()->isIntegerType() || Dst.getType()->isPointerType()) &&
"Bad type for register variable");
@@ -2206,6 +2235,22 @@ static LValue EmitThreadPrivateVarDeclLValue(
return CGF.MakeAddrLValue(Addr, T, AlignmentSource::Decl);
}
+static Address emitDeclTargetLinkVarDeclLValue(CodeGenFunction &CGF,
+ const VarDecl *VD, QualType T) {
+ for (const auto *D : VD->redecls()) {
+ if (!VD->hasAttrs())
+ continue;
+ if (const auto *Attr = D->getAttr<OMPDeclareTargetDeclAttr>())
+ if (Attr->getMapType() == OMPDeclareTargetDeclAttr::MT_Link) {
+ QualType PtrTy = CGF.getContext().getPointerType(VD->getType());
+ Address Addr =
+ CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD);
+ return CGF.EmitLoadOfPointer(Addr, PtrTy->castAs<PointerType>());
+ }
+ }
+ return Address::invalid();
+}
+
Address
CodeGenFunction::EmitLoadOfReference(LValue RefLVal,
LValueBaseInfo *PointeeBaseInfo,
@@ -2255,6 +2300,13 @@ static LValue EmitGlobalVarDeclLValue(CodeGenFunction &CGF,
if (VD->getTLSKind() == VarDecl::TLS_Dynamic &&
CGF.CGM.getCXXABI().usesThreadWrapperFunction())
return CGF.CGM.getCXXABI().EmitThreadLocalVarDeclLValue(CGF, VD, T);
+ // Check if the variable is marked as declare target with link clause in
+ // device codegen.
+ if (CGF.getLangOpts().OpenMPIsDevice) {
+ Address Addr = emitDeclTargetLinkVarDeclLValue(CGF, VD, T);
+ if (Addr.isValid())
+ return CGF.MakeAddrLValue(Addr, T, AlignmentSource::Decl);
+ }
llvm::Value *V = CGF.CGM.GetAddrOfGlobalVar(VD);
llvm::Type *RealVarTy = CGF.getTypes().ConvertTypeForMem(VD->getType());
@@ -2263,9 +2315,11 @@ static LValue EmitGlobalVarDeclLValue(CodeGenFunction &CGF,
Address Addr(V, Alignment);
// Emit reference to the private copy of the variable if it is an OpenMP
// threadprivate variable.
- if (CGF.getLangOpts().OpenMP && VD->hasAttr<OMPThreadPrivateDeclAttr>())
+ if (CGF.getLangOpts().OpenMP && !CGF.getLangOpts().OpenMPSimd &&
+ VD->hasAttr<OMPThreadPrivateDeclAttr>()) {
return EmitThreadPrivateVarDeclLValue(CGF, VD, T, Addr, RealVarTy,
E->getExprLoc());
+ }
LValue LV = VD->getType()->isReferenceType() ?
CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
AlignmentSource::Decl) :
@@ -2446,7 +2500,8 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
// Check for OpenMP threadprivate variables.
- if (getLangOpts().OpenMP && VD->hasAttr<OMPThreadPrivateDeclAttr>()) {
+ if (getLangOpts().OpenMP && !getLangOpts().OpenMPSimd &&
+ VD->hasAttr<OMPThreadPrivateDeclAttr>()) {
return EmitThreadPrivateVarDeclLValue(
*this, VD, T, addr, getTypes().ConvertTypeForMem(VD->getType()),
E->getExprLoc());
@@ -2579,7 +2634,7 @@ LValue CodeGenFunction::EmitPredefinedLValue(const PredefinedExpr *E) {
StringRef NameItems[] = {
PredefinedExpr::getIdentTypeName(E->getIdentType()), FnName};
std::string GVName = llvm::join(NameItems, NameItems + 2, ".");
- if (auto *BD = dyn_cast<BlockDecl>(CurCodeDecl)) {
+ if (auto *BD = dyn_cast_or_null<BlockDecl>(CurCodeDecl)) {
std::string Name = SL->getString();
if (!Name.empty()) {
unsigned Discriminator =
@@ -2678,7 +2733,7 @@ llvm::Value *CodeGenFunction::EmitCheckValue(llvm::Value *V) {
return Builder.CreatePtrToInt(V, TargetTy);
}
-/// \brief Emit a representation of a SourceLocation for passing to a handler
+/// Emit a representation of a SourceLocation for passing to a handler
/// in a sanitizer runtime library. The format for this data is:
/// \code
/// struct SourceLocation {
@@ -2737,7 +2792,7 @@ llvm::Constant *CodeGenFunction::EmitCheckSourceLocation(SourceLocation Loc) {
}
namespace {
-/// \brief Specify under what conditions this check can be recovered
+/// Specify under what conditions this check can be recovered
enum class CheckRecoverableKind {
/// Always terminate program execution if this check fails.
Unrecoverable,
@@ -2945,6 +3000,7 @@ void CodeGenFunction::EmitCfiSlowPathCheck(
bool WithDiag = !CGM.getCodeGenOpts().SanitizeTrap.has(Kind);
llvm::CallInst *CheckCall;
+ llvm::Constant *SlowPathFn;
if (WithDiag) {
llvm::Constant *Info = llvm::ConstantStruct::getAnon(StaticArgs);
auto *InfoPtr =
@@ -2953,20 +3009,20 @@ void CodeGenFunction::EmitCfiSlowPathCheck(
InfoPtr->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
CGM.getSanitizerMetadata()->disableSanitizerForGlobal(InfoPtr);
- llvm::Constant *SlowPathDiagFn = CGM.getModule().getOrInsertFunction(
+ SlowPathFn = CGM.getModule().getOrInsertFunction(
"__cfi_slowpath_diag",
llvm::FunctionType::get(VoidTy, {Int64Ty, Int8PtrTy, Int8PtrTy},
false));
CheckCall = Builder.CreateCall(
- SlowPathDiagFn,
- {TypeId, Ptr, Builder.CreateBitCast(InfoPtr, Int8PtrTy)});
+ SlowPathFn, {TypeId, Ptr, Builder.CreateBitCast(InfoPtr, Int8PtrTy)});
} else {
- llvm::Constant *SlowPathFn = CGM.getModule().getOrInsertFunction(
+ SlowPathFn = CGM.getModule().getOrInsertFunction(
"__cfi_slowpath",
llvm::FunctionType::get(VoidTy, {Int64Ty, Int8PtrTy}, false));
CheckCall = Builder.CreateCall(SlowPathFn, {TypeId, Ptr});
}
+ CGM.setDSOLocal(cast<llvm::GlobalValue>(SlowPathFn->stripPointerCasts()));
CheckCall->setDoesNotThrow();
EmitBlock(Cont);
@@ -2980,6 +3036,7 @@ void CodeGenFunction::EmitCfiCheckStub() {
llvm::Function *F = llvm::Function::Create(
llvm::FunctionType::get(VoidTy, {Int64Ty, Int8PtrTy, Int8PtrTy}, false),
llvm::GlobalValue::WeakAnyLinkage, "__cfi_check", M);
+ CGM.setDSOLocal(F);
llvm::BasicBlock *BB = llvm::BasicBlock::Create(Ctx, "entry", F);
// FIXME: consider emitting an intrinsic call like
// call void @llvm.cfi_check(i64 %0, i8* %1, i8* %2)
@@ -3018,6 +3075,11 @@ void CodeGenFunction::EmitCfiCheckFail() {
StartFunction(GlobalDecl(), CGM.getContext().VoidTy, F, FI, Args,
SourceLocation());
+  // This function should not be affected by the blacklist. It does not have
+  // a source location, but "src:*" would still apply. Revert any changes to
+  // SanOpts made in StartFunction.
+ SanOpts = CGM.getLangOpts().Sanitize;
+
llvm::Value *Data =
EmitLoadOfScalar(GetAddrOfLocalVar(&ArgData), /*Volatile=*/false,
CGM.getContext().VoidPtrTy, ArgData.getLocation());
@@ -3306,7 +3368,7 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
auto *Idx = EmitIdxAfterBase(/*Promote*/true);
// The element count here is the total number of non-VLA elements.
- llvm::Value *numElements = getVLASize(vla).first;
+ llvm::Value *numElements = getVLASize(vla).NumElts;
// Effectively, the multiply by the VLA size is part of the GEP.
// GEP indexes are signed, and scaling an index isn't permitted to
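The .first -> .NumElts changes here and below reflect getVLASize returning a named struct rather than a std::pair. Judging from the VlaSize.NumElts and VlaSize.Type uses later in this patch (see CGExprScalar.cpp), the new return type is roughly:

    // Sketch of the assumed replacement for std::pair<llvm::Value*, QualType>:
    struct VlaSizePair {
      llvm::Value *NumElts; // runtime count of non-VLA elements
      QualType Type;        // the underlying element type
    };
    VlaSizePair getVLASize(const VariableArrayType *vla);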
@@ -3540,7 +3602,7 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E,
emitOMPArraySectionBase(*this, E->getBase(), BaseInfo, TBAAInfo,
BaseTy, VLA->getElementType(), IsLowerBound);
// The element count here is the total number of non-VLA elements.
- llvm::Value *NumElements = getVLASize(VLA).first;
+ llvm::Value *NumElements = getVLASize(VLA).NumElts;
// Effectively, the multiply by the VLA size is part of the GEP.
// GEP indexes are signed, and scaling an index isn't permitted to
@@ -3808,6 +3870,18 @@ LValue CodeGenFunction::EmitLValueForField(LValue base,
}
Address addr = base.getAddress();
+ if (auto *ClassDef = dyn_cast<CXXRecordDecl>(rec)) {
+ if (CGM.getCodeGenOpts().StrictVTablePointers &&
+ ClassDef->isDynamicClass()) {
+      // Getting to any field of a dynamic object requires stripping the
+      // dynamic information provided by invariant.group, because accessing a
+      // field may leak the real address of the dynamic object, which could
+      // result in a miscompilation when the leaked pointer is later compared.
+ auto *stripped = Builder.CreateStripInvariantGroup(addr.getPointer());
+ addr = Address(stripped, addr.getAlignment());
+ }
+ }
+
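A minimal illustration of the hazard (hypothetical types): once a field address escapes, it may be compared with another pointer to the same storage, and a pointer still carrying invariant.group information could let the optimizer fold that comparison incorrectly.

    struct Dyn { virtual ~Dyn(); int field; }; // dynamic class
    bool sameStorage(Dyn *d, void *raw) {
      // &d->field leaks the object's real address; under
      // -fstrict-vtable-pointers it must come from the stripped pointer.
      return static_cast<void *>(&d->field) == raw;
    }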
unsigned RecordCVR = base.getVRQualifiers();
if (rec->isUnion()) {
// For unions, there is no pointer adjustment.
@@ -3816,7 +3890,7 @@ LValue CodeGenFunction::EmitLValueForField(LValue base,
hasAnyVptr(FieldType, getContext()))
// Because unions can easily skip invariant.barriers, we need to add
// a barrier every time CXXRecord field with vptr is referenced.
- addr = Address(Builder.CreateInvariantGroupBarrier(addr.getPointer()),
+ addr = Address(Builder.CreateLaunderInvariantGroup(addr.getPointer()),
addr.getAlignment());
} else {
// For structs, we GEP to the field that the record layout suggests.
@@ -4160,7 +4234,35 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) {
LValue CodeGenFunction::EmitOpaqueValueLValue(const OpaqueValueExpr *e) {
assert(OpaqueValueMappingData::shouldBindAsLValue(e));
- return getOpaqueLValueMapping(e);
+ return getOrCreateOpaqueLValueMapping(e);
+}
+
+LValue
+CodeGenFunction::getOrCreateOpaqueLValueMapping(const OpaqueValueExpr *e) {
+ assert(OpaqueValueMapping::shouldBindAsLValue(e));
+
+ llvm::DenseMap<const OpaqueValueExpr*,LValue>::iterator
+ it = OpaqueLValues.find(e);
+
+ if (it != OpaqueLValues.end())
+ return it->second;
+
+ assert(e->isUnique() && "LValue for a nonunique OVE hasn't been emitted");
+ return EmitLValue(e->getSourceExpr());
+}
+
+RValue
+CodeGenFunction::getOrCreateOpaqueRValueMapping(const OpaqueValueExpr *e) {
+ assert(!OpaqueValueMapping::shouldBindAsLValue(e));
+
+ llvm::DenseMap<const OpaqueValueExpr*,RValue>::iterator
+ it = OpaqueRValues.find(e);
+
+ if (it != OpaqueRValues.end())
+ return it->second;
+
+ assert(e->isUnique() && "RValue for a nonunique OVE hasn't been emitted");
+ return EmitAnyExpr(e->getSourceExpr());
}
RValue CodeGenFunction::EmitRValueForField(LValue LV,
@@ -4476,8 +4578,7 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee
CalleeType = getContext().getCanonicalType(CalleeType);
- const auto *FnType =
- cast<FunctionType>(cast<PointerType>(CalleeType)->getPointeeType());
+ auto PointeeType = cast<PointerType>(CalleeType)->getPointeeType();
CGCallee Callee = OrigCallee;
@@ -4486,8 +4587,12 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee
if (llvm::Constant *PrefixSig =
CGM.getTargetCodeGenInfo().getUBSanFunctionSignature(CGM)) {
SanitizerScope SanScope(this);
+ // Remove any (C++17) exception specifications, to allow calling e.g. a
+ // noexcept function through a non-noexcept pointer.
+ auto ProtoTy =
+ getContext().getFunctionTypeWithExceptionSpec(PointeeType, EST_None);
llvm::Constant *FTRTTIConst =
- CGM.GetAddrOfRTTIDescriptor(QualType(FnType, 0), /*ForEH=*/true);
+ CGM.GetAddrOfRTTIDescriptor(ProtoTy, /*ForEH=*/true);
llvm::Type *PrefixStructTyElems[] = {PrefixSig->getType(), Int32Ty};
llvm::StructType *PrefixStructTy = llvm::StructType::get(
CGM.getLLVMContext(), PrefixStructTyElems, /*isPacked=*/true);
@@ -4527,6 +4632,8 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee
}
}
+ const auto *FnType = cast<FunctionType>(PointeeType);
+
// If we are checking indirect calls and this call is indirect, check that the
// function pointer is a member of the bit set for the function type.
if (SanOpts.has(SanitizerKind::CFIICall) &&
@@ -4707,6 +4814,12 @@ static LValueOrRValue emitPseudoObjectExpr(CodeGenFunction &CGF,
// If this semantic expression is an opaque value, bind it
// to the result of its source expression.
if (const auto *ov = dyn_cast<OpaqueValueExpr>(semantic)) {
+ // Skip unique OVEs.
+ if (ov->isUnique()) {
+ assert(ov != resultExpr &&
+ "A unique OVE cannot be used as the result expression");
+ continue;
+ }
// If this is the result expression, we may need to evaluate
// directly into the slot.
diff --git a/lib/CodeGen/CGExprAgg.cpp b/lib/CodeGen/CGExprAgg.cpp
index 0f05cab66d7e..291740478329 100644
--- a/lib/CodeGen/CGExprAgg.cpp
+++ b/lib/CodeGen/CGExprAgg.cpp
@@ -12,8 +12,10 @@
//===----------------------------------------------------------------------===//
#include "CodeGenFunction.h"
+#include "CGCXXABI.h"
#include "CGObjCRuntime.h"
#include "CodeGenModule.h"
+#include "ConstantEmitter.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclTemplate.h"
@@ -22,6 +24,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicInst.h"
using namespace clang;
using namespace CodeGen;
@@ -36,23 +39,6 @@ class AggExprEmitter : public StmtVisitor<AggExprEmitter> {
AggValueSlot Dest;
bool IsResultUnused;
- /// We want to use 'dest' as the return slot except under two
- /// conditions:
- /// - The destination slot requires garbage collection, so we
- /// need to use the GC API.
- /// - The destination slot is potentially aliased.
- bool shouldUseDestForReturnSlot() const {
- return !(Dest.requiresGCollection() || Dest.isPotentiallyAliased());
- }
-
- ReturnValueSlot getReturnValueSlot() const {
- if (!shouldUseDestForReturnSlot())
- return ReturnValueSlot();
-
- return ReturnValueSlot(Dest.getAddress(), Dest.isVolatile(),
- IsResultUnused);
- }
-
AggValueSlot EnsureSlot(QualType T) {
if (!Dest.isIgnored()) return Dest;
return CGF.CreateAggTemp(T, "agg.tmp.ensured");
@@ -62,6 +48,15 @@ class AggExprEmitter : public StmtVisitor<AggExprEmitter> {
Dest = CGF.CreateAggTemp(T, "agg.tmp.ensured");
}
+ // Calls `Fn` with a valid return value slot, potentially creating a temporary
+ // to do so. If a temporary is created, an appropriate copy into `Dest` will
+ // be emitted, as will lifetime markers.
+ //
+ // The given function should take a ReturnValueSlot, and return an RValue that
+ // points to said slot.
+ void withReturnValueSlot(const Expr *E,
+ llvm::function_ref<RValue(ReturnValueSlot)> Fn);
+
public:
AggExprEmitter(CodeGenFunction &cgf, AggValueSlot Dest, bool IsResultUnused)
: CGF(cgf), Builder(CGF.Builder), Dest(Dest),
@@ -76,8 +71,15 @@ public:
/// then loads the result into DestPtr.
void EmitAggLoadOfLValue(const Expr *E);
+ enum ExprValueKind {
+ EVK_RValue,
+ EVK_NonRValue
+ };
+
/// EmitFinalDestCopy - Perform the final copy to DestPtr, if desired.
- void EmitFinalDestCopy(QualType type, const LValue &src);
+ /// SrcIsRValue is true if source comes from an RValue.
+ void EmitFinalDestCopy(QualType type, const LValue &src,
+ ExprValueKind SrcValueKind = EVK_NonRValue);
void EmitFinalDestCopy(QualType type, RValue src);
void EmitCopy(QualType type, const AggValueSlot &dest,
const AggValueSlot &src);
@@ -85,7 +87,7 @@ public:
void EmitMoveFromReturnSlot(const Expr *E, RValue Src);
void EmitArrayInit(Address DestPtr, llvm::ArrayType *AType,
- QualType elementType, InitListExpr *E);
+ QualType ArrayQTy, InitListExpr *E);
AggValueSlot::NeedsGCBarriers_t needsGC(QualType T) {
if (CGF.getLangOpts().getGC() && TypeRequiresGCollection(T))
@@ -144,6 +146,7 @@ public:
void VisitPointerToDataMemberBinaryOperator(const BinaryOperator *BO);
void VisitBinAssign(const BinaryOperator *E);
void VisitBinComma(const BinaryOperator *E);
+ void VisitBinCmp(const BinaryOperator *E);
void VisitObjCMessageExpr(ObjCMessageExpr *E);
void VisitObjCIvarRefExpr(ObjCIvarRefExpr *E) {
@@ -217,7 +220,7 @@ void AggExprEmitter::EmitAggLoadOfLValue(const Expr *E) {
EmitFinalDestCopy(E->getType(), LV);
}
-/// \brief True if the given aggregate type requires special GC API calls.
+/// True if the given aggregate type requires special GC API calls.
bool AggExprEmitter::TypeRequiresGCollection(QualType T) {
// Only record types have members that might require garbage collection.
const RecordType *RecordTy = T->getAs<RecordType>();
@@ -234,38 +237,78 @@ bool AggExprEmitter::TypeRequiresGCollection(QualType T) {
return Record->hasObjectMember();
}
-/// \brief Perform the final move to DestPtr if for some reason
-/// getReturnValueSlot() didn't use it directly.
-///
-/// The idea is that you do something like this:
-/// RValue Result = EmitSomething(..., getReturnValueSlot());
-/// EmitMoveFromReturnSlot(E, Result);
-///
-/// If nothing interferes, this will cause the result to be emitted
-/// directly into the return value slot. Otherwise, a final move
-/// will be performed.
-void AggExprEmitter::EmitMoveFromReturnSlot(const Expr *E, RValue src) {
- if (shouldUseDestForReturnSlot()) {
- // Logically, Dest.getAddr() should equal Src.getAggregateAddr().
- // The possibility of undef rvalues complicates that a lot,
- // though, so we can't really assert.
- return;
+void AggExprEmitter::withReturnValueSlot(
+ const Expr *E, llvm::function_ref<RValue(ReturnValueSlot)> EmitCall) {
+ QualType RetTy = E->getType();
+ bool RequiresDestruction =
+ Dest.isIgnored() &&
+ RetTy.isDestructedType() == QualType::DK_nontrivial_c_struct;
+
+ // If it makes no observable difference, save a memcpy + temporary.
+ //
+ // We need to always provide our own temporary if destruction is required.
+ // Otherwise, EmitCall will emit its own, notice that it's "unused", and end
+ // its lifetime before we have the chance to emit a proper destructor call.
+ bool UseTemp = Dest.isPotentiallyAliased() || Dest.requiresGCollection() ||
+ (RequiresDestruction && !Dest.getAddress().isValid());
+
+ Address RetAddr = Address::invalid();
+ Address RetAllocaAddr = Address::invalid();
+
+ EHScopeStack::stable_iterator LifetimeEndBlock;
+ llvm::Value *LifetimeSizePtr = nullptr;
+ llvm::IntrinsicInst *LifetimeStartInst = nullptr;
+ if (!UseTemp) {
+ RetAddr = Dest.getAddress();
+ } else {
+ RetAddr = CGF.CreateMemTemp(RetTy, "tmp", &RetAllocaAddr);
+ uint64_t Size =
+ CGF.CGM.getDataLayout().getTypeAllocSize(CGF.ConvertTypeForMem(RetTy));
+ LifetimeSizePtr = CGF.EmitLifetimeStart(Size, RetAllocaAddr.getPointer());
+ if (LifetimeSizePtr) {
+ LifetimeStartInst =
+ cast<llvm::IntrinsicInst>(std::prev(Builder.GetInsertPoint()));
+ assert(LifetimeStartInst->getIntrinsicID() ==
+ llvm::Intrinsic::lifetime_start &&
+ "Last insertion wasn't a lifetime.start?");
+
+ CGF.pushFullExprCleanup<CodeGenFunction::CallLifetimeEnd>(
+ NormalEHLifetimeMarker, RetAllocaAddr, LifetimeSizePtr);
+ LifetimeEndBlock = CGF.EHStack.stable_begin();
+ }
}
- // Otherwise, copy from there to the destination.
- assert(Dest.getPointer() != src.getAggregatePointer());
- EmitFinalDestCopy(E->getType(), src);
+ RValue Src =
+ EmitCall(ReturnValueSlot(RetAddr, Dest.isVolatile(), IsResultUnused));
+
+ if (RequiresDestruction)
+ CGF.pushDestroy(RetTy.isDestructedType(), Src.getAggregateAddress(), RetTy);
+
+ if (!UseTemp)
+ return;
+
+ assert(Dest.getPointer() != Src.getAggregatePointer());
+ EmitFinalDestCopy(E->getType(), Src);
+
+ if (!RequiresDestruction && LifetimeStartInst) {
+ // If there's no dtor to run, the copy was the last use of our temporary.
+ // Since we're not guaranteed to be in an ExprWithCleanups, clean up
+ // eagerly.
+ CGF.DeactivateCleanupBlock(LifetimeEndBlock, LifetimeStartInst);
+ CGF.EmitLifetimeEnd(LifetimeSizePtr, RetAllocaAddr.getPointer());
+ }
}
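The intended call pattern, mirroring the VisitCallExpr and VisitObjCMessageExpr hunks below: the callback receives the slot chosen above and returns an RValue that points into it.

    withReturnValueSlot(E, [&](ReturnValueSlot Slot) {
      return CGF.EmitCallExpr(E, Slot); // the RValue aliases Slot's address
    });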
/// EmitFinalDestCopy - Perform the final copy to DestPtr, if desired.
void AggExprEmitter::EmitFinalDestCopy(QualType type, RValue src) {
assert(src.isAggregate() && "value must be aggregate value!");
LValue srcLV = CGF.MakeAddrLValue(src.getAggregateAddress(), type);
- EmitFinalDestCopy(type, srcLV);
+ EmitFinalDestCopy(type, srcLV, EVK_RValue);
}
/// EmitFinalDestCopy - Perform the final copy to DestPtr, if desired.
-void AggExprEmitter::EmitFinalDestCopy(QualType type, const LValue &src) {
+void AggExprEmitter::EmitFinalDestCopy(QualType type, const LValue &src,
+ ExprValueKind SrcValueKind) {
// If Dest is ignored, then we're evaluating an aggregate expression
// in a context that doesn't care about the result. Note that loads
// from volatile l-values force the existence of a non-ignored
@@ -273,9 +316,32 @@ void AggExprEmitter::EmitFinalDestCopy(QualType type, const LValue &src) {
if (Dest.isIgnored())
return;
+ // Copy non-trivial C structs here.
+ LValue DstLV = CGF.MakeAddrLValue(
+ Dest.getAddress(), Dest.isVolatile() ? type.withVolatile() : type);
+
+ if (SrcValueKind == EVK_RValue) {
+ if (type.isNonTrivialToPrimitiveDestructiveMove() == QualType::PCK_Struct) {
+ if (Dest.isPotentiallyAliased())
+ CGF.callCStructMoveAssignmentOperator(DstLV, src);
+ else
+ CGF.callCStructMoveConstructor(DstLV, src);
+ return;
+ }
+ } else {
+ if (type.isNonTrivialToPrimitiveCopy() == QualType::PCK_Struct) {
+ if (Dest.isPotentiallyAliased())
+ CGF.callCStructCopyAssignmentOperator(DstLV, src);
+ else
+ CGF.callCStructCopyConstructor(DstLV, src);
+ return;
+ }
+ }
+
AggValueSlot srcAgg =
AggValueSlot::forLValue(src, AggValueSlot::IsDestructed,
- needsGC(type), AggValueSlot::IsAliased);
+ needsGC(type), AggValueSlot::IsAliased,
+ AggValueSlot::MayOverlap);
EmitCopy(type, Dest, srcAgg);
}
@@ -286,7 +352,7 @@ void AggExprEmitter::EmitFinalDestCopy(QualType type, const LValue &src) {
void AggExprEmitter::EmitCopy(QualType type, const AggValueSlot &dest,
const AggValueSlot &src) {
if (dest.requiresGCollection()) {
- CharUnits sz = CGF.getContext().getTypeSizeInChars(type);
+ CharUnits sz = dest.getPreferredSize(CGF.getContext(), type);
llvm::Value *size = llvm::ConstantInt::get(CGF.SizeTy, sz.getQuantity());
CGF.CGM.getObjCRuntime().EmitGCMemmoveCollectable(CGF,
dest.getAddress(),
@@ -298,11 +364,13 @@ void AggExprEmitter::EmitCopy(QualType type, const AggValueSlot &dest,
// If the result of the assignment is used, copy the LHS there also.
// It's volatile if either side is. Use the minimum alignment of
// the two sides.
- CGF.EmitAggregateCopy(dest.getAddress(), src.getAddress(), type,
+ LValue DestLV = CGF.MakeAddrLValue(dest.getAddress(), type);
+ LValue SrcLV = CGF.MakeAddrLValue(src.getAddress(), type);
+ CGF.EmitAggregateCopy(DestLV, SrcLV, type, dest.mayOverlap(),
dest.isVolatile() || src.isVolatile());
}
-/// \brief Emit the initializer for a std::initializer_list initialized with a
+/// Emit the initializer for a std::initializer_list initialized with a
/// real initializer list.
void
AggExprEmitter::VisitCXXStdInitializerListExpr(CXXStdInitializerListExpr *E) {
@@ -367,7 +435,7 @@ AggExprEmitter::VisitCXXStdInitializerListExpr(CXXStdInitializerListExpr *E) {
}
}
-/// \brief Determine if E is a trivial array filler, that is, one that is
+/// Determine if E is a trivial array filler, that is, one that is
/// equivalent to zero-initialization.
static bool isTrivialFiller(Expr *E) {
if (!E)
@@ -390,14 +458,17 @@ static bool isTrivialFiller(Expr *E) {
return false;
}
-/// \brief Emit initialization of an array from an initializer list.
+/// Emit initialization of an array from an initializer list.
void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType,
- QualType elementType, InitListExpr *E) {
+ QualType ArrayQTy, InitListExpr *E) {
uint64_t NumInitElements = E->getNumInits();
uint64_t NumArrayElements = AType->getNumElements();
assert(NumInitElements <= NumArrayElements);
+ QualType elementType =
+ CGF.getContext().getAsArrayType(ArrayQTy)->getElementType();
+
// DestPtr is an array*. Construct an elementType* by drilling
// down a level.
llvm::Value *zero = llvm::ConstantInt::get(CGF.SizeTy, 0);
@@ -409,6 +480,29 @@ void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType,
CharUnits elementAlign =
DestPtr.getAlignment().alignmentOfArrayElement(elementSize);
+  // Consider initializing the array by copying from a global. For this to be
+  // more efficient than per-element initialization, the total size of the
+  // explicitly initialized elements must be large enough (more than 16 bytes
+  // here) and the element type must be trivially copyable.
+ if (NumInitElements * elementSize.getQuantity() > 16 &&
+ elementType.isTriviallyCopyableType(CGF.getContext())) {
+ CodeGen::CodeGenModule &CGM = CGF.CGM;
+ ConstantEmitter Emitter(CGM);
+ LangAS AS = ArrayQTy.getAddressSpace();
+ if (llvm::Constant *C = Emitter.tryEmitForInitializer(E, AS, ArrayQTy)) {
+ auto GV = new llvm::GlobalVariable(
+ CGM.getModule(), C->getType(),
+ CGM.isTypeConstant(ArrayQTy, /* ExcludeCtorDtor= */ true),
+ llvm::GlobalValue::PrivateLinkage, C, "constinit",
+ /* InsertBefore= */ nullptr, llvm::GlobalVariable::NotThreadLocal,
+ CGM.getContext().getTargetAddressSpace(AS));
+ Emitter.finalize(GV);
+ CharUnits Align = CGM.getContext().getTypeAlignInChars(ArrayQTy);
+ GV->setAlignment(Align.getQuantity());
+ EmitFinalDestCopy(ArrayQTy, CGF.MakeAddrLValue(GV, ArrayQTy, Align));
+ return;
+ }
+ }
+
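As a sketch of when this fires: with 4-byte ints, eight explicitly initialized elements already exceed the 16-byte threshold, so an initializer like the one below can be emitted as a private "constinit" global plus a single copy instead of eight separate stores.

    // More than 16 bytes of explicit initializers, trivially copyable:
    int arr[10] = {1, 2, 3, 4, 5, 6, 7, 8};
    // lowers (roughly) to an unnamed constant global and one memcpy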
// Exception safety requires us to destroy all the
// already-constructed members if an initializer throws.
// For that, we'll need an EH cleanup.
@@ -540,7 +634,11 @@ void AggExprEmitter::VisitMaterializeTemporaryExpr(MaterializeTemporaryExpr *E){
}
void AggExprEmitter::VisitOpaqueValueExpr(OpaqueValueExpr *e) {
- EmitFinalDestCopy(e->getType(), CGF.getOpaqueLValueMapping(e));
+ // If this is a unique OVE, just visit its source expression.
+ if (e->isUnique())
+ Visit(e->getSourceExpr());
+ else
+ EmitFinalDestCopy(e->getType(), CGF.getOrCreateOpaqueLValueMapping(e));
}
void
@@ -586,12 +684,12 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) {
CGF.EmitDynamicCast(LV.getAddress(), cast<CXXDynamicCastExpr>(E));
else
CGF.CGM.ErrorUnsupported(E, "non-simple lvalue dynamic_cast");
-
+
if (!Dest.isIgnored())
CGF.CGM.ErrorUnsupported(E, "lvalue dynamic_cast with a destination");
break;
}
-
+
case CK_ToUnion: {
// Evaluate even if the destination is ignored.
if (Dest.isIgnored()) {
@@ -651,7 +749,7 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) {
if (isToAtomic) {
AggValueSlot valueDest = Dest;
if (!valueDest.isIgnored() && CGF.CGM.isPaddedAtomicType(atomicType)) {
- // Zero-initialize. (Strictly speaking, we only need to intialize
+ // Zero-initialize. (Strictly speaking, we only need to initialize
// the padding at the end, but this is simpler.)
if (!Dest.isZeroed())
CGF.EmitNullInitialization(Dest.getAddress(), atomicType);
@@ -665,6 +763,7 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) {
valueDest.isExternallyDestructed(),
valueDest.requiresGCollection(),
valueDest.isPotentiallyAliased(),
+ AggValueSlot::DoesNotOverlap,
AggValueSlot::IsZeroed);
}
@@ -762,13 +861,15 @@ void AggExprEmitter::VisitCallExpr(const CallExpr *E) {
return;
}
- RValue RV = CGF.EmitCallExpr(E, getReturnValueSlot());
- EmitMoveFromReturnSlot(E, RV);
+ withReturnValueSlot(E, [&](ReturnValueSlot Slot) {
+ return CGF.EmitCallExpr(E, Slot);
+ });
}
void AggExprEmitter::VisitObjCMessageExpr(ObjCMessageExpr *E) {
- RValue RV = CGF.EmitObjCMessageExpr(E, getReturnValueSlot());
- EmitMoveFromReturnSlot(E, RV);
+ withReturnValueSlot(E, [&](ReturnValueSlot Slot) {
+ return CGF.EmitObjCMessageExpr(E, Slot);
+ });
}
void AggExprEmitter::VisitBinComma(const BinaryOperator *E) {
@@ -781,6 +882,150 @@ void AggExprEmitter::VisitStmtExpr(const StmtExpr *E) {
CGF.EmitCompoundStmt(*E->getSubStmt(), true, Dest);
}
+enum CompareKind {
+ CK_Less,
+ CK_Greater,
+ CK_Equal,
+};
+
+static llvm::Value *EmitCompare(CGBuilderTy &Builder, CodeGenFunction &CGF,
+ const BinaryOperator *E, llvm::Value *LHS,
+ llvm::Value *RHS, CompareKind Kind,
+ const char *NameSuffix = "") {
+ QualType ArgTy = E->getLHS()->getType();
+ if (const ComplexType *CT = ArgTy->getAs<ComplexType>())
+ ArgTy = CT->getElementType();
+
+ if (const auto *MPT = ArgTy->getAs<MemberPointerType>()) {
+ assert(Kind == CK_Equal &&
+ "member pointers may only be compared for equality");
+ return CGF.CGM.getCXXABI().EmitMemberPointerComparison(
+ CGF, LHS, RHS, MPT, /*IsInequality*/ false);
+ }
+
+ // Compute the comparison instructions for the specified comparison kind.
+ struct CmpInstInfo {
+ const char *Name;
+ llvm::CmpInst::Predicate FCmp;
+ llvm::CmpInst::Predicate SCmp;
+ llvm::CmpInst::Predicate UCmp;
+ };
+ CmpInstInfo InstInfo = [&]() -> CmpInstInfo {
+ using FI = llvm::FCmpInst;
+ using II = llvm::ICmpInst;
+ switch (Kind) {
+ case CK_Less:
+ return {"cmp.lt", FI::FCMP_OLT, II::ICMP_SLT, II::ICMP_ULT};
+ case CK_Greater:
+ return {"cmp.gt", FI::FCMP_OGT, II::ICMP_SGT, II::ICMP_UGT};
+ case CK_Equal:
+ return {"cmp.eq", FI::FCMP_OEQ, II::ICMP_EQ, II::ICMP_EQ};
+ }
+ llvm_unreachable("Unrecognised CompareKind enum");
+ }();
+
+ if (ArgTy->hasFloatingRepresentation())
+ return Builder.CreateFCmp(InstInfo.FCmp, LHS, RHS,
+ llvm::Twine(InstInfo.Name) + NameSuffix);
+ if (ArgTy->isIntegralOrEnumerationType() || ArgTy->isPointerType()) {
+ auto Inst =
+ ArgTy->hasSignedIntegerRepresentation() ? InstInfo.SCmp : InstInfo.UCmp;
+ return Builder.CreateICmp(Inst, LHS, RHS,
+ llvm::Twine(InstInfo.Name) + NameSuffix);
+ }
+
+ llvm_unreachable("unsupported aggregate binary expression should have "
+ "already been handled");
+}
+
+void AggExprEmitter::VisitBinCmp(const BinaryOperator *E) {
+ using llvm::BasicBlock;
+ using llvm::PHINode;
+ using llvm::Value;
+ assert(CGF.getContext().hasSameType(E->getLHS()->getType(),
+ E->getRHS()->getType()));
+ const ComparisonCategoryInfo &CmpInfo =
+ CGF.getContext().CompCategories.getInfoForType(E->getType());
+ assert(CmpInfo.Record->isTriviallyCopyable() &&
+ "cannot copy non-trivially copyable aggregate");
+
+ QualType ArgTy = E->getLHS()->getType();
+
+ // TODO: Handle comparing these types.
+ if (ArgTy->isVectorType())
+ return CGF.ErrorUnsupported(
+ E, "aggregate three-way comparison with vector arguments");
+ if (!ArgTy->isIntegralOrEnumerationType() && !ArgTy->isRealFloatingType() &&
+ !ArgTy->isNullPtrType() && !ArgTy->isPointerType() &&
+ !ArgTy->isMemberPointerType() && !ArgTy->isAnyComplexType()) {
+ return CGF.ErrorUnsupported(E, "aggregate three-way comparison");
+ }
+ bool IsComplex = ArgTy->isAnyComplexType();
+
+ // Evaluate the operands to the expression and extract their values.
+ auto EmitOperand = [&](Expr *E) -> std::pair<Value *, Value *> {
+ RValue RV = CGF.EmitAnyExpr(E);
+ if (RV.isScalar())
+ return {RV.getScalarVal(), nullptr};
+ if (RV.isAggregate())
+ return {RV.getAggregatePointer(), nullptr};
+ assert(RV.isComplex());
+ return RV.getComplexVal();
+ };
+ auto LHSValues = EmitOperand(E->getLHS()),
+ RHSValues = EmitOperand(E->getRHS());
+
+ auto EmitCmp = [&](CompareKind K) {
+ Value *Cmp = EmitCompare(Builder, CGF, E, LHSValues.first, RHSValues.first,
+ K, IsComplex ? ".r" : "");
+ if (!IsComplex)
+ return Cmp;
+ assert(K == CompareKind::CK_Equal);
+ Value *CmpImag = EmitCompare(Builder, CGF, E, LHSValues.second,
+ RHSValues.second, K, ".i");
+ return Builder.CreateAnd(Cmp, CmpImag, "and.eq");
+ };
+ auto EmitCmpRes = [&](const ComparisonCategoryInfo::ValueInfo *VInfo) {
+ return Builder.getInt(VInfo->getIntValue());
+ };
+
+ Value *Select;
+ if (ArgTy->isNullPtrType()) {
+ Select = EmitCmpRes(CmpInfo.getEqualOrEquiv());
+ } else if (CmpInfo.isEquality()) {
+ Select = Builder.CreateSelect(
+ EmitCmp(CK_Equal), EmitCmpRes(CmpInfo.getEqualOrEquiv()),
+ EmitCmpRes(CmpInfo.getNonequalOrNonequiv()), "sel.eq");
+ } else if (!CmpInfo.isPartial()) {
+ Value *SelectOne =
+ Builder.CreateSelect(EmitCmp(CK_Less), EmitCmpRes(CmpInfo.getLess()),
+ EmitCmpRes(CmpInfo.getGreater()), "sel.lt");
+ Select = Builder.CreateSelect(EmitCmp(CK_Equal),
+ EmitCmpRes(CmpInfo.getEqualOrEquiv()),
+ SelectOne, "sel.eq");
+ } else {
+ Value *SelectEq = Builder.CreateSelect(
+ EmitCmp(CK_Equal), EmitCmpRes(CmpInfo.getEqualOrEquiv()),
+ EmitCmpRes(CmpInfo.getUnordered()), "sel.eq");
+ Value *SelectGT = Builder.CreateSelect(EmitCmp(CK_Greater),
+ EmitCmpRes(CmpInfo.getGreater()),
+ SelectEq, "sel.gt");
+ Select = Builder.CreateSelect(
+ EmitCmp(CK_Less), EmitCmpRes(CmpInfo.getLess()), SelectGT, "sel.lt");
+ }
+ // Create the return value in the destination slot.
+ EnsureDest(E->getType());
+ LValue DestLV = CGF.MakeAddrLValue(Dest.getAddress(), E->getType());
+
+ // Emit the address of the first (and only) field in the comparison category
+ // type, and initialize it from the constant integer value selected above.
+ LValue FieldLV = CGF.EmitLValueForFieldInitialization(
+ DestLV, *CmpInfo.Record->field_begin());
+ CGF.EmitStoreThroughLValue(RValue::get(Select), FieldLV, /*IsInit*/ true);
+
+ // All done! The result is in the Dest slot.
+}
+
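Concretely, for a three-way comparison of ordered scalars this produces nested selects over the comparison category's constant values; a sketch of the emitted IR for a strong ordering (iN stands for the category's underlying value type):

    // C++2a source:  auto r = a <=> b;   // a, b: int
    // %cmp.lt = icmp slt i32 %a, %b
    // %sel.lt = select i1 %cmp.lt, iN <less>, iN <greater>
    // %cmp.eq = icmp eq  i32 %a, %b
    // %sel.eq = select i1 %cmp.eq, iN <equal>, iN %sel.lt
    // %sel.eq is then stored into the category type's single field.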
void AggExprEmitter::VisitBinaryOperator(const BinaryOperator *E) {
if (E->getOpcode() == BO_PtrMemD || E->getOpcode() == BO_PtrMemI)
VisitPointerToDataMemberBinaryOperator(E);
@@ -890,7 +1135,8 @@ void AggExprEmitter::VisitBinAssign(const BinaryOperator *E) {
EmitCopy(E->getLHS()->getType(),
AggValueSlot::forLValue(LHS, AggValueSlot::IsDestructed,
needsGC(E->getLHS()->getType()),
- AggValueSlot::IsAliased),
+ AggValueSlot::IsAliased,
+ AggValueSlot::MayOverlap),
Dest);
return;
}
@@ -911,7 +1157,8 @@ void AggExprEmitter::VisitBinAssign(const BinaryOperator *E) {
AggValueSlot LHSSlot =
AggValueSlot::forLValue(LHS, AggValueSlot::IsDestructed,
needsGC(E->getLHS()->getType()),
- AggValueSlot::IsAliased);
+ AggValueSlot::IsAliased,
+ AggValueSlot::MayOverlap);
// A non-volatile aggregate destination might have volatile member.
if (!LHSSlot.isVolatile() &&
CGF.hasVolatileMember(E->getLHS()->getType()))
@@ -1089,6 +1336,7 @@ AggExprEmitter::EmitInitializationToLValue(Expr *E, LValue LV) {
AggValueSlot::IsDestructed,
AggValueSlot::DoesNotNeedGCBarriers,
AggValueSlot::IsNotAliased,
+ AggValueSlot::MayOverlap,
Dest.isZeroed()));
return;
case TEK_Scalar:
@@ -1156,11 +1404,8 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) {
// Handle initialization of an array.
if (E->getType()->isArrayType()) {
- QualType elementType =
- CGF.getContext().getAsArrayType(E->getType())->getElementType();
-
auto AType = cast<llvm::ArrayType>(Dest.getAddress().getElementType());
- EmitArrayInit(Dest.getAddress(), AType, elementType, E);
+ EmitArrayInit(Dest.getAddress(), AType, E->getType(), E);
return;
}
@@ -1190,11 +1435,12 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) {
Address V = CGF.GetAddressOfDirectBaseInCompleteClass(
Dest.getAddress(), CXXRD, BaseRD,
/*isBaseVirtual*/ false);
- AggValueSlot AggSlot =
- AggValueSlot::forAddr(V, Qualifiers(),
- AggValueSlot::IsDestructed,
- AggValueSlot::DoesNotNeedGCBarriers,
- AggValueSlot::IsNotAliased);
+ AggValueSlot AggSlot = AggValueSlot::forAddr(
+ V, Qualifiers(),
+ AggValueSlot::IsDestructed,
+ AggValueSlot::DoesNotNeedGCBarriers,
+ AggValueSlot::IsNotAliased,
+ CGF.overlapForBaseInit(CXXRD, BaseRD, Base.isVirtual()));
CGF.EmitAggExpr(E->getInit(curInitIndex++), AggSlot);
if (QualType::DestructionKind dtorKind =
@@ -1375,7 +1621,9 @@ void AggExprEmitter::VisitArrayInitLoopExpr(const ArrayInitLoopExpr *E,
// If the subexpression is an ArrayInitLoopExpr, share its cleanup.
auto elementSlot = AggValueSlot::forLValue(
elementLV, AggValueSlot::IsDestructed,
- AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased);
+ AggValueSlot::DoesNotNeedGCBarriers,
+ AggValueSlot::IsNotAliased,
+ AggValueSlot::DoesNotOverlap);
AggExprEmitter(CGF, elementSlot, false)
.VisitArrayInitLoopExpr(InnerLoop, outerBegin);
} else
@@ -1425,6 +1673,8 @@ static CharUnits GetNumNonZeroBytesInInit(const Expr *E, CodeGenFunction &CGF) {
// If this is an initlist expr, sum up the size of sizes of the (present)
// elements. If this is something weird, assume the whole thing is non-zero.
const InitListExpr *ILE = dyn_cast<InitListExpr>(E);
+ while (ILE && ILE->isTransparent())
+ ILE = dyn_cast<InitListExpr>(ILE->getInit(0));
if (!ILE || !CGF.getTypes().isZeroInitializable(ILE->getType()))
return CGF.getContext().getTypeSizeInChars(E->getType());
@@ -1491,7 +1741,7 @@ static void CheckAggExprForMemSetUse(AggValueSlot &Slot, const Expr *E,
}
// If the type is 16-bytes or smaller, prefer individual stores over memset.
- CharUnits Size = CGF.getContext().getTypeSizeInChars(E->getType());
+ CharUnits Size = Slot.getPreferredSize(CGF.getContext(), E->getType());
if (Size <= CharUnits::fromQuantity(16))
return;
@@ -1537,16 +1787,42 @@ LValue CodeGenFunction::EmitAggExprToLValue(const Expr *E) {
LValue LV = MakeAddrLValue(Temp, E->getType());
EmitAggExpr(E, AggValueSlot::forLValue(LV, AggValueSlot::IsNotDestructed,
AggValueSlot::DoesNotNeedGCBarriers,
- AggValueSlot::IsNotAliased));
+ AggValueSlot::IsNotAliased,
+ AggValueSlot::DoesNotOverlap));
return LV;
}
-void CodeGenFunction::EmitAggregateCopy(Address DestPtr,
- Address SrcPtr, QualType Ty,
- bool isVolatile,
- bool isAssignment) {
+AggValueSlot::Overlap_t CodeGenFunction::overlapForBaseInit(
+ const CXXRecordDecl *RD, const CXXRecordDecl *BaseRD, bool IsVirtual) {
+ // Virtual bases are initialized first, in address order, so there's never
+ // any overlap during their initialization.
+ //
+ // FIXME: Under P0840, this is no longer true: the tail padding of a vbase
+ // of a field could be reused by a vbase of a containing class.
+ if (IsVirtual)
+ return AggValueSlot::DoesNotOverlap;
+
+ // If the base class is laid out entirely within the nvsize of the derived
+ // class, its tail padding cannot yet be initialized, so we can issue
+ // stores at the full width of the base class.
+ const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD);
+ if (Layout.getBaseClassOffset(BaseRD) +
+ getContext().getASTRecordLayout(BaseRD).getSize() <=
+ Layout.getNonVirtualSize())
+ return AggValueSlot::DoesNotOverlap;
+
+ // The tail padding may contain values we need to preserve.
+ return AggValueSlot::MayOverlap;
+}
+
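The nvsize check matters for layouts where a virtual base is packed into a non-virtual base's tail padding; since virtual bases are initialized first, a full-width store to the non-virtual base would clobber them. An Itanium x86-64 sketch:

    struct B { virtual ~B(); char c; }; // size 16, dsize 9
    struct V { char v; };
    struct D : B, virtual V {};         // V may land at offset 9, inside
                                        // B's tail padding; it is already
                                        // constructed when B is initialized.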
+void CodeGenFunction::EmitAggregateCopy(LValue Dest, LValue Src, QualType Ty,
+ AggValueSlot::Overlap_t MayOverlap,
+ bool isVolatile) {
assert(!Ty->isAnyComplexType() && "Shouldn't happen for complex");
+ Address DestPtr = Dest.getAddress();
+ Address SrcPtr = Src.getAddress();
+
if (getLangOpts().CPlusPlus) {
if (const RecordType *RT = Ty->getAs<RecordType>()) {
CXXRecordDecl *Record = cast<CXXRecordDecl>(RT->getDecl());
@@ -1562,7 +1838,7 @@ void CodeGenFunction::EmitAggregateCopy(Address DestPtr,
return;
}
}
-
+
// Aggregate assignment turns into llvm.memcpy. This is almost valid per
// C99 6.5.16.1p3, which states "If the value being stored in an object is
// read from another object that overlaps in anyway the storage of the first
@@ -1574,12 +1850,11 @@ void CodeGenFunction::EmitAggregateCopy(Address DestPtr,
// implementation handles this case safely. If there is a libc that does not
// safely handle this, we can add a target hook.
- // Get data size info for this aggregate. If this is an assignment,
- // don't copy the tail padding, because we might be assigning into a
- // base subobject where the tail padding is claimed. Otherwise,
- // copying it is fine.
+ // Get data size info for this aggregate. Don't copy the tail padding if this
+ // might be a potentially-overlapping subobject, since the tail padding might
+ // be occupied by a different object. Otherwise, copying it is fine.
std::pair<CharUnits, CharUnits> TypeInfo;
- if (isAssignment)
+ if (MayOverlap)
TypeInfo = getContext().getTypeInfoDataSizeInChars(Ty);
else
TypeInfo = getContext().getTypeInfoInChars(Ty);
@@ -1591,22 +1866,11 @@ void CodeGenFunction::EmitAggregateCopy(Address DestPtr,
getContext().getAsArrayType(Ty))) {
QualType BaseEltTy;
SizeVal = emitArrayLength(VAT, BaseEltTy, DestPtr);
- TypeInfo = getContext().getTypeInfoDataSizeInChars(BaseEltTy);
- std::pair<CharUnits, CharUnits> LastElementTypeInfo;
- if (!isAssignment)
- LastElementTypeInfo = getContext().getTypeInfoInChars(BaseEltTy);
+ TypeInfo = getContext().getTypeInfoInChars(BaseEltTy);
assert(!TypeInfo.first.isZero());
SizeVal = Builder.CreateNUWMul(
SizeVal,
llvm::ConstantInt::get(SizeTy, TypeInfo.first.getQuantity()));
- if (!isAssignment) {
- SizeVal = Builder.CreateNUWSub(
- SizeVal,
- llvm::ConstantInt::get(SizeTy, TypeInfo.first.getQuantity()));
- SizeVal = Builder.CreateNUWAdd(
- SizeVal, llvm::ConstantInt::get(
- SizeTy, LastElementTypeInfo.first.getQuantity()));
- }
}
}
if (!SizeVal) {
@@ -1657,4 +1921,10 @@ void CodeGenFunction::EmitAggregateCopy(Address DestPtr,
// the optimizer wishes to expand it in to scalar memory operations.
if (llvm::MDNode *TBAAStructTag = CGM.getTBAAStructInfo(Ty))
Inst->setMetadata(llvm::LLVMContext::MD_tbaa_struct, TBAAStructTag);
+
+ if (CGM.getCodeGenOpts().NewStructPathTBAA) {
+ TBAAAccessInfo TBAAInfo = CGM.mergeTBAAInfoForMemoryTransfer(
+ Dest.getTBAAInfo(), Src.getTBAAInfo());
+ CGM.DecorateInstructionWithTBAA(Inst, TBAAInfo);
+ }
}
diff --git a/lib/CodeGen/CGExprCXX.cpp b/lib/CodeGen/CGExprCXX.cpp
index c32f1e5415da..8955d8a4a83c 100644
--- a/lib/CodeGen/CGExprCXX.cpp
+++ b/lib/CodeGen/CGExprCXX.cpp
@@ -242,11 +242,15 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr(
}
}
- Address This = Address::invalid();
- if (IsArrow)
- This = EmitPointerWithAlignment(Base);
- else
- This = EmitLValue(Base).getAddress();
+ LValue This;
+ if (IsArrow) {
+ LValueBaseInfo BaseInfo;
+ TBAAAccessInfo TBAAInfo;
+ Address ThisValue = EmitPointerWithAlignment(Base, &BaseInfo, &TBAAInfo);
+ This = MakeAddrLValue(ThisValue, Base->getType(), BaseInfo, TBAAInfo);
+ } else {
+ This = EmitLValue(Base);
+ }
if (MD->isTrivial() || (MD->isDefaulted() && MD->getParent()->isUnion())) {
@@ -261,10 +265,10 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr(
// when it isn't necessary; just produce the proper effect here.
LValue RHS = isa<CXXOperatorCallExpr>(CE)
? MakeNaturalAlignAddrLValue(
- (*RtlArgs)[0].RV.getScalarVal(),
+ (*RtlArgs)[0].getRValue(*this).getScalarVal(),
(*(CE->arg_begin() + 1))->getType())
: EmitLValue(*CE->arg_begin());
- EmitAggregateAssign(This, RHS.getAddress(), CE->getType());
+ EmitAggregateAssign(This, RHS, CE->getType());
return RValue::get(This.getPointer());
}
@@ -272,8 +276,13 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr(
cast<CXXConstructorDecl>(MD)->isCopyOrMoveConstructor()) {
// Trivial move and copy ctor are the same.
assert(CE->getNumArgs() == 1 && "unexpected argcount for trivial ctor");
- Address RHS = EmitLValue(*CE->arg_begin()).getAddress();
- EmitAggregateCopy(This, RHS, (*CE->arg_begin())->getType());
+ const Expr *Arg = *CE->arg_begin();
+ LValue RHS = EmitLValue(Arg);
+ LValue Dest = MakeAddrLValue(This.getAddress(), Arg->getType());
+ // This is the MSVC p->Ctor::Ctor(...) extension. We assume that's
+ // constructing a new complete object of type Ctor.
+ EmitAggregateCopy(Dest, RHS, Arg->getType(),
+ AggValueSlot::DoesNotOverlap);
return RValue::get(This.getPointer());
}
llvm_unreachable("unknown trivial member function");
@@ -335,7 +344,8 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr(
assert(ReturnValue.isNull() && "Destructor shouldn't have return value");
if (UseVirtualCall) {
CGM.getCXXABI().EmitVirtualDestructorCall(
- *this, Dtor, Dtor_Complete, This, cast<CXXMemberCallExpr>(CE));
+ *this, Dtor, Dtor_Complete, This.getAddress(),
+ cast<CXXMemberCallExpr>(CE));
} else {
CGCallee Callee;
if (getLangOpts().AppleKext && MD->isVirtual() && HasQualifier)
@@ -364,15 +374,15 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr(
CGM.GetAddrOfFunction(GlobalDecl(Ctor, Ctor_Complete), Ty),
Ctor);
} else if (UseVirtualCall) {
- Callee = CGM.getCXXABI().getVirtualFunctionPointer(*this, MD, This, Ty,
- CE->getLocStart());
+ Callee = CGCallee::forVirtual(CE, MD, This.getAddress(), Ty);
} else {
if (SanOpts.has(SanitizerKind::CFINVCall) &&
MD->getParent()->isDynamicClass()) {
llvm::Value *VTable;
const CXXRecordDecl *RD;
std::tie(VTable, RD) =
- CGM.getCXXABI().LoadVTablePtr(*this, This, MD->getParent());
+ CGM.getCXXABI().LoadVTablePtr(*this, This.getAddress(),
+ MD->getParent());
EmitVTablePtrCheckForCall(RD, VTable, CFITCK_NVCall, CE->getLocStart());
}
@@ -388,8 +398,10 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr(
}
if (MD->isVirtual()) {
- This = CGM.getCXXABI().adjustThisArgumentForVirtualFunctionCall(
- *this, CalleeDecl, This, UseVirtualCall);
+ Address NewThisAddr =
+ CGM.getCXXABI().adjustThisArgumentForVirtualFunctionCall(
+ *this, CalleeDecl, This.getAddress(), UseVirtualCall);
+ This.setAddress(NewThisAddr);
}
return EmitCXXMemberOrOperatorCall(
@@ -622,7 +634,7 @@ CodeGenFunction::EmitCXXConstructExpr(const CXXConstructExpr *E,
// Call the constructor.
EmitCXXConstructorCall(CD, Type, ForVirtualBase, Delegating,
- Dest.getAddress(), E);
+ Dest.getAddress(), E, Dest.mayOverlap());
}
}
@@ -924,7 +936,8 @@ static llvm::Value *EmitCXXNewAllocSize(CodeGenFunction &CGF,
}
static void StoreAnyExprIntoOneUnit(CodeGenFunction &CGF, const Expr *Init,
- QualType AllocType, Address NewPtr) {
+ QualType AllocType, Address NewPtr,
+ AggValueSlot::Overlap_t MayOverlap) {
// FIXME: Refactor with EmitExprAsInit.
switch (CGF.getEvaluationKind(AllocType)) {
case TEK_Scalar:
@@ -940,7 +953,8 @@ static void StoreAnyExprIntoOneUnit(CodeGenFunction &CGF, const Expr *Init,
= AggValueSlot::forAddr(NewPtr, AllocType.getQualifiers(),
AggValueSlot::IsDestructed,
AggValueSlot::DoesNotNeedGCBarriers,
- AggValueSlot::IsNotAliased);
+ AggValueSlot::IsNotAliased,
+ MayOverlap);
CGF.EmitAggExpr(Init, Slot);
return;
}
@@ -1009,7 +1023,8 @@ void CodeGenFunction::EmitNewArrayInitializer(
AggValueSlot::forAddr(CurPtr, ElementType.getQualifiers(),
AggValueSlot::IsDestructed,
AggValueSlot::DoesNotNeedGCBarriers,
- AggValueSlot::IsNotAliased);
+ AggValueSlot::IsNotAliased,
+ AggValueSlot::DoesNotOverlap);
EmitAggExpr(ILE->getInit(0), Slot);
// Move past these elements.
@@ -1074,7 +1089,8 @@ void CodeGenFunction::EmitNewArrayInitializer(
// an array, and we have an array filler, we can fold together the two
// initialization loops.
StoreAnyExprIntoOneUnit(*this, ILE->getInit(i),
- ILE->getInit(i)->getType(), CurPtr);
+ ILE->getInit(i)->getType(), CurPtr,
+ AggValueSlot::DoesNotOverlap);
CurPtr = Address(Builder.CreateInBoundsGEP(CurPtr.getPointer(),
Builder.getSize(1),
"array.exp.next"),
@@ -1227,7 +1243,8 @@ void CodeGenFunction::EmitNewArrayInitializer(
}
// Emit the initializer into this element.
- StoreAnyExprIntoOneUnit(*this, Init, Init->getType(), CurPtr);
+ StoreAnyExprIntoOneUnit(*this, Init, Init->getType(), CurPtr,
+ AggValueSlot::DoesNotOverlap);
// Leave the Cleanup if we entered one.
if (CleanupDominator) {
@@ -1258,7 +1275,8 @@ static void EmitNewInitializer(CodeGenFunction &CGF, const CXXNewExpr *E,
CGF.EmitNewArrayInitializer(E, ElementType, ElementTy, NewPtr, NumElements,
AllocSizeWithoutCookie);
else if (const Expr *Init = E->getInitializer())
- StoreAnyExprIntoOneUnit(CGF, Init, E->getAllocatedType(), NewPtr);
+ StoreAnyExprIntoOneUnit(CGF, Init, E->getAllocatedType(), NewPtr,
+ AggValueSlot::DoesNotOverlap);
}
/// Emit a call to an operator new or operator delete function, as implicitly
@@ -1298,19 +1316,19 @@ static RValue EmitNewDeleteCall(CodeGenFunction &CGF,
}
RValue CodeGenFunction::EmitBuiltinNewDeleteCall(const FunctionProtoType *Type,
- const Expr *Arg,
+ const CallExpr *TheCall,
bool IsDelete) {
CallArgList Args;
- const Stmt *ArgS = Arg;
- EmitCallArgs(Args, *Type->param_type_begin(), llvm::makeArrayRef(ArgS));
+ EmitCallArgs(Args, Type->getParamTypes(), TheCall->arguments());
// Find the allocation or deallocation function that we're calling.
ASTContext &Ctx = getContext();
DeclarationName Name = Ctx.DeclarationNames
.getCXXOperatorName(IsDelete ? OO_Delete : OO_New);
+
for (auto *Decl : Ctx.getTranslationUnitDecl()->lookup(Name))
if (auto *FD = dyn_cast<FunctionDecl>(Decl))
if (Ctx.hasSameType(FD->getType(), QualType(Type, 0)))
- return EmitNewDeleteCall(*this, cast<FunctionDecl>(Decl), Type, Args);
+ return EmitNewDeleteCall(*this, FD, Type, Args);
llvm_unreachable("predeclared global operator new/delete is missing");
}
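Since the arguments now come from the CallExpr itself rather than a single fixed expression, multi-argument builtin forms can be emitted as well; a hedged sketch (assuming the sized-deallocation signature is available):

    void release(void *p, unsigned long n) {
      __builtin_operator_delete(p, n); // sized delete: two call arguments
    }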
@@ -1481,7 +1499,7 @@ static void EnterNewDeleteCleanup(CodeGenFunction &CGF,
AllocAlign);
for (unsigned I = 0, N = E->getNumPlacementArgs(); I != N; ++I) {
auto &Arg = NewArgs[I + NumNonPlacementArgs];
- Cleanup->setPlacementArg(I, Arg.RV, Arg.Ty);
+ Cleanup->setPlacementArg(I, Arg.getRValue(CGF), Arg.Ty);
}
return;
@@ -1512,8 +1530,8 @@ static void EnterNewDeleteCleanup(CodeGenFunction &CGF,
AllocAlign);
for (unsigned I = 0, N = E->getNumPlacementArgs(); I != N; ++I) {
auto &Arg = NewArgs[I + NumNonPlacementArgs];
- Cleanup->setPlacementArg(I, DominatingValue<RValue>::save(CGF, Arg.RV),
- Arg.Ty);
+ Cleanup->setPlacementArg(
+ I, DominatingValue<RValue>::save(CGF, Arg.getRValue(CGF)), Arg.Ty);
}
CGF.initFullExprCleanup();
@@ -1678,13 +1696,13 @@ llvm::Value *CodeGenFunction::EmitCXXNewExpr(const CXXNewExpr *E) {
llvm::Type *elementTy = ConvertTypeForMem(allocType);
Address result = Builder.CreateElementBitCast(allocation, elementTy);
- // Passing pointer through invariant.group.barrier to avoid propagation of
+ // Passing pointer through launder.invariant.group to avoid propagation of
  // vptr information that may be embedded in the previous type.
  // To avoid breaking LTO between different optimization levels, we do it
  // regardless of the optimization level.
if (CGM.getCodeGenOpts().StrictVTablePointers &&
allocator->isReservedGlobalPlacementOperator())
- result = Address(Builder.CreateInvariantGroupBarrier(result.getPointer()),
+ result = Address(Builder.CreateLaunderInvariantGroup(result.getPointer()),
result.getAlignment());
EmitNewInitializer(*this, E, allocType, elementTy, result, numElements,
diff --git a/lib/CodeGen/CGExprComplex.cpp b/lib/CodeGen/CGExprComplex.cpp
index 9094d3f8a91c..fb176093a741 100644
--- a/lib/CodeGen/CGExprComplex.cpp
+++ b/lib/CodeGen/CGExprComplex.cpp
@@ -155,8 +155,9 @@ public:
}
ComplexPairTy VisitOpaqueValueExpr(OpaqueValueExpr *E) {
if (E->isGLValue())
- return EmitLoadOfLValue(CGF.getOpaqueLValueMapping(E), E->getExprLoc());
- return CGF.getOpaqueRValueMapping(E).getComplexVal();
+ return EmitLoadOfLValue(CGF.getOrCreateOpaqueLValueMapping(E),
+ E->getExprLoc());
+ return CGF.getOrCreateOpaqueRValueMapping(E).getComplexVal();
}
ComplexPairTy VisitPseudoObjectExpr(PseudoObjectExpr *E) {
@@ -594,7 +595,7 @@ ComplexPairTy ComplexExprEmitter::EmitBinSub(const BinOpInfo &Op) {
return ComplexPairTy(ResR, ResI);
}
-/// \brief Emit a libcall for a binary operation on complex types.
+/// Emit a libcall for a binary operation on complex types.
ComplexPairTy ComplexExprEmitter::EmitComplexBinOpLibCall(StringRef LibCallName,
const BinOpInfo &Op) {
CallArgList Args;
@@ -628,11 +629,11 @@ ComplexPairTy ComplexExprEmitter::EmitComplexBinOpLibCall(StringRef LibCallName,
llvm::Instruction *Call;
RValue Res = CGF.EmitCall(FuncInfo, Callee, ReturnValueSlot(), Args, &Call);
- cast<llvm::CallInst>(Call)->setCallingConv(CGF.CGM.getBuiltinCC());
+ cast<llvm::CallInst>(Call)->setCallingConv(CGF.CGM.getRuntimeCC());
return Res.getComplexVal();
}
-/// \brief Lookup the libcall name for a given floating point type complex
+/// Lookup the libcall name for a given floating point type complex
/// multiply.
static StringRef getComplexMultiplyLibCallName(llvm::Type *Ty) {
switch (Ty->getTypeID()) {
@@ -1055,7 +1056,7 @@ ComplexPairTy ComplexExprEmitter::VisitInitListExpr(InitListExpr *E) {
return Visit(E->getInit(0));
}
- // Empty init list intializes to null
+ // Empty init list initializes to null
assert(E->getNumInits() == 0 && "Unexpected number of inits");
QualType Ty = E->getType()->castAs<ComplexType>()->getElementType();
llvm::Type* LTy = CGF.ConvertType(Ty);
diff --git a/lib/CodeGen/CGExprConstant.cpp b/lib/CodeGen/CGExprConstant.cpp
index d1b9e13a6f93..cfd0b859233a 100644
--- a/lib/CodeGen/CGExprConstant.cpp
+++ b/lib/CodeGen/CGExprConstant.cpp
@@ -635,6 +635,72 @@ static ConstantAddress tryEmitGlobalCompoundLiteral(CodeGenModule &CGM,
return ConstantAddress(GV, Align);
}
+static llvm::Constant *
+EmitArrayConstant(CodeGenModule &CGM, const ConstantArrayType *DestType,
+ llvm::Type *CommonElementType, unsigned ArrayBound,
+ SmallVectorImpl<llvm::Constant *> &Elements,
+ llvm::Constant *Filler) {
+ // Figure out how long the initial prefix of non-zero elements is.
+ unsigned NonzeroLength = ArrayBound;
+ if (Elements.size() < NonzeroLength && Filler->isNullValue())
+ NonzeroLength = Elements.size();
+ if (NonzeroLength == Elements.size()) {
+ while (NonzeroLength > 0 && Elements[NonzeroLength - 1]->isNullValue())
+ --NonzeroLength;
+ }
+
+ if (NonzeroLength == 0) {
+ return llvm::ConstantAggregateZero::get(
+ CGM.getTypes().ConvertType(QualType(DestType, 0)));
+ }
+
+ // Add a zeroinitializer array filler if we have lots of trailing zeroes.
+ unsigned TrailingZeroes = ArrayBound - NonzeroLength;
+ if (TrailingZeroes >= 8) {
+ assert(Elements.size() >= NonzeroLength &&
+ "missing initializer for non-zero element");
+
+ // If all the elements had the same type up to the trailing zeroes, emit a
+ // struct of two arrays (the nonzero data and the zeroinitializer).
+ if (CommonElementType && NonzeroLength >= 8) {
+ llvm::Constant *Initial = llvm::ConstantArray::get(
+ llvm::ArrayType::get(CommonElementType, NonzeroLength),
+ makeArrayRef(Elements).take_front(NonzeroLength));
+ Elements.resize(2);
+ Elements[0] = Initial;
+ } else {
+ Elements.resize(NonzeroLength + 1);
+ }
+
+ auto *FillerType =
+ CommonElementType
+ ? CommonElementType
+ : CGM.getTypes().ConvertType(DestType->getElementType());
+ FillerType = llvm::ArrayType::get(FillerType, TrailingZeroes);
+ Elements.back() = llvm::ConstantAggregateZero::get(FillerType);
+ CommonElementType = nullptr;
+ } else if (Elements.size() != ArrayBound) {
+ // Otherwise pad to the right size with the filler if necessary.
+ Elements.resize(ArrayBound, Filler);
+ if (Filler->getType() != CommonElementType)
+ CommonElementType = nullptr;
+ }
+
+ // If all elements have the same type, just emit an array constant.
+ if (CommonElementType)
+ return llvm::ConstantArray::get(
+ llvm::ArrayType::get(CommonElementType, ArrayBound), Elements);
+
+ // We have mixed types. Use a packed struct.
+ llvm::SmallVector<llvm::Type *, 16> Types;
+ Types.reserve(Elements.size());
+ for (llvm::Constant *Elt : Elements)
+ Types.push_back(Elt->getType());
+ llvm::StructType *SType =
+ llvm::StructType::get(CGM.getLLVMContext(), Types, true);
+ return llvm::ConstantStruct::get(SType, Elements);
+}
+
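For instance, with at least eight leading non-zero elements and at least eight trailing zeroes, the emitted constant becomes a packed struct of a data array and a zeroinitializer array instead of one fully spelled-out array; roughly:

    // int arr[100] = {1, 2, 3, 4, 5, 6, 7, 8};
    // before: [100 x i32] [i32 1, ..., i32 8, i32 0, i32 0, ...92 zeroes]
    // after:  <{ [8 x i32] [i32 1, ..., i32 8], [92 x i32] zeroinitializer }>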
/// This class only needs to handle two cases:
/// 1) Literals (this is used by APValue emission to emit literals).
/// 2) Arrays, structs and unions (outside C++11 mode, we don't currently
@@ -832,60 +898,47 @@ public:
}
llvm::Constant *EmitArrayInitialization(InitListExpr *ILE, QualType T) {
- llvm::ArrayType *AType =
- cast<llvm::ArrayType>(ConvertType(ILE->getType()));
- llvm::Type *ElemTy = AType->getElementType();
+ auto *CAT = CGM.getContext().getAsConstantArrayType(ILE->getType());
+ assert(CAT && "can't emit array init for non-constant-bound array");
unsigned NumInitElements = ILE->getNumInits();
- unsigned NumElements = AType->getNumElements();
+ unsigned NumElements = CAT->getSize().getZExtValue();
// Initialising an array requires us to automatically
// initialise any elements that have not been initialised explicitly
unsigned NumInitableElts = std::min(NumInitElements, NumElements);
- QualType EltType = CGM.getContext().getAsArrayType(T)->getElementType();
+ QualType EltType = CAT->getElementType();
// Initialize remaining array elements.
- llvm::Constant *fillC;
- if (Expr *filler = ILE->getArrayFiller())
+ llvm::Constant *fillC = nullptr;
+ if (Expr *filler = ILE->getArrayFiller()) {
fillC = Emitter.tryEmitAbstractForMemory(filler, EltType);
- else
- fillC = Emitter.emitNullForMemory(EltType);
- if (!fillC)
- return nullptr;
-
- // Try to use a ConstantAggregateZero if we can.
- if (fillC->isNullValue() && !NumInitableElts)
- return llvm::ConstantAggregateZero::get(AType);
+ if (!fillC)
+ return nullptr;
+ }
// Copy initializer elements.
SmallVector<llvm::Constant*, 16> Elts;
- Elts.reserve(NumInitableElts + NumElements);
+ if (fillC && fillC->isNullValue())
+ Elts.reserve(NumInitableElts + 1);
+ else
+ Elts.reserve(NumElements);
- bool RewriteType = false;
+ llvm::Type *CommonElementType = nullptr;
for (unsigned i = 0; i < NumInitableElts; ++i) {
Expr *Init = ILE->getInit(i);
llvm::Constant *C = Emitter.tryEmitPrivateForMemory(Init, EltType);
if (!C)
return nullptr;
- RewriteType |= (C->getType() != ElemTy);
+ if (i == 0)
+ CommonElementType = C->getType();
+ else if (C->getType() != CommonElementType)
+ CommonElementType = nullptr;
Elts.push_back(C);
}
- RewriteType |= (fillC->getType() != ElemTy);
- Elts.resize(NumElements, fillC);
-
- if (RewriteType) {
- // FIXME: Try to avoid packing the array
- std::vector<llvm::Type*> Types;
- Types.reserve(NumInitableElts + NumElements);
- for (unsigned i = 0, e = Elts.size(); i < e; ++i)
- Types.push_back(Elts[i]->getType());
- llvm::StructType *SType = llvm::StructType::get(AType->getContext(),
- Types, true);
- return llvm::ConstantStruct::get(SType, Elts);
- }
-
- return llvm::ConstantArray::get(AType, Elts);
+ return EmitArrayConstant(CGM, CAT, CommonElementType, NumElements, Elts,
+ fillC);
}
llvm::Constant *EmitRecordInitialization(InitListExpr *ILE, QualType T) {
@@ -1881,40 +1934,31 @@ llvm::Constant *ConstantEmitter::tryEmitPrivate(const APValue &Value,
case APValue::Union:
return ConstStructBuilder::BuildStruct(*this, Value, DestType);
case APValue::Array: {
- const ArrayType *CAT = CGM.getContext().getAsArrayType(DestType);
+ const ConstantArrayType *CAT =
+ CGM.getContext().getAsConstantArrayType(DestType);
unsigned NumElements = Value.getArraySize();
unsigned NumInitElts = Value.getArrayInitializedElts();
// Emit array filler, if there is one.
llvm::Constant *Filler = nullptr;
- if (Value.hasArrayFiller())
+ if (Value.hasArrayFiller()) {
Filler = tryEmitAbstractForMemory(Value.getArrayFiller(),
CAT->getElementType());
-
- // Emit initializer elements.
- llvm::Type *CommonElementType =
- CGM.getTypes().ConvertType(CAT->getElementType());
-
- // Try to use a ConstantAggregateZero if we can.
- if (Filler && Filler->isNullValue() && !NumInitElts) {
- llvm::ArrayType *AType =
- llvm::ArrayType::get(CommonElementType, NumElements);
- return llvm::ConstantAggregateZero::get(AType);
+ if (!Filler)
+ return nullptr;
}
+ // Emit initializer elements.
SmallVector<llvm::Constant*, 16> Elts;
- Elts.reserve(NumElements);
- for (unsigned I = 0; I < NumElements; ++I) {
- llvm::Constant *C = Filler;
- if (I < NumInitElts) {
- C = tryEmitPrivateForMemory(Value.getArrayInitializedElt(I),
- CAT->getElementType());
- } else if (!Filler) {
- assert(Value.hasArrayFiller() &&
- "Missing filler for implicit elements of initializer");
- C = tryEmitPrivateForMemory(Value.getArrayFiller(),
- CAT->getElementType());
- }
+ if (Filler && Filler->isNullValue())
+ Elts.reserve(NumInitElts + 1);
+ else
+ Elts.reserve(NumElements);
+
+ llvm::Type *CommonElementType = nullptr;
+ for (unsigned I = 0; I < NumInitElts; ++I) {
+ llvm::Constant *C = tryEmitPrivateForMemory(
+ Value.getArrayInitializedElt(I), CAT->getElementType());
if (!C) return nullptr;
if (I == 0)
@@ -1924,20 +1968,8 @@ llvm::Constant *ConstantEmitter::tryEmitPrivate(const APValue &Value,
Elts.push_back(C);
}
- if (!CommonElementType) {
- // FIXME: Try to avoid packing the array
- std::vector<llvm::Type*> Types;
- Types.reserve(NumElements);
- for (unsigned i = 0, e = Elts.size(); i < e; ++i)
- Types.push_back(Elts[i]->getType());
- llvm::StructType *SType =
- llvm::StructType::get(CGM.getLLVMContext(), Types, true);
- return llvm::ConstantStruct::get(SType, Elts);
- }
-
- llvm::ArrayType *AType =
- llvm::ArrayType::get(CommonElementType, NumElements);
- return llvm::ConstantArray::get(AType, Elts);
+ return EmitArrayConstant(CGM, CAT, CommonElementType, NumElements, Elts,
+ Filler);
}
case APValue::MemberPointer:
return CGM.getCXXABI().EmitMemberPointer(Value, DestType);
diff --git a/lib/CodeGen/CGExprScalar.cpp b/lib/CodeGen/CGExprScalar.cpp
index c46215067a68..783f74c5026d 100644
--- a/lib/CodeGen/CGExprScalar.cpp
+++ b/lib/CodeGen/CGExprScalar.cpp
@@ -165,7 +165,7 @@ static bool CanElideOverflowCheck(const ASTContext &Ctx, const BinOpInfo &Op) {
// If a unary op has a widened operand, the op cannot overflow.
if (const auto *UO = dyn_cast<UnaryOperator>(Op.E))
- return IsWidenedIntegerOp(Ctx, UO->getSubExpr());
+ return !UO->canOverflow();
// We usually don't need overflow checks for binops with widened operands.
// Multiplication with promoted unsigned operands is a special case.
@@ -387,6 +387,9 @@ public:
Value *VisitIntegerLiteral(const IntegerLiteral *E) {
return Builder.getInt(E->getValue());
}
+ Value *VisitFixedPointLiteral(const FixedPointLiteral *E) {
+ return Builder.getInt(E->getValue());
+ }
Value *VisitFloatingLiteral(const FloatingLiteral *E) {
return llvm::ConstantFP::get(VMContext, E->getValue());
}
@@ -422,10 +425,11 @@ public:
Value *VisitOpaqueValueExpr(OpaqueValueExpr *E) {
if (E->isGLValue())
- return EmitLoadOfLValue(CGF.getOpaqueLValueMapping(E), E->getExprLoc());
+ return EmitLoadOfLValue(CGF.getOrCreateOpaqueLValueMapping(E),
+ E->getExprLoc());
// Otherwise, assume the mapping is the scalar directly.
- return CGF.getOpaqueRValueMapping(E).getScalarVal();
+ return CGF.getOrCreateOpaqueRValueMapping(E).getScalarVal();
}
Value *emitConstant(const CodeGenFunction::ConstantEmission &Constant,
@@ -1144,7 +1148,7 @@ Value *ScalarExprEmitter::EmitNullValue(QualType Ty) {
return CGF.EmitFromMemory(CGF.CGM.EmitNullConstant(Ty), Ty);
}
-/// \brief Emit a sanitization check for the given "binary" operation (which
+/// Emit a sanitization check for the given "binary" operation (which
/// might actually be a unary increment which has been lowered to a binary
/// operation). The check passes if all values in \p Checks (which are \c i1),
/// are \c true.
@@ -1617,6 +1621,24 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
CE->getLocStart());
}
+ if (CGF.CGM.getCodeGenOpts().StrictVTablePointers) {
+ const QualType SrcType = E->getType();
+
+ if (SrcType.mayBeNotDynamicClass() && DestTy.mayBeDynamicClass()) {
+      // Casting to a pointer that could carry dynamic information (provided
+      // by invariant.group) requires a launder.
+ Src = Builder.CreateLaunderInvariantGroup(Src);
+ } else if (SrcType.mayBeDynamicClass() && DestTy.mayBeNotDynamicClass()) {
+      // Casting to a pointer that does not carry dynamic information
+      // (provided by invariant.group) requires stripping it. Note that no
+      // strip is needed before the launder in the branch above, where the
+      // source cannot be a dynamic class but the destination can be: since
+      // launder(strip(src)) == launder(src), laundering alone already
+      // discards any stale dynamic information.
+ Src = Builder.CreateStripInvariantGroup(Src);
+ }
+ }
+
return Builder.CreateBitCast(Src, DstTy);
}
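Putting the two directions together under -fstrict-vtable-pointers (Dyn is any dynamic class; the intrinsic names follow the builder calls above):

    // Dyn *d = ...;
    // void *p  = d;        // may -> may-not carry: llvm.strip.invariant.group
    // Dyn  *d2 = (Dyn *)p; // may-not -> may carry: llvm.launder.invariant.group
    // (integer round-trips get the same treatment in the cases below)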
case CK_AddressSpaceConversion: {
@@ -1753,12 +1775,31 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
llvm::Value* IntResult =
Builder.CreateIntCast(Src, MiddleTy, InputSigned, "conv");
- return Builder.CreateIntToPtr(IntResult, DestLLVMTy);
+ auto *IntToPtr = Builder.CreateIntToPtr(IntResult, DestLLVMTy);
+
+ if (CGF.CGM.getCodeGenOpts().StrictVTablePointers) {
+    // Going from an integer to a pointer that could be dynamic requires
+    // reloading the dynamic information from invariant.group.
+ if (DestTy.mayBeDynamicClass())
+ IntToPtr = Builder.CreateLaunderInvariantGroup(IntToPtr);
+ }
+ return IntToPtr;
}
- case CK_PointerToIntegral:
+ case CK_PointerToIntegral: {
assert(!DestTy->isBooleanType() && "bool should use PointerToBool");
- return Builder.CreatePtrToInt(Visit(E), ConvertType(DestTy));
+ auto *PtrExpr = Visit(E);
+
+ if (CGF.CGM.getCodeGenOpts().StrictVTablePointers) {
+ const QualType SrcType = E->getType();
+
+      // Casting to an integer requires stripping the dynamic information,
+      // as an integer does not carry it.
+ if (SrcType.mayBeDynamicClass())
+ PtrExpr = Builder.CreateStripInvariantGroup(PtrExpr);
+ }
+ return Builder.CreatePtrToInt(PtrExpr, ConvertType(DestTy));
+ }
case CK_ToVoid: {
CGF.EmitIgnoredExpr(E);
return nullptr;
@@ -1873,7 +1914,7 @@ llvm::Value *ScalarExprEmitter::EmitIncDecConsiderOverflowBehavior(
return Builder.CreateNSWAdd(InVal, Amount, Name);
// Fall through.
case LangOptions::SOB_Trapping:
- if (IsWidenedIntegerOp(CGF.getContext(), E->getSubExpr()))
+ if (!E->canOverflow())
return Builder.CreateNSWAdd(InVal, Amount, Name);
return EmitOverflowCheckedBinOp(createBinOpInfoFromIncDec(E, InVal, IsInc));
}
@@ -1955,11 +1996,9 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
} else if (type->isIntegerType()) {
// Note that signed integer inc/dec with width less than int can't
// overflow because of promotion rules; we're just eliding a few steps here.
- bool CanOverflow = value->getType()->getIntegerBitWidth() >=
- CGF.IntTy->getIntegerBitWidth();
- if (CanOverflow && type->isSignedIntegerOrEnumerationType()) {
+ if (E->canOverflow() && type->isSignedIntegerOrEnumerationType()) {
value = EmitIncDecConsiderOverflowBehavior(E, value, isInc);
- } else if (CanOverflow && type->isUnsignedIntegerType() &&
+ } else if (E->canOverflow() && type->isUnsignedIntegerType() &&
CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow)) {
value =
EmitOverflowCheckedBinOp(createBinOpInfoFromIncDec(E, value, isInc));
@@ -1975,7 +2014,7 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
// VLA types don't have constant size.
if (const VariableArrayType *vla
= CGF.getContext().getAsVariableArrayType(type)) {
- llvm::Value *numElts = CGF.getVLASize(vla).first;
+ llvm::Value *numElts = CGF.getVLASize(vla).NumElts;
if (!isInc) numElts = Builder.CreateNSWNeg(numElts, "vla.negsize");
if (CGF.getLangOpts().isSignedOverflowDefined())
value = Builder.CreateGEP(value, numElts, "vla.inc");
@@ -2273,16 +2312,13 @@ ScalarExprEmitter::VisitUnaryExprOrTypeTraitExpr(
CGF.EmitIgnoredExpr(E->getArgumentExpr());
}
- QualType eltType;
- llvm::Value *numElts;
- std::tie(numElts, eltType) = CGF.getVLASize(VAT);
-
- llvm::Value *size = numElts;
+ auto VlaSize = CGF.getVLASize(VAT);
+ llvm::Value *size = VlaSize.NumElts;
// Scale the number of non-VLA elements by the non-VLA element size.
- CharUnits eltSize = CGF.getContext().getTypeSizeInChars(eltType);
+ CharUnits eltSize = CGF.getContext().getTypeSizeInChars(VlaSize.Type);
if (!eltSize.isOne())
- size = CGF.Builder.CreateNUWMul(CGF.CGM.getSize(eltSize), numElts);
+ size = CGF.Builder.CreateNUWMul(CGF.CGM.getSize(eltSize), size);
return size;
}
@@ -2769,7 +2805,7 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF,
if (const VariableArrayType *vla
= CGF.getContext().getAsVariableArrayType(elementType)) {
// The element count here is the total number of non-VLA elements.
- llvm::Value *numElements = CGF.getVLASize(vla).first;
+ llvm::Value *numElements = CGF.getVLASize(vla).NumElts;
// Effectively, the multiply by the VLA size is part of the GEP.
// GEP indexes are signed, and scaling an index isn't permitted to
@@ -2964,10 +3000,9 @@ Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) {
// For a variable-length array, this is going to be non-constant.
if (const VariableArrayType *vla
= CGF.getContext().getAsVariableArrayType(elementType)) {
- llvm::Value *numElements;
- std::tie(numElements, elementType) = CGF.getVLASize(vla);
-
- divisor = numElements;
+ auto VlaSize = CGF.getVLASize(vla);
+ elementType = VlaSize.Type;
+ divisor = VlaSize.NumElts;
// Scale the number of non-VLA elements by the non-VLA element size.
CharUnits eltSize = CGF.getContext().getTypeSizeInChars(elementType);
@@ -3243,6 +3278,23 @@ Value *ScalarExprEmitter::EmitCompare(const BinaryOperator *E,
Result = Builder.CreateICmp(SICmpOpc, LHS, RHS, "cmp");
} else {
// Unsigned integers and pointers.
+
+ if (CGF.CGM.getCodeGenOpts().StrictVTablePointers &&
+ !isa<llvm::ConstantPointerNull>(LHS) &&
+ !isa<llvm::ConstantPointerNull>(RHS)) {
+
+      // Dynamic information has to be stripped for comparisons, because it
+      // could otherwise leak: based on comparisons of pointers to dynamic
+      // objects, the optimizer could replace one pointer with another, which
+      // might be incorrect in the presence of invariant groups. Comparison
+      // with null is safe because null does not carry any dynamic
+      // information.
+ if (LHSTy.mayBeDynamicClass())
+ LHS = Builder.CreateStripInvariantGroup(LHS);
+ if (RHSTy.mayBeDynamicClass())
+ RHS = Builder.CreateStripInvariantGroup(RHS);
+ }
+
Result = Builder.CreateICmp(UICmpOpc, LHS, RHS, "cmp");
}
@@ -3433,6 +3485,12 @@ Value *ScalarExprEmitter::VisitBinLAnd(const BinaryOperator *E) {
// Insert an entry into the phi node for the edge with the value of RHSCond.
PN->addIncoming(RHSCond, RHSBlock);
+ // Artificial location to preserve the scope information
+ {
+ auto NL = ApplyDebugLocation::CreateArtificial(CGF);
+ PN->setDebugLoc(Builder.getCurrentDebugLocation());
+ }
+
// ZExt result to int.
return Builder.CreateZExtOrBitCast(PN, ResTy, "land.ext");
}
diff --git a/lib/CodeGen/CGGPUBuiltin.cpp b/lib/CodeGen/CGGPUBuiltin.cpp
index 48156b1b26b7..b5375ffb8db7 100644
--- a/lib/CodeGen/CGGPUBuiltin.cpp
+++ b/lib/CodeGen/CGGPUBuiltin.cpp
@@ -83,8 +83,9 @@ CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E,
/* ParamsToSkip = */ 0);
// We don't know how to emit non-scalar varargs.
- if (std::any_of(Args.begin() + 1, Args.end(),
- [](const CallArg &A) { return !A.RV.isScalar(); })) {
+ if (std::any_of(Args.begin() + 1, Args.end(), [&](const CallArg &A) {
+ return !A.getRValue(*this).isScalar();
+ })) {
CGM.ErrorUnsupported(E, "non-scalar arg to printf");
return RValue::get(llvm::ConstantInt::get(IntTy, 0));
}
@@ -97,7 +98,7 @@ CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E,
} else {
llvm::SmallVector<llvm::Type *, 8> ArgTypes;
for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I)
- ArgTypes.push_back(Args[I].RV.getScalarVal()->getType());
+ ArgTypes.push_back(Args[I].getRValue(*this).getScalarVal()->getType());
// Using llvm::StructType is correct only because printf doesn't accept
// aggregates. If we had to handle aggregates here, we'd have to manually
@@ -109,7 +110,7 @@ CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E,
for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I) {
llvm::Value *P = Builder.CreateStructGEP(AllocaTy, Alloca, I - 1);
- llvm::Value *Arg = Args[I].RV.getScalarVal();
+ llvm::Value *Arg = Args[I].getRValue(*this).getScalarVal();
Builder.CreateAlignedStore(Arg, P, DL.getPrefTypeAlignment(Arg->getType()));
}
BufferPtr = Builder.CreatePointerCast(Alloca, llvm::Type::getInt8PtrTy(Ctx));
@@ -117,6 +118,6 @@ CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E,
// Invoke vprintf and return.
llvm::Function* VprintfFunc = GetVprintfDeclaration(CGM.getModule());
- return RValue::get(
- Builder.CreateCall(VprintfFunc, {Args[0].RV.getScalarVal(), BufferPtr}));
+ return RValue::get(Builder.CreateCall(
+ VprintfFunc, {Args[0].getRValue(*this).getScalarVal(), BufferPtr}));
}
diff --git a/lib/CodeGen/CGLoopInfo.h b/lib/CodeGen/CGLoopInfo.h
index 15608c105dc7..9d5f23ff9a2a 100644
--- a/lib/CodeGen/CGLoopInfo.h
+++ b/lib/CodeGen/CGLoopInfo.h
@@ -32,62 +32,62 @@ class Attr;
class ASTContext;
namespace CodeGen {
-/// \brief Attributes that may be specified on loops.
+/// Attributes that may be specified on loops.
struct LoopAttributes {
explicit LoopAttributes(bool IsParallel = false);
void clear();
- /// \brief Generate llvm.loop.parallel metadata for loads and stores.
+ /// Generate llvm.loop.parallel metadata for loads and stores.
bool IsParallel;
- /// \brief State of loop vectorization or unrolling.
+ /// State of loop vectorization or unrolling.
enum LVEnableState { Unspecified, Enable, Disable, Full };
- /// \brief Value for llvm.loop.vectorize.enable metadata.
+ /// Value for llvm.loop.vectorize.enable metadata.
LVEnableState VectorizeEnable;
- /// \brief Value for llvm.loop.unroll.* metadata (enable, disable, or full).
+ /// Value for llvm.loop.unroll.* metadata (enable, disable, or full).
LVEnableState UnrollEnable;
- /// \brief Value for llvm.loop.vectorize.width metadata.
+ /// Value for llvm.loop.vectorize.width metadata.
unsigned VectorizeWidth;
- /// \brief Value for llvm.loop.interleave.count metadata.
+ /// Value for llvm.loop.interleave.count metadata.
unsigned InterleaveCount;
- /// \brief llvm.unroll.
+  /// Value for llvm.loop.unroll.count metadata.
unsigned UnrollCount;
- /// \brief Value for llvm.loop.distribute.enable metadata.
+ /// Value for llvm.loop.distribute.enable metadata.
LVEnableState DistributeEnable;
};
-/// \brief Information used when generating a structured loop.
+/// Information used when generating a structured loop.
class LoopInfo {
public:
- /// \brief Construct a new LoopInfo for the loop with entry Header.
+ /// Construct a new LoopInfo for the loop with entry Header.
LoopInfo(llvm::BasicBlock *Header, const LoopAttributes &Attrs,
const llvm::DebugLoc &StartLoc, const llvm::DebugLoc &EndLoc);
- /// \brief Get the loop id metadata for this loop.
+ /// Get the loop id metadata for this loop.
llvm::MDNode *getLoopID() const { return LoopID; }
- /// \brief Get the header block of this loop.
+ /// Get the header block of this loop.
llvm::BasicBlock *getHeader() const { return Header; }
- /// \brief Get the set of attributes active for this loop.
+ /// Get the set of attributes active for this loop.
const LoopAttributes &getAttributes() const { return Attrs; }
private:
- /// \brief Loop ID metadata.
+ /// Loop ID metadata.
llvm::MDNode *LoopID;
- /// \brief Header block of this loop.
+ /// Header block of this loop.
llvm::BasicBlock *Header;
- /// \brief The attributes for this loop.
+ /// The attributes for this loop.
LoopAttributes Attrs;
};
-/// \brief A stack of loop information corresponding to loop nesting levels.
+/// A stack of loop information corresponding to loop nesting levels.
/// This stack can be used to prepare attributes which are applied when a loop
/// is emitted.
class LoopInfoStack {
@@ -97,70 +97,70 @@ class LoopInfoStack {
public:
LoopInfoStack() {}
- /// \brief Begin a new structured loop. The set of staged attributes will be
+ /// Begin a new structured loop. The set of staged attributes will be
/// applied to the loop and then cleared.
void push(llvm::BasicBlock *Header, const llvm::DebugLoc &StartLoc,
const llvm::DebugLoc &EndLoc);
- /// \brief Begin a new structured loop. Stage attributes from the Attrs list.
+ /// Begin a new structured loop. Stage attributes from the Attrs list.
/// The staged attributes are applied to the loop and then cleared.
void push(llvm::BasicBlock *Header, clang::ASTContext &Ctx,
llvm::ArrayRef<const Attr *> Attrs, const llvm::DebugLoc &StartLoc,
const llvm::DebugLoc &EndLoc);
- /// \brief End the current loop.
+ /// End the current loop.
void pop();
- /// \brief Return the top loop id metadata.
+ /// Return the top loop id metadata.
llvm::MDNode *getCurLoopID() const { return getInfo().getLoopID(); }
- /// \brief Return true if the top loop is parallel.
+ /// Return true if the top loop is parallel.
bool getCurLoopParallel() const {
return hasInfo() ? getInfo().getAttributes().IsParallel : false;
}
- /// \brief Function called by the CodeGenFunction when an instruction is
+ /// Function called by the CodeGenFunction when an instruction is
/// created.
void InsertHelper(llvm::Instruction *I) const;
- /// \brief Set the next pushed loop as parallel.
+ /// Set the next pushed loop as parallel.
void setParallel(bool Enable = true) { StagedAttrs.IsParallel = Enable; }
- /// \brief Set the next pushed loop 'vectorize.enable'
+  /// Set the next pushed loop's 'vectorize.enable'.
void setVectorizeEnable(bool Enable = true) {
StagedAttrs.VectorizeEnable =
Enable ? LoopAttributes::Enable : LoopAttributes::Disable;
}
- /// \brief Set the next pushed loop as a distribution candidate.
+ /// Set the next pushed loop as a distribution candidate.
void setDistributeState(bool Enable = true) {
StagedAttrs.DistributeEnable =
Enable ? LoopAttributes::Enable : LoopAttributes::Disable;
}
- /// \brief Set the next pushed loop unroll state.
+ /// Set the next pushed loop unroll state.
void setUnrollState(const LoopAttributes::LVEnableState &State) {
StagedAttrs.UnrollEnable = State;
}
- /// \brief Set the vectorize width for the next loop pushed.
+ /// Set the vectorize width for the next loop pushed.
void setVectorizeWidth(unsigned W) { StagedAttrs.VectorizeWidth = W; }
- /// \brief Set the interleave count for the next loop pushed.
+ /// Set the interleave count for the next loop pushed.
void setInterleaveCount(unsigned C) { StagedAttrs.InterleaveCount = C; }
- /// \brief Set the unroll count for the next loop pushed.
+ /// Set the unroll count for the next loop pushed.
void setUnrollCount(unsigned C) { StagedAttrs.UnrollCount = C; }
private:
- /// \brief Returns true if there is LoopInfo on the stack.
+ /// Returns true if there is LoopInfo on the stack.
bool hasInfo() const { return !Active.empty(); }
- /// \brief Return the LoopInfo for the current loop. HasInfo should be called
+  /// Return the LoopInfo for the current loop. hasInfo() should be called
/// first to ensure LoopInfo is present.
const LoopInfo &getInfo() const { return Active.back(); }
- /// \brief The set of attributes that will be applied to the next pushed loop.
+ /// The set of attributes that will be applied to the next pushed loop.
LoopAttributes StagedAttrs;
- /// \brief Stack of active loops.
+ /// Stack of active loops.
llvm::SmallVector<LoopInfo, 4> Active;
};
diff --git a/lib/CodeGen/CGNonTrivialStruct.cpp b/lib/CodeGen/CGNonTrivialStruct.cpp
new file mode 100644
index 000000000000..922e0934b866
--- /dev/null
+++ b/lib/CodeGen/CGNonTrivialStruct.cpp
@@ -0,0 +1,885 @@
+//===--- CGNonTrivialStruct.cpp - Emit Special Functions for C Structs ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines functions to generate various special functions for C
+// structs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CodeGenFunction.h"
+#include "CodeGenModule.h"
+#include "clang/AST/NonTrivialTypeVisitor.h"
+#include "llvm/Support/ScopedPrinter.h"
+#include <array>
+
+using namespace clang;
+using namespace CodeGen;
+
+// Return the size of a field in number of bits.
+static uint64_t getFieldSize(const FieldDecl *FD, QualType FT,
+ ASTContext &Ctx) {
+ if (FD && FD->isBitField())
+ return FD->getBitWidthValue(Ctx);
+ return Ctx.getTypeSize(FT);
+}
+
+namespace {
+enum { DstIdx = 0, SrcIdx = 1 };
+const char *ValNameStr[2] = {"dst", "src"};
+
+template <class Derived> struct StructVisitor {
+ StructVisitor(ASTContext &Ctx) : Ctx(Ctx) {}
+
+ template <class... Ts>
+ void visitStructFields(QualType QT, CharUnits CurStructOffset, Ts... Args) {
+ const RecordDecl *RD = QT->castAs<RecordType>()->getDecl();
+
+ // Iterate over the fields of the struct.
+ for (const FieldDecl *FD : RD->fields()) {
+ QualType FT = FD->getType();
+ FT = QT.isVolatileQualified() ? FT.withVolatile() : FT;
+ asDerived().visit(FT, FD, CurStructOffset, Args...);
+ }
+
+ asDerived().flushTrivialFields(Args...);
+ }
+
+ template <class... Ts> void visitTrivial(Ts... Args) {}
+
+ template <class... Ts> void visitCXXDestructor(Ts... Args) {
+ llvm_unreachable("field of a C++ struct type is not expected");
+ }
+
+ template <class... Ts> void flushTrivialFields(Ts... Args) {}
+
+ uint64_t getFieldOffsetInBits(const FieldDecl *FD) {
+ return FD ? Ctx.getASTRecordLayout(FD->getParent())
+ .getFieldOffset(FD->getFieldIndex())
+ : 0;
+ }
+
+ CharUnits getFieldOffset(const FieldDecl *FD) {
+ return Ctx.toCharUnitsFromBits(getFieldOffsetInBits(FD));
+ }
+
+ Derived &asDerived() { return static_cast<Derived &>(*this); }
+
+ ASTContext &getContext() { return Ctx; }
+ ASTContext &Ctx;
+};
+
+template <class Derived, bool IsMove>
+struct CopyStructVisitor : StructVisitor<Derived>,
+ CopiedTypeVisitor<Derived, IsMove> {
+ using StructVisitor<Derived>::asDerived;
+ using Super = CopiedTypeVisitor<Derived, IsMove>;
+
+ CopyStructVisitor(ASTContext &Ctx) : StructVisitor<Derived>(Ctx) {}
+
+ template <class... Ts>
+ void preVisit(QualType::PrimitiveCopyKind PCK, QualType FT,
+                const FieldDecl *FD, CharUnits CurStructOffset,
+ Ts &&... Args) {
+ if (PCK)
+ asDerived().flushTrivialFields(std::forward<Ts>(Args)...);
+ }
+
+ template <class... Ts>
+ void visitWithKind(QualType::PrimitiveCopyKind PCK, QualType FT,
+                     const FieldDecl *FD, CharUnits CurStructOffset,
+ Ts &&... Args) {
+ if (const auto *AT = asDerived().getContext().getAsArrayType(FT)) {
+ asDerived().visitArray(PCK, AT, FT.isVolatileQualified(), FD,
+                             CurStructOffset, std::forward<Ts>(Args)...);
+ return;
+ }
+
+    Super::visitWithKind(PCK, FT, FD, CurStructOffset,
+ std::forward<Ts>(Args)...);
+ }
+
+ template <class... Ts>
+ void visitTrivial(QualType FT, const FieldDecl *FD, CharUnits CurStructOffset,
+ Ts... Args) {
+ assert(!FT.isVolatileQualified() && "volatile field not expected");
+ ASTContext &Ctx = asDerived().getContext();
+ uint64_t FieldSize = getFieldSize(FD, FT, Ctx);
+
+ // Ignore zero-sized fields.
+ if (FieldSize == 0)
+ return;
+
+ uint64_t FStartInBits = asDerived().getFieldOffsetInBits(FD);
+ uint64_t FEndInBits = FStartInBits + FieldSize;
+ uint64_t RoundedFEnd = llvm::alignTo(FEndInBits, Ctx.getCharWidth());
+
+ // Set Start if this is the first field of a sequence of trivial fields.
+ if (Start == End)
+ Start = CurStructOffset + Ctx.toCharUnitsFromBits(FStartInBits);
+ End = CurStructOffset + Ctx.toCharUnitsFromBits(RoundedFEnd);
+ }
+
+ CharUnits Start = CharUnits::Zero(), End = CharUnits::Zero();
+};
+
+// The following classes create the mangled names of the special functions of
+// a non-trivial C struct. Since there is no ODR in C, the functions are
+// mangled based on the struct contents and not the name. The mangled name has
+// the following structure:
+//
+// <function-name> ::= <prefix> <alignment-info> <struct-field-info>
+// <prefix> ::= "__destructor_" | "__default_constructor_" |
+//              "__copy_constructor_" | "__move_constructor_" |
+//              "__copy_assignment_" | "__move_assignment_"
+// <alignment-info> ::= <dst-alignment> ["_" <src-alignment>]
+// <struct-field-info> ::= <field-info>+
+// <field-info> ::= <struct-or-scalar-field-info> | <array-field-info>
+// <struct-or-scalar-field-info> ::= <struct-field-info> | <strong-field-info> |
+//                                   <weak-field-info> | <trivial-field-info>
+// <array-field-info> ::= "_AB" <array-offset> "s" <element-size> "n"
+//                        <num-elements> <innermost-element-info> "_AE"
+// <innermost-element-info> ::= <struct-or-scalar-field-info>
+// <strong-field-info> ::= "_s" ["b"] ["v"] <field-offset>
+// <weak-field-info> ::= "_w" ["v"] <field-offset>
+// <trivial-field-info> ::= "_t" ["v"] <field-offset> "w" <field-size>
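+//
+// For example, on a 64-bit target the copy constructor of
+// "struct S { id x; int i; };", with destination and source both 8-byte
+// aligned, is named "__copy_constructor_8_8_s0_t8w4": a strong field at
+// offset 0 followed by four trivial bytes starting at offset 8.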
+
+template <class Derived> struct GenFuncNameBase {
+ std::string getVolatileOffsetStr(bool IsVolatile, CharUnits Offset) {
+ std::string S;
+ if (IsVolatile)
+ S = "v";
+ S += llvm::to_string(Offset.getQuantity());
+ return S;
+ }
+
+ void visitARCStrong(QualType FT, const FieldDecl *FD,
+ CharUnits CurStructOffset) {
+ appendStr("_s");
+ if (FT->isBlockPointerType())
+ appendStr("b");
+ CharUnits FieldOffset = CurStructOffset + asDerived().getFieldOffset(FD);
+ appendStr(getVolatileOffsetStr(FT.isVolatileQualified(), FieldOffset));
+ }
+
+ void visitARCWeak(QualType FT, const FieldDecl *FD,
+ CharUnits CurStructOffset) {
+ appendStr("_w");
+ CharUnits FieldOffset = CurStructOffset + asDerived().getFieldOffset(FD);
+ appendStr(getVolatileOffsetStr(FT.isVolatileQualified(), FieldOffset));
+ }
+
+ void visitStruct(QualType QT, const FieldDecl *FD,
+ CharUnits CurStructOffset) {
+ CharUnits FieldOffset = CurStructOffset + asDerived().getFieldOffset(FD);
+ asDerived().visitStructFields(QT, FieldOffset);
+ }
+
+ template <class FieldKind>
+ void visitArray(FieldKind FK, const ArrayType *AT, bool IsVolatile,
+ const FieldDecl *FD, CharUnits CurStructOffset) {
+    // The name string for non-volatile trivial fields is emitted when
+    // flushTrivialFields is called.
+ if (!FK)
+ return asDerived().visitTrivial(QualType(AT, 0), FD, CurStructOffset);
+
+ CharUnits FieldOffset = CurStructOffset + asDerived().getFieldOffset(FD);
+ ASTContext &Ctx = asDerived().getContext();
+ const ConstantArrayType *CAT = cast<ConstantArrayType>(AT);
+ unsigned NumElts = Ctx.getConstantArrayElementCount(CAT);
+ QualType EltTy = Ctx.getBaseElementType(CAT);
+ CharUnits EltSize = Ctx.getTypeSizeInChars(EltTy);
+ appendStr("_AB" + llvm::to_string(FieldOffset.getQuantity()) + "s" +
+ llvm::to_string(EltSize.getQuantity()) + "n" +
+ llvm::to_string(NumElts));
+ EltTy = IsVolatile ? EltTy.withVolatile() : EltTy;
+ asDerived().visitWithKind(FK, EltTy, nullptr, FieldOffset);
+ appendStr("_AE");
+ }
+
+ void appendStr(StringRef Str) { Name += Str; }
+
+ std::string getName(QualType QT, bool IsVolatile) {
+ QT = IsVolatile ? QT.withVolatile() : QT;
+ asDerived().visitStructFields(QT, CharUnits::Zero());
+ return Name;
+ }
+
+ Derived &asDerived() { return static_cast<Derived &>(*this); }
+
+ std::string Name;
+};
+
+template <class Derived>
+struct GenUnaryFuncName : StructVisitor<Derived>, GenFuncNameBase<Derived> {
+ GenUnaryFuncName(StringRef Prefix, CharUnits DstAlignment, ASTContext &Ctx)
+ : StructVisitor<Derived>(Ctx) {
+ this->appendStr(Prefix);
+ this->appendStr(llvm::to_string(DstAlignment.getQuantity()));
+ }
+};
+
+// Helper function to create a null constant.
+static llvm::Constant *getNullForVariable(Address Addr) {
+ llvm::Type *Ty = Addr.getElementType();
+ return llvm::ConstantPointerNull::get(cast<llvm::PointerType>(Ty));
+}
+
+template <bool IsMove>
+struct GenBinaryFuncName : CopyStructVisitor<GenBinaryFuncName<IsMove>, IsMove>,
+ GenFuncNameBase<GenBinaryFuncName<IsMove>> {
+
+ GenBinaryFuncName(StringRef Prefix, CharUnits DstAlignment,
+ CharUnits SrcAlignment, ASTContext &Ctx)
+ : CopyStructVisitor<GenBinaryFuncName<IsMove>, IsMove>(Ctx) {
+ this->appendStr(Prefix);
+ this->appendStr(llvm::to_string(DstAlignment.getQuantity()));
+ this->appendStr("_" + llvm::to_string(SrcAlignment.getQuantity()));
+ }
+
+ void flushTrivialFields() {
+ if (this->Start == this->End)
+ return;
+
+ this->appendStr("_t" + llvm::to_string(this->Start.getQuantity()) + "w" +
+ llvm::to_string((this->End - this->Start).getQuantity()));
+
+ this->Start = this->End = CharUnits::Zero();
+ }
+
+ void visitVolatileTrivial(QualType FT, const FieldDecl *FD,
+ CharUnits CurStackOffset) {
+ // Because volatile fields can be bit-fields and are individually copied,
+ // their offset and width are in bits.
+ uint64_t OffsetInBits =
+ this->Ctx.toBits(CurStackOffset) + this->getFieldOffsetInBits(FD);
+ this->appendStr("_tv" + llvm::to_string(OffsetInBits) + "w" +
+ llvm::to_string(getFieldSize(FD, FT, this->Ctx)));
+ }
+};
+
+struct GenDefaultInitializeFuncName
+ : GenUnaryFuncName<GenDefaultInitializeFuncName>,
+ DefaultInitializedTypeVisitor<GenDefaultInitializeFuncName> {
+ using Super = DefaultInitializedTypeVisitor<GenDefaultInitializeFuncName>;
+ GenDefaultInitializeFuncName(CharUnits DstAlignment, ASTContext &Ctx)
+ : GenUnaryFuncName<GenDefaultInitializeFuncName>("__default_constructor_",
+ DstAlignment, Ctx) {}
+ void visitWithKind(QualType::PrimitiveDefaultInitializeKind PDIK, QualType FT,
+ const FieldDecl *FD, CharUnits CurStructOffset) {
+ if (const auto *AT = getContext().getAsArrayType(FT)) {
+ visitArray(PDIK, AT, FT.isVolatileQualified(), FD, CurStructOffset);
+ return;
+ }
+
+ Super::visitWithKind(PDIK, FT, FD, CurStructOffset);
+ }
+};
+
+struct GenDestructorFuncName : GenUnaryFuncName<GenDestructorFuncName>,
+ DestructedTypeVisitor<GenDestructorFuncName> {
+ using Super = DestructedTypeVisitor<GenDestructorFuncName>;
+ GenDestructorFuncName(CharUnits DstAlignment, ASTContext &Ctx)
+ : GenUnaryFuncName<GenDestructorFuncName>("__destructor_", DstAlignment,
+ Ctx) {}
+ void visitWithKind(QualType::DestructionKind DK, QualType FT,
+ const FieldDecl *FD, CharUnits CurStructOffset) {
+ if (const auto *AT = getContext().getAsArrayType(FT)) {
+ visitArray(DK, AT, FT.isVolatileQualified(), FD, CurStructOffset);
+ return;
+ }
+
+ Super::visitWithKind(DK, FT, FD, CurStructOffset);
+ }
+};
+
+// Helper function that creates CGFunctionInfo for an N-ary special function.
+template <size_t N>
+static const CGFunctionInfo &getFunctionInfo(CodeGenModule &CGM,
+ FunctionArgList &Args) {
+ ASTContext &Ctx = CGM.getContext();
+ llvm::SmallVector<ImplicitParamDecl *, N> Params;
+ QualType ParamTy = Ctx.getPointerType(Ctx.VoidPtrTy);
+
+ for (unsigned I = 0; I < N; ++I)
+ Params.push_back(ImplicitParamDecl::Create(
+ Ctx, nullptr, SourceLocation(), &Ctx.Idents.get(ValNameStr[I]), ParamTy,
+ ImplicitParamDecl::Other));
+
+ for (auto &P : Params)
+ Args.push_back(P);
+
+ return CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
+}
+
+// Template classes that are used as bases for classes that emit special
+// functions.
+template <class Derived> struct GenFuncBase {
+ template <size_t N>
+ void visitStruct(QualType FT, const FieldDecl *FD, CharUnits CurStackOffset,
+ std::array<Address, N> Addrs) {
+ this->asDerived().callSpecialFunction(
+ FT, CurStackOffset + asDerived().getFieldOffset(FD), Addrs);
+ }
+
+ template <class FieldKind, size_t N>
+ void visitArray(FieldKind FK, const ArrayType *AT, bool IsVolatile,
+ const FieldDecl *FD, CharUnits CurStackOffset,
+ std::array<Address, N> Addrs) {
+ // Non-volatile trivial fields are copied when flushTrivialFields is called.
+ if (!FK)
+ return asDerived().visitTrivial(QualType(AT, 0), FD, CurStackOffset,
+ Addrs);
+
+ CodeGenFunction &CGF = *this->CGF;
+ ASTContext &Ctx = CGF.getContext();
+
+ // Compute the end address.
+ QualType BaseEltQT;
+ std::array<Address, N> StartAddrs = Addrs;
+ for (unsigned I = 0; I < N; ++I)
+ StartAddrs[I] = getAddrWithOffset(Addrs[I], CurStackOffset, FD);
+ Address DstAddr = StartAddrs[DstIdx];
+ llvm::Value *NumElts = CGF.emitArrayLength(AT, BaseEltQT, DstAddr);
+ unsigned BaseEltSize = Ctx.getTypeSizeInChars(BaseEltQT).getQuantity();
+ llvm::Value *BaseEltSizeVal =
+ llvm::ConstantInt::get(NumElts->getType(), BaseEltSize);
+ llvm::Value *SizeInBytes =
+ CGF.Builder.CreateNUWMul(BaseEltSizeVal, NumElts);
+ Address BC = CGF.Builder.CreateBitCast(DstAddr, CGF.CGM.Int8PtrTy);
+ llvm::Value *DstArrayEnd =
+ CGF.Builder.CreateInBoundsGEP(BC.getPointer(), SizeInBytes);
+ DstArrayEnd = CGF.Builder.CreateBitCast(DstArrayEnd, CGF.CGM.Int8PtrPtrTy,
+ "dstarray.end");
+ llvm::BasicBlock *PreheaderBB = CGF.Builder.GetInsertBlock();
+
+ // Create the header block and insert the phi instructions.
+ llvm::BasicBlock *HeaderBB = CGF.createBasicBlock("loop.header");
+ CGF.EmitBlock(HeaderBB);
+ llvm::PHINode *PHIs[N];
+
+ for (unsigned I = 0; I < N; ++I) {
+ PHIs[I] = CGF.Builder.CreatePHI(CGF.CGM.Int8PtrPtrTy, 2, "addr.cur");
+ PHIs[I]->addIncoming(StartAddrs[I].getPointer(), PreheaderBB);
+ }
+
+ // Create the exit and loop body blocks.
+ llvm::BasicBlock *ExitBB = CGF.createBasicBlock("loop.exit");
+ llvm::BasicBlock *LoopBB = CGF.createBasicBlock("loop.body");
+
+ // Emit the comparison and conditional branch instruction that jumps to
+ // either the exit or the loop body.
+ llvm::Value *Done =
+ CGF.Builder.CreateICmpEQ(PHIs[DstIdx], DstArrayEnd, "done");
+ CGF.Builder.CreateCondBr(Done, ExitBB, LoopBB);
+
+ // Visit the element of the array in the loop body.
+ CGF.EmitBlock(LoopBB);
+ QualType EltQT = AT->getElementType();
+ CharUnits EltSize = Ctx.getTypeSizeInChars(EltQT);
+ std::array<Address, N> NewAddrs = Addrs;
+
+ for (unsigned I = 0; I < N; ++I)
+ NewAddrs[I] = Address(
+ PHIs[I], StartAddrs[I].getAlignment().alignmentAtOffset(EltSize));
+
+ EltQT = IsVolatile ? EltQT.withVolatile() : EltQT;
+ this->asDerived().visitWithKind(FK, EltQT, nullptr, CharUnits::Zero(),
+ NewAddrs);
+
+ LoopBB = CGF.Builder.GetInsertBlock();
+
+ for (unsigned I = 0; I < N; ++I) {
+      // Advance the destination and source addresses and feed the new
+      // addresses back into the phi instructions.
+ NewAddrs[I] = getAddrWithOffset(NewAddrs[I], EltSize);
+ PHIs[I]->addIncoming(NewAddrs[I].getPointer(), LoopBB);
+ }
+
+ // Insert an unconditional branch to the header block.
+ CGF.Builder.CreateBr(HeaderBB);
+ CGF.EmitBlock(ExitBB);
+ }
+
+ /// Return an address with the specified offset from the passed address.
+ Address getAddrWithOffset(Address Addr, CharUnits Offset) {
+ assert(Addr.isValid() && "invalid address");
+ if (Offset.getQuantity() == 0)
+ return Addr;
+ Addr = CGF->Builder.CreateBitCast(Addr, CGF->CGM.Int8PtrTy);
+ Addr = CGF->Builder.CreateConstInBoundsGEP(Addr, Offset.getQuantity(),
+ CharUnits::One());
+ return CGF->Builder.CreateBitCast(Addr, CGF->CGM.Int8PtrPtrTy);
+ }
+
+ Address getAddrWithOffset(Address Addr, CharUnits StructFieldOffset,
+ const FieldDecl *FD) {
+ return getAddrWithOffset(Addr, StructFieldOffset +
+ asDerived().getFieldOffset(FD));
+ }
+
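+  /// Return the function \p FuncName if it is already present in the module;
+  /// otherwise create it and emit its body by visiting the fields of \p QT.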
+ template <size_t N>
+ llvm::Function *
+ getFunction(StringRef FuncName, QualType QT, std::array<Address, N> Addrs,
+ std::array<CharUnits, N> Alignments, CodeGenModule &CGM) {
+ // If the special function already exists in the module, return it.
+ if (llvm::Function *F = CGM.getModule().getFunction(FuncName)) {
+ bool WrongType = false;
+ if (!F->getReturnType()->isVoidTy())
+ WrongType = true;
+ else {
+ for (const llvm::Argument &Arg : F->args())
+ if (Arg.getType() != CGM.Int8PtrPtrTy)
+ WrongType = true;
+ }
+
+ if (WrongType) {
+ std::string FuncName = F->getName();
+ SourceLocation Loc = QT->castAs<RecordType>()->getDecl()->getLocation();
+ CGM.Error(Loc, "special function " + FuncName +
+ " for non-trivial C struct has incorrect type");
+ return nullptr;
+ }
+ return F;
+ }
+
+ ASTContext &Ctx = CGM.getContext();
+ FunctionArgList Args;
+ const CGFunctionInfo &FI = getFunctionInfo<N>(CGM, Args);
+ llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
+ llvm::Function *F =
+ llvm::Function::Create(FuncTy, llvm::GlobalValue::LinkOnceODRLinkage,
+ FuncName, &CGM.getModule());
+ F->setVisibility(llvm::GlobalValue::HiddenVisibility);
+ CGM.SetLLVMFunctionAttributes(nullptr, FI, F);
+ CGM.SetLLVMFunctionAttributesForDefinition(nullptr, F);
+ IdentifierInfo *II = &Ctx.Idents.get(FuncName);
+ FunctionDecl *FD = FunctionDecl::Create(
+ Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(),
+ II, Ctx.VoidTy, nullptr, SC_PrivateExtern, false, false);
+ CodeGenFunction NewCGF(CGM);
+ setCGF(&NewCGF);
+ CGF->StartFunction(FD, Ctx.VoidTy, F, FI, Args);
+
+ for (unsigned I = 0; I < N; ++I) {
+ llvm::Value *V = CGF->Builder.CreateLoad(CGF->GetAddrOfLocalVar(Args[I]));
+ Addrs[I] = Address(V, Alignments[I]);
+ }
+
+ asDerived().visitStructFields(QT, CharUnits::Zero(), Addrs);
+ CGF->FinishFunction();
+ return F;
+ }
+
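+  /// Get or create the special function \p FuncName for \p QT and emit a
+  /// call to it, passing the given addresses cast to i8**.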
+ template <size_t N>
+ void callFunc(StringRef FuncName, QualType QT, std::array<Address, N> Addrs,
+ CodeGenFunction &CallerCGF) {
+ std::array<CharUnits, N> Alignments;
+ llvm::Value *Ptrs[N];
+
+ for (unsigned I = 0; I < N; ++I) {
+ Alignments[I] = Addrs[I].getAlignment();
+ Ptrs[I] =
+ CallerCGF.Builder.CreateBitCast(Addrs[I], CallerCGF.CGM.Int8PtrPtrTy)
+ .getPointer();
+ }
+
+ if (llvm::Function *F =
+ getFunction(FuncName, QT, Addrs, Alignments, CallerCGF.CGM))
+ CallerCGF.EmitNounwindRuntimeCall(F, Ptrs);
+ }
+
+ Derived &asDerived() { return static_cast<Derived &>(*this); }
+
+ void setCGF(CodeGenFunction *F) { CGF = F; }
+
+ CodeGenFunction *CGF = nullptr;
+};
+
+template <class Derived, bool IsMove>
+struct GenBinaryFunc : CopyStructVisitor<Derived, IsMove>,
+ GenFuncBase<Derived> {
+ GenBinaryFunc(ASTContext &Ctx) : CopyStructVisitor<Derived, IsMove>(Ctx) {}
+
+ void flushTrivialFields(std::array<Address, 2> Addrs) {
+ CharUnits Size = this->End - this->Start;
+
+ if (Size.getQuantity() == 0)
+ return;
+
+ Address DstAddr = this->getAddrWithOffset(Addrs[DstIdx], this->Start);
+ Address SrcAddr = this->getAddrWithOffset(Addrs[SrcIdx], this->Start);
+
+    // Copy large or non-power-of-2-sized runs with a memcpy; otherwise use a
+    // single integer load/store pair.
+ if (Size.getQuantity() >= 16 || !llvm::isPowerOf2_32(Size.getQuantity())) {
+ llvm::Value *SizeVal =
+ llvm::ConstantInt::get(this->CGF->SizeTy, Size.getQuantity());
+ DstAddr =
+ this->CGF->Builder.CreateElementBitCast(DstAddr, this->CGF->Int8Ty);
+ SrcAddr =
+ this->CGF->Builder.CreateElementBitCast(SrcAddr, this->CGF->Int8Ty);
+ this->CGF->Builder.CreateMemCpy(DstAddr, SrcAddr, SizeVal, false);
+ } else {
+ llvm::Type *Ty = llvm::Type::getIntNTy(
+ this->CGF->getLLVMContext(),
+ Size.getQuantity() * this->CGF->getContext().getCharWidth());
+ DstAddr = this->CGF->Builder.CreateElementBitCast(DstAddr, Ty);
+ SrcAddr = this->CGF->Builder.CreateElementBitCast(SrcAddr, Ty);
+ llvm::Value *SrcVal = this->CGF->Builder.CreateLoad(SrcAddr, false);
+ this->CGF->Builder.CreateStore(SrcVal, DstAddr, false);
+ }
+
+ this->Start = this->End = CharUnits::Zero();
+ }
+
+ template <class... Ts>
+ void visitVolatileTrivial(QualType FT, const FieldDecl *FD, CharUnits Offset,
+ std::array<Address, 2> Addrs) {
+ LValue DstLV, SrcLV;
+ if (FD) {
+ QualType RT = QualType(FD->getParent()->getTypeForDecl(), 0);
+ llvm::PointerType *PtrTy = this->CGF->ConvertType(RT)->getPointerTo();
+ Address DstAddr = this->getAddrWithOffset(Addrs[DstIdx], Offset);
+ LValue DstBase = this->CGF->MakeAddrLValue(
+ this->CGF->Builder.CreateBitCast(DstAddr, PtrTy), FT);
+ DstLV = this->CGF->EmitLValueForField(DstBase, FD);
+ Address SrcAddr = this->getAddrWithOffset(Addrs[SrcIdx], Offset);
+ LValue SrcBase = this->CGF->MakeAddrLValue(
+ this->CGF->Builder.CreateBitCast(SrcAddr, PtrTy), FT);
+ SrcLV = this->CGF->EmitLValueForField(SrcBase, FD);
+ } else {
+ llvm::PointerType *Ty = this->CGF->ConvertType(FT)->getPointerTo();
+ Address DstAddr = this->CGF->Builder.CreateBitCast(Addrs[DstIdx], Ty);
+ Address SrcAddr = this->CGF->Builder.CreateBitCast(Addrs[SrcIdx], Ty);
+ DstLV = this->CGF->MakeAddrLValue(DstAddr, FT);
+ SrcLV = this->CGF->MakeAddrLValue(SrcAddr, FT);
+ }
+ RValue SrcVal = this->CGF->EmitLoadOfLValue(SrcLV, SourceLocation());
+ this->CGF->EmitStoreThroughLValue(SrcVal, DstLV);
+ }
+};
+
+// These classes emit the special functions for a non-trivial struct.
+struct GenDestructor : StructVisitor<GenDestructor>,
+ GenFuncBase<GenDestructor>,
+ DestructedTypeVisitor<GenDestructor> {
+ using Super = DestructedTypeVisitor<GenDestructor>;
+ GenDestructor(ASTContext &Ctx) : StructVisitor<GenDestructor>(Ctx) {}
+
+ void visitWithKind(QualType::DestructionKind DK, QualType FT,
+ const FieldDecl *FD, CharUnits CurStructOffset,
+ std::array<Address, 1> Addrs) {
+ if (const auto *AT = getContext().getAsArrayType(FT)) {
+ visitArray(DK, AT, FT.isVolatileQualified(), FD, CurStructOffset, Addrs);
+ return;
+ }
+
+ Super::visitWithKind(DK, FT, FD, CurStructOffset, Addrs);
+ }
+
+ void visitARCStrong(QualType QT, const FieldDecl *FD,
+ CharUnits CurStackOffset, std::array<Address, 1> Addrs) {
+ CGF->destroyARCStrongImprecise(
+ *CGF, getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD), QT);
+ }
+
+ void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStackOffset,
+ std::array<Address, 1> Addrs) {
+ CGF->destroyARCWeak(
+ *CGF, getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD), QT);
+ }
+
+ void callSpecialFunction(QualType FT, CharUnits Offset,
+ std::array<Address, 1> Addrs) {
+ CGF->callCStructDestructor(
+ CGF->MakeAddrLValue(getAddrWithOffset(Addrs[DstIdx], Offset), FT));
+ }
+};
+
+struct GenDefaultInitialize
+ : StructVisitor<GenDefaultInitialize>,
+ GenFuncBase<GenDefaultInitialize>,
+ DefaultInitializedTypeVisitor<GenDefaultInitialize> {
+ using Super = DefaultInitializedTypeVisitor<GenDefaultInitialize>;
+ typedef GenFuncBase<GenDefaultInitialize> GenFuncBaseTy;
+
+ GenDefaultInitialize(ASTContext &Ctx)
+ : StructVisitor<GenDefaultInitialize>(Ctx) {}
+
+ void visitWithKind(QualType::PrimitiveDefaultInitializeKind PDIK, QualType FT,
+ const FieldDecl *FD, CharUnits CurStructOffset,
+ std::array<Address, 1> Addrs) {
+ if (const auto *AT = getContext().getAsArrayType(FT)) {
+ visitArray(PDIK, AT, FT.isVolatileQualified(), FD, CurStructOffset,
+ Addrs);
+ return;
+ }
+
+ Super::visitWithKind(PDIK, FT, FD, CurStructOffset, Addrs);
+ }
+
+ void visitARCStrong(QualType QT, const FieldDecl *FD,
+ CharUnits CurStackOffset, std::array<Address, 1> Addrs) {
+ CGF->EmitNullInitialization(
+ getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD), QT);
+ }
+
+ void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStackOffset,
+ std::array<Address, 1> Addrs) {
+ CGF->EmitNullInitialization(
+ getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD), QT);
+ }
+
+ template <class FieldKind, size_t... Is>
+ void visitArray(FieldKind FK, const ArrayType *AT, bool IsVolatile,
+ const FieldDecl *FD, CharUnits CurStackOffset,
+ std::array<Address, 1> Addrs) {
+ if (!FK)
+ return visitTrivial(QualType(AT, 0), FD, CurStackOffset, Addrs);
+
+ ASTContext &Ctx = getContext();
+ CharUnits Size = Ctx.getTypeSizeInChars(QualType(AT, 0));
+ QualType EltTy = Ctx.getBaseElementType(QualType(AT, 0));
+
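+    // Initialize small arrays and arrays of structs element by element;
+    // otherwise zero the whole array with a single memset.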
+ if (Size < CharUnits::fromQuantity(16) || EltTy->getAs<RecordType>()) {
+ GenFuncBaseTy::visitArray(FK, AT, IsVolatile, FD, CurStackOffset, Addrs);
+ return;
+ }
+
+ llvm::Constant *SizeVal = CGF->Builder.getInt64(Size.getQuantity());
+ Address DstAddr = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD);
+ Address Loc = CGF->Builder.CreateElementBitCast(DstAddr, CGF->Int8Ty);
+ CGF->Builder.CreateMemSet(Loc, CGF->Builder.getInt8(0), SizeVal,
+ IsVolatile);
+ }
+
+ void callSpecialFunction(QualType FT, CharUnits Offset,
+ std::array<Address, 1> Addrs) {
+ CGF->callCStructDefaultConstructor(
+ CGF->MakeAddrLValue(getAddrWithOffset(Addrs[DstIdx], Offset), FT));
+ }
+};
+
+struct GenCopyConstructor : GenBinaryFunc<GenCopyConstructor, false> {
+ GenCopyConstructor(ASTContext &Ctx)
+ : GenBinaryFunc<GenCopyConstructor, false>(Ctx) {}
+
+ void visitARCStrong(QualType QT, const FieldDecl *FD,
+ CharUnits CurStackOffset, std::array<Address, 2> Addrs) {
+ Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD);
+ Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD);
+ llvm::Value *SrcVal = CGF->EmitLoadOfScalar(
+ Addrs[SrcIdx], QT.isVolatileQualified(), QT, SourceLocation());
+ llvm::Value *Val = CGF->EmitARCRetain(QT, SrcVal);
+ CGF->EmitStoreOfScalar(Val, CGF->MakeAddrLValue(Addrs[DstIdx], QT), true);
+ }
+
+ void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStackOffset,
+ std::array<Address, 2> Addrs) {
+ Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD);
+ Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD);
+ CGF->EmitARCCopyWeak(Addrs[DstIdx], Addrs[SrcIdx]);
+ }
+
+ void callSpecialFunction(QualType FT, CharUnits Offset,
+ std::array<Address, 2> Addrs) {
+ CGF->callCStructCopyConstructor(CGF->MakeAddrLValue(Addrs[DstIdx], FT),
+ CGF->MakeAddrLValue(Addrs[SrcIdx], FT));
+ }
+};
+
+struct GenMoveConstructor : GenBinaryFunc<GenMoveConstructor, true> {
+ GenMoveConstructor(ASTContext &Ctx)
+ : GenBinaryFunc<GenMoveConstructor, true>(Ctx) {}
+
+ void visitARCStrong(QualType QT, const FieldDecl *FD,
+ CharUnits CurStackOffset, std::array<Address, 2> Addrs) {
+ Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD);
+ Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD);
+ LValue SrcLV = CGF->MakeAddrLValue(Addrs[SrcIdx], QT);
+ llvm::Value *SrcVal =
+ CGF->EmitLoadOfLValue(SrcLV, SourceLocation()).getScalarVal();
+ CGF->EmitStoreOfScalar(getNullForVariable(SrcLV.getAddress()), SrcLV);
+ CGF->EmitStoreOfScalar(SrcVal, CGF->MakeAddrLValue(Addrs[DstIdx], QT),
+ /* isInitialization */ true);
+ }
+
+ void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStackOffset,
+ std::array<Address, 2> Addrs) {
+ Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD);
+ Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD);
+ CGF->EmitARCMoveWeak(Addrs[DstIdx], Addrs[SrcIdx]);
+ }
+
+ void callSpecialFunction(QualType FT, CharUnits Offset,
+ std::array<Address, 2> Addrs) {
+ CGF->callCStructMoveConstructor(CGF->MakeAddrLValue(Addrs[DstIdx], FT),
+ CGF->MakeAddrLValue(Addrs[SrcIdx], FT));
+ }
+};
+
+struct GenCopyAssignment : GenBinaryFunc<GenCopyAssignment, false> {
+ GenCopyAssignment(ASTContext &Ctx)
+ : GenBinaryFunc<GenCopyAssignment, false>(Ctx) {}
+
+ void visitARCStrong(QualType QT, const FieldDecl *FD,
+ CharUnits CurStackOffset, std::array<Address, 2> Addrs) {
+ Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD);
+ Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD);
+ llvm::Value *SrcVal = CGF->EmitLoadOfScalar(
+ Addrs[SrcIdx], QT.isVolatileQualified(), QT, SourceLocation());
+ CGF->EmitARCStoreStrong(CGF->MakeAddrLValue(Addrs[DstIdx], QT), SrcVal,
+ false);
+ }
+
+ void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStackOffset,
+ std::array<Address, 2> Addrs) {
+ Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD);
+ Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD);
+ CGF->emitARCCopyAssignWeak(QT, Addrs[DstIdx], Addrs[SrcIdx]);
+ }
+
+ void callSpecialFunction(QualType FT, CharUnits Offset,
+ std::array<Address, 2> Addrs) {
+ CGF->callCStructCopyAssignmentOperator(
+ CGF->MakeAddrLValue(Addrs[DstIdx], FT),
+ CGF->MakeAddrLValue(Addrs[SrcIdx], FT));
+ }
+};
+
+struct GenMoveAssignment : GenBinaryFunc<GenMoveAssignment, true> {
+ GenMoveAssignment(ASTContext &Ctx)
+ : GenBinaryFunc<GenMoveAssignment, true>(Ctx) {}
+
+ void visitARCStrong(QualType QT, const FieldDecl *FD,
+ CharUnits CurStackOffset, std::array<Address, 2> Addrs) {
+ Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD);
+ Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD);
+ LValue SrcLV = CGF->MakeAddrLValue(Addrs[SrcIdx], QT);
+ llvm::Value *SrcVal =
+ CGF->EmitLoadOfLValue(SrcLV, SourceLocation()).getScalarVal();
+ CGF->EmitStoreOfScalar(getNullForVariable(SrcLV.getAddress()), SrcLV);
+ LValue DstLV = CGF->MakeAddrLValue(Addrs[DstIdx], QT);
+ llvm::Value *DstVal =
+ CGF->EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
+ CGF->EmitStoreOfScalar(SrcVal, DstLV);
+ CGF->EmitARCRelease(DstVal, ARCImpreciseLifetime);
+ }
+
+ void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStackOffset,
+ std::array<Address, 2> Addrs) {
+ Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD);
+ Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD);
+ CGF->emitARCMoveAssignWeak(QT, Addrs[DstIdx], Addrs[SrcIdx]);
+ }
+
+ void callSpecialFunction(QualType FT, CharUnits Offset,
+ std::array<Address, 2> Addrs) {
+ CGF->callCStructMoveAssignmentOperator(
+ CGF->MakeAddrLValue(Addrs[DstIdx], FT),
+ CGF->MakeAddrLValue(Addrs[SrcIdx], FT));
+ }
+};
+
+} // namespace
+
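+// Destroy a non-trivial C struct object at the given address by calling its
+// synthesized destructor function.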
+void CodeGenFunction::destroyNonTrivialCStruct(CodeGenFunction &CGF,
+ Address Addr, QualType Type) {
+ CGF.callCStructDestructor(CGF.MakeAddrLValue(Addr, Type));
+}
+
+// Default-initialize a variable that is a non-trivial struct or an array of
+// such structs.
+void CodeGenFunction::defaultInitNonTrivialCStructVar(LValue Dst) {
+ GenDefaultInitialize Gen(getContext());
+ Address DstPtr = Builder.CreateBitCast(Dst.getAddress(), CGM.Int8PtrPtrTy);
+ Gen.setCGF(this);
+ QualType QT = Dst.getType();
+ QT = Dst.isVolatile() ? QT.withVolatile() : QT;
+ Gen.visit(QT, nullptr, CharUnits::Zero(), std::array<Address, 1>({{DstPtr}}));
+}
+
+template <class G, size_t N>
+static void callSpecialFunction(G &&Gen, StringRef FuncName, QualType QT,
+ bool IsVolatile, CodeGenFunction &CGF,
+ std::array<Address, N> Addrs) {
+ for (unsigned I = 0; I < N; ++I)
+ Addrs[I] = CGF.Builder.CreateBitCast(Addrs[I], CGF.CGM.Int8PtrPtrTy);
+ QT = IsVolatile ? QT.withVolatile() : QT;
+ Gen.callFunc(FuncName, QT, Addrs, CGF);
+}
+
+// Functions to emit calls to the special functions of a non-trivial C struct.
+void CodeGenFunction::callCStructDefaultConstructor(LValue Dst) {
+ bool IsVolatile = Dst.isVolatile();
+ Address DstPtr = Dst.getAddress();
+ QualType QT = Dst.getType();
+ GenDefaultInitializeFuncName GenName(DstPtr.getAlignment(), getContext());
+ std::string FuncName = GenName.getName(QT, IsVolatile);
+ callSpecialFunction(GenDefaultInitialize(getContext()), FuncName, QT,
+ IsVolatile, *this, std::array<Address, 1>({{DstPtr}}));
+}
+
+void CodeGenFunction::callCStructDestructor(LValue Dst) {
+ bool IsVolatile = Dst.isVolatile();
+ Address DstPtr = Dst.getAddress();
+ QualType QT = Dst.getType();
+ GenDestructorFuncName GenName(DstPtr.getAlignment(), getContext());
+ std::string FuncName = GenName.getName(QT, IsVolatile);
+ callSpecialFunction(GenDestructor(getContext()), FuncName, QT, IsVolatile,
+ *this, std::array<Address, 1>({{DstPtr}}));
+}
+
+void CodeGenFunction::callCStructCopyConstructor(LValue Dst, LValue Src) {
+ bool IsVolatile = Dst.isVolatile() || Src.isVolatile();
+ Address DstPtr = Dst.getAddress(), SrcPtr = Src.getAddress();
+ QualType QT = Dst.getType();
+ GenBinaryFuncName<false> GenName("__copy_constructor_", DstPtr.getAlignment(),
+ SrcPtr.getAlignment(), getContext());
+ std::string FuncName = GenName.getName(QT, IsVolatile);
+ callSpecialFunction(GenCopyConstructor(getContext()), FuncName, QT,
+ IsVolatile, *this,
+ std::array<Address, 2>({{DstPtr, SrcPtr}}));
+}
+
+void CodeGenFunction::callCStructCopyAssignmentOperator(LValue Dst,
+                                                        LValue Src) {
+ bool IsVolatile = Dst.isVolatile() || Src.isVolatile();
+ Address DstPtr = Dst.getAddress(), SrcPtr = Src.getAddress();
+ QualType QT = Dst.getType();
+ GenBinaryFuncName<false> GenName("__copy_assignment_", DstPtr.getAlignment(),
+ SrcPtr.getAlignment(), getContext());
+ std::string FuncName = GenName.getName(QT, IsVolatile);
+ callSpecialFunction(GenCopyAssignment(getContext()), FuncName, QT, IsVolatile,
+ *this, std::array<Address, 2>({{DstPtr, SrcPtr}}));
+}
+
+void CodeGenFunction::callCStructMoveConstructor(LValue Dst, LValue Src) {
+ bool IsVolatile = Dst.isVolatile() || Src.isVolatile();
+ Address DstPtr = Dst.getAddress(), SrcPtr = Src.getAddress();
+ QualType QT = Dst.getType();
+ GenBinaryFuncName<true> GenName("__move_constructor_", DstPtr.getAlignment(),
+ SrcPtr.getAlignment(), getContext());
+ std::string FuncName = GenName.getName(QT, IsVolatile);
+ callSpecialFunction(GenMoveConstructor(getContext()), FuncName, QT,
+ IsVolatile, *this,
+ std::array<Address, 2>({{DstPtr, SrcPtr}}));
+}
+
+void CodeGenFunction::callCStructMoveAssignmentOperator(LValue Dst,
+                                                        LValue Src) {
+ bool IsVolatile = Dst.isVolatile() || Src.isVolatile();
+ Address DstPtr = Dst.getAddress(), SrcPtr = Src.getAddress();
+ QualType QT = Dst.getType();
+ GenBinaryFuncName<true> GenName("__move_assignment_", DstPtr.getAlignment(),
+ SrcPtr.getAlignment(), getContext());
+ std::string FuncName = GenName.getName(QT, IsVolatile);
+ callSpecialFunction(GenMoveAssignment(getContext()), FuncName, QT, IsVolatile,
+ *this, std::array<Address, 2>({{DstPtr, SrcPtr}}));
+}
diff --git a/lib/CodeGen/CGObjC.cpp b/lib/CodeGen/CGObjC.cpp
index f26263d9472d..81c1201c0e06 100644
--- a/lib/CodeGen/CGObjC.cpp
+++ b/lib/CodeGen/CGObjC.cpp
@@ -259,7 +259,7 @@ llvm::Value *CodeGenFunction::EmitObjCProtocolExpr(const ObjCProtocolExpr *E) {
return CGM.getObjCRuntime().GenerateProtocolRef(*this, E->getProtocol());
}
-/// \brief Adjust the type of an Objective-C object that doesn't match up due
+/// Adjust the type of an Objective-C object that doesn't match up due
/// to type erasure at various points, e.g., related result types or the use
/// of parameterized classes.
static RValue AdjustObjCObjectType(CodeGenFunction &CGF, QualType ExpT,
@@ -803,7 +803,7 @@ PropertyImplStrategy::PropertyImplStrategy(CodeGenModule &CGM,
Kind = Native;
}
-/// \brief Generate an Objective-C property getter function.
+/// Generate an Objective-C property getter function.
///
/// The given Decl must be an ObjCImplementationDecl. \@synthesize
/// is illegal within a category.
@@ -1008,12 +1008,14 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl,
/*init*/ true);
return;
}
- case TEK_Aggregate:
+ case TEK_Aggregate: {
// The return value slot is guaranteed to not be aliased, but
// that's not necessarily the same as "on the stack", so
// we still potentially need objc_memmove_collectable.
- EmitAggregateCopy(ReturnValue, LV.getAddress(), ivarType);
+ EmitAggregateCopy(/* Dest= */ MakeAddrLValue(ReturnValue, ivarType),
+ /* Src= */ LV, ivarType, overlapForReturnValue());
return;
+ }
case TEK_Scalar: {
llvm::Value *value;
if (propType->isReferenceType()) {
@@ -1334,7 +1336,7 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl,
EmitStmt(&assign);
}
-/// \brief Generate an Objective-C property setter function.
+/// Generate an Objective-C property setter function.
///
/// The given Decl must be an ObjCImplementationDecl. \@synthesize
/// is illegal within a category.
@@ -1438,7 +1440,8 @@ void CodeGenFunction::GenerateObjCCtorDtorMethod(ObjCImplementationDecl *IMP,
EmitAggExpr(IvarInit->getInit(),
AggValueSlot::forLValue(LV, AggValueSlot::IsDestructed,
AggValueSlot::DoesNotNeedGCBarriers,
- AggValueSlot::IsNotAliased));
+ AggValueSlot::IsNotAliased,
+ AggValueSlot::DoesNotOverlap));
}
// constructor returns 'self'.
CodeGenTypes &Types = CGM.getTypes();
@@ -1814,22 +1817,6 @@ void CodeGenFunction::EmitARCIntrinsicUse(ArrayRef<llvm::Value*> values) {
}
-static bool IsForwarding(StringRef Name) {
- return llvm::StringSwitch<bool>(Name)
- .Cases("objc_autoreleaseReturnValue", // ARCInstKind::AutoreleaseRV
- "objc_autorelease", // ARCInstKind::Autorelease
- "objc_retainAutoreleaseReturnValue", // ARCInstKind::FusedRetainAutoreleaseRV
- "objc_retainAutoreleasedReturnValue", // ARCInstKind::RetainRV
- "objc_retainAutorelease", // ARCInstKind::FusedRetainAutorelease
- "objc_retainedObject", // ARCInstKind::NoopCast
- "objc_retain", // ARCInstKind::Retain
- "objc_unretainedObject", // ARCInstKind::NoopCast
- "objc_unretainedPointer", // ARCInstKind::NoopCast
- "objc_unsafeClaimAutoreleasedReturnValue", // ARCInstKind::ClaimRV
- true)
- .Default(false);
-}
-
static llvm::Constant *createARCRuntimeFunction(CodeGenModule &CGM,
llvm::FunctionType *FTy,
StringRef Name) {
@@ -1847,9 +1834,6 @@ static llvm::Constant *createARCRuntimeFunction(CodeGenModule &CGM,
// performance.
F->addFnAttr(llvm::Attribute::NonLazyBind);
}
-
- if (IsForwarding(Name))
- F->arg_begin()->addAttr(llvm::Attribute::Returned);
}
return RTF;
@@ -2052,7 +2036,7 @@ static void emitAutoreleasedReturnValueMarker(CodeGenFunction &CGF) {
// Call the marker asm if we made one, which we do only at -O0.
if (marker)
- CGF.Builder.CreateCall(marker);
+ CGF.Builder.CreateCall(marker, None, CGF.getBundlesForFunclet(marker));
}
/// Retain the given object which is the result of a function call.
@@ -2070,7 +2054,7 @@ CodeGenFunction::EmitARCRetainAutoreleasedReturnValue(llvm::Value *value) {
/// Claim a possibly-autoreleased return value at +0. This is only
/// valid to do in contexts which do not rely on the retain to keep
-/// the object valid for for all of its uses; for example, when
+/// the object valid for all of its uses; for example, when
/// the value is ignored, or when it is being assigned to an
/// __unsafe_unretained variable.
///
@@ -2325,6 +2309,21 @@ void CodeGenFunction::EmitARCCopyWeak(Address dst, Address src) {
"objc_copyWeak");
}
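+/// Copy-assign a weak reference: load the value referenced by \p SrcAddr and
+/// store it into \p DstAddr, leaving the source intact.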
+void CodeGenFunction::emitARCCopyAssignWeak(QualType Ty, Address DstAddr,
+ Address SrcAddr) {
+ llvm::Value *Object = EmitARCLoadWeakRetained(SrcAddr);
+ Object = EmitObjCConsumeObject(Ty, Object);
+ EmitARCStoreWeak(DstAddr, Object, false);
+}
+
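+/// Move-assign a weak reference: load the value referenced by \p SrcAddr,
+/// store it into \p DstAddr, and then destroy the source.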
+void CodeGenFunction::emitARCMoveAssignWeak(QualType Ty, Address DstAddr,
+ Address SrcAddr) {
+ llvm::Value *Object = EmitARCLoadWeakRetained(SrcAddr);
+ Object = EmitObjCConsumeObject(Ty, Object);
+ EmitARCStoreWeak(DstAddr, Object, false);
+ EmitARCDestroyWeak(SrcAddr);
+}
+
/// Produce the code to do a objc_autoreleasepool_push.
/// call i8* \@objc_autoreleasePoolPush(void)
llvm::Value *CodeGenFunction::EmitObjCAutoreleasePoolPush() {
@@ -3261,19 +3260,19 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction(
"__assign_helper_atomic_property_",
&CGM.getModule());
- CGM.SetInternalFunctionAttributes(nullptr, Fn, FI);
+ CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);
StartFunction(FD, C.VoidTy, Fn, FI, args);
DeclRefExpr DstExpr(&DstDecl, false, DestTy,
VK_RValue, SourceLocation());
UnaryOperator DST(&DstExpr, UO_Deref, DestTy->getPointeeType(),
- VK_LValue, OK_Ordinary, SourceLocation());
+ VK_LValue, OK_Ordinary, SourceLocation(), false);
DeclRefExpr SrcExpr(&SrcDecl, false, SrcTy,
VK_RValue, SourceLocation());
UnaryOperator SRC(&SrcExpr, UO_Deref, SrcTy->getPointeeType(),
- VK_LValue, OK_Ordinary, SourceLocation());
+ VK_LValue, OK_Ordinary, SourceLocation(), false);
Expr *Args[2] = { &DST, &SRC };
CallExpr *CalleeExp = cast<CallExpr>(PID->getSetterCXXAssignment());
@@ -3342,8 +3341,8 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction(
llvm::Function *Fn =
llvm::Function::Create(LTy, llvm::GlobalValue::InternalLinkage,
"__copy_helper_atomic_property_", &CGM.getModule());
-
- CGM.SetInternalFunctionAttributes(nullptr, Fn, FI);
+
+ CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);
StartFunction(FD, C.VoidTy, Fn, FI, args);
@@ -3351,7 +3350,7 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction(
VK_RValue, SourceLocation());
UnaryOperator SRC(&SrcExpr, UO_Deref, SrcTy->getPointeeType(),
- VK_LValue, OK_Ordinary, SourceLocation());
+ VK_LValue, OK_Ordinary, SourceLocation(), false);
CXXConstructExpr *CXXConstExpr =
cast<CXXConstructExpr>(PID->getGetterCXXConstructor());
@@ -3384,7 +3383,8 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction(
Qualifiers(),
AggValueSlot::IsDestructed,
AggValueSlot::DoesNotNeedGCBarriers,
- AggValueSlot::IsNotAliased));
+ AggValueSlot::IsNotAliased,
+ AggValueSlot::DoesNotOverlap));
FinishFunction();
HelperFn = llvm::ConstantExpr::getBitCast(Fn, VoidPtrTy);
diff --git a/lib/CodeGen/CGObjCGNU.cpp b/lib/CodeGen/CGObjCGNU.cpp
index c8b8be7f4552..6a0554b46b1c 100644
--- a/lib/CodeGen/CGObjCGNU.cpp
+++ b/lib/CodeGen/CGObjCGNU.cpp
@@ -34,11 +34,24 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ConvertUTF.h"
+#include <cctype>
using namespace clang;
using namespace CodeGen;
namespace {
+
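+// Mangle a method into a flat C symbol name; for example, an instance method
+// foo:bar: in class NSString (no category) becomes "_i_NSString__foo_bar_".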
+std::string SymbolNameForMethod(StringRef ClassName, StringRef CategoryName,
+                                const Selector MethodName,
+                                bool isClassMethod) {
+ std::string MethodNameColonStripped = MethodName.getAsString();
+ std::replace(MethodNameColonStripped.begin(), MethodNameColonStripped.end(),
+ ':', '_');
+ return (Twine(isClassMethod ? "_c_" : "_i_") + ClassName + "_" +
+ CategoryName + "_" + MethodNameColonStripped).str();
+}
+
/// Class that lazily initialises the runtime function. Avoids inserting the
/// types and the function declaration into a module if they're not used, and
/// avoids constructing the type more than once if it's used more than once.
@@ -80,8 +93,7 @@ public:
if (!Function) {
if (!FunctionName)
return nullptr;
- Function =
- cast<llvm::Constant>(CGM->CreateRuntimeFunction(FTy, FunctionName));
+ Function = CGM->CreateRuntimeFunction(FTy, FunctionName);
}
return Function;
}
@@ -114,6 +126,10 @@ protected:
/// Pointer to i8 - LLVM type of char*, for all of the places where the
/// runtime needs to deal with C strings.
llvm::PointerType *PtrToInt8Ty;
+ /// struct objc_protocol type
+ llvm::StructType *ProtocolTy;
+ /// Protocol * type.
+ llvm::PointerType *ProtocolPtrTy;
/// Instance Method Pointer type. This is a pointer to a function that takes,
/// at a minimum, an object and a selector, and is the generic type for
/// Objective-C methods. Due to differences between variadic / non-variadic
@@ -156,11 +172,29 @@ protected:
llvm::IntegerType *Int32Ty;
/// 64-bit integer type, to save us needing to look it up every time it's used.
llvm::IntegerType *Int64Ty;
+ /// The type of struct objc_property.
+ llvm::StructType *PropertyMetadataTy;
/// Metadata kind used to tie method lookups to message sends. The GNUstep
/// runtime provides some LLVM passes that can use this to do things like
/// automatic IMP caching and speculative inlining.
unsigned msgSendMDKind;
+ /// Helper to check if we are targeting a specific runtime version or later.
+ bool isRuntime(ObjCRuntime::Kind kind, unsigned major, unsigned minor=0) {
+ const ObjCRuntime &R = CGM.getLangOpts().ObjCRuntime;
+ return (R.getKind() == kind) &&
+ (R.getVersion() >= VersionTuple(major, minor));
+ }
+
+ std::string SymbolForProtocol(StringRef Name) {
+ return (StringRef("._OBJC_PROTOCOL_") + Name).str();
+ }
+
+ std::string SymbolForProtocolRef(StringRef Name) {
+ return (StringRef("._OBJC_REF_PROTOCOL_") + Name).str();
+ }
+
/// Helper function that generates a constant string and returns a pointer to
/// the start of the string. The result of this function can be used anywhere
/// where the C code specifies const char*.
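
To make the new helpers concrete (the runtime flag is just an example): compiling with -fobjc-runtime=gnustep-1.7 makes isRuntime(ObjCRuntime::GNUstep, 1, 6) true (1.7 >= 1.6) but isRuntime(ObjCRuntime::GNUstep, 2) false (1.7 < 2.0), since VersionTuple compares component-wise; the symbol helpers simply prepend fixed prefixes:

SymbolForProtocol("NSCopying")    // "._OBJC_PROTOCOL_NSCopying"
SymbolForProtocolRef("NSCopying") // "._OBJC_REF_PROTOCOL_NSCopying"
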
@@ -174,39 +208,28 @@ protected:
/// string value. This allows the linker to combine the strings between
/// different modules. Used for EH typeinfo names, selector strings, and a
/// few other things.
- llvm::Constant *ExportUniqueString(const std::string &Str, StringRef Prefix) {
- std::string Name = Prefix.str() + Str;
- auto *ConstStr = TheModule.getGlobalVariable(Name);
+ llvm::Constant *ExportUniqueString(const std::string &Str,
+ const std::string &prefix,
+ bool Private=false) {
+ std::string name = prefix + Str;
+ auto *ConstStr = TheModule.getGlobalVariable(name);
if (!ConstStr) {
llvm::Constant *value = llvm::ConstantDataArray::getString(VMContext,Str);
- ConstStr = new llvm::GlobalVariable(TheModule, value->getType(), true,
- llvm::GlobalValue::LinkOnceODRLinkage,
- value, Name);
+ auto *GV = new llvm::GlobalVariable(TheModule, value->getType(), true,
+ llvm::GlobalValue::LinkOnceODRLinkage, value, name);
+ if (Private)
+ GV->setVisibility(llvm::GlobalValue::HiddenVisibility);
+ ConstStr = GV;
}
return llvm::ConstantExpr::getGetElementPtr(ConstStr->getValueType(),
ConstStr, Zeros);
}
- /// Generates a global structure, initialized by the elements in the vector.
- /// The element types must match the types of the structure elements in the
- /// first argument.
- llvm::GlobalVariable *MakeGlobal(llvm::Constant *C,
- CharUnits Align,
- StringRef Name="",
- llvm::GlobalValue::LinkageTypes linkage
- =llvm::GlobalValue::InternalLinkage) {
- auto GV = new llvm::GlobalVariable(TheModule, C->getType(), false,
- linkage, C, Name);
- GV->setAlignment(Align.getQuantity());
- return GV;
- }
-
/// Returns a property name and encoding string.
llvm::Constant *MakePropertyEncodingString(const ObjCPropertyDecl *PD,
const Decl *Container) {
- const ObjCRuntime &R = CGM.getLangOpts().ObjCRuntime;
- if ((R.getKind() == ObjCRuntime::GNUstep) &&
- (R.getVersion() >= VersionTuple(1, 6))) {
+ assert(!isRuntime(ObjCRuntime::GNUstep, 2));
+ if (isRuntime(ObjCRuntime::GNUstep, 1, 6)) {
std::string NameAndAttributes;
std::string TypeStr =
CGM.getContext().getObjCEncodingForPropertyDecl(PD, Container);
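
A hedged usage sketch for the reworked ExportUniqueString (the ".objc_sym_" prefix is hypothetical): because the helper looks the name up in the module before creating anything, and llvm::ConstantExpr constants are uniqued, calling it twice with the same arguments returns the same pointer, while linkonce_odr lets the linker fold identical copies emitted by other modules:

// Inside a CGObjCGNU method (sketch): both calls yield the same GEP into one
// linkonce_odr global; the second call finds the existing global by name.
llvm::Constant *A = ExportUniqueString("NSObject", ".objc_sym_", false);
llvm::Constant *B = ExportUniqueString("NSObject", ".objc_sym_", false);
assert(A == B);
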
@@ -222,7 +245,7 @@ protected:
/// Push the property attributes into two structure fields.
void PushPropertyAttributes(ConstantStructBuilder &Fields,
- ObjCPropertyDecl *property, bool isSynthesized=true, bool
+ const ObjCPropertyDecl *property, bool isSynthesized=true, bool
isDynamic=true) {
int attrs = property->getPropertyAttributes();
// For read-only properties, clear the copy and retain flags
@@ -249,6 +272,46 @@ protected:
Fields.addInt(Int8Ty, 0);
}
+ virtual ConstantArrayBuilder PushPropertyListHeader(ConstantStructBuilder &Fields,
+ int count) {
+ // int count;
+ Fields.addInt(IntTy, count);
+ // int size; (only in GNUstep v2 ABI)
+ if (isRuntime(ObjCRuntime::GNUstep, 2)) {
+ llvm::DataLayout td(&TheModule);
+ Fields.addInt(IntTy, td.getTypeSizeInBits(PropertyMetadataTy) /
+ CGM.getContext().getCharWidth());
+ }
+ // struct objc_property_list *next;
+ Fields.add(NULLPtr);
+ // struct objc_property properties[]
+ return Fields.beginArray(PropertyMetadataTy);
+ }
+ virtual void PushProperty(ConstantArrayBuilder &PropertiesArray,
+ const ObjCPropertyDecl *property,
+ const Decl *OCD,
+ bool isSynthesized=true, bool
+ isDynamic=true) {
+ auto Fields = PropertiesArray.beginStruct(PropertyMetadataTy);
+ ASTContext &Context = CGM.getContext();
+ Fields.add(MakePropertyEncodingString(property, OCD));
+ PushPropertyAttributes(Fields, property, isSynthesized, isDynamic);
+ auto addPropertyMethod = [&](const ObjCMethodDecl *accessor) {
+ if (accessor) {
+ std::string TypeStr = Context.getObjCEncodingForMethodDecl(accessor);
+ llvm::Constant *TypeEncoding = MakeConstantString(TypeStr);
+ Fields.add(MakeConstantString(accessor->getSelector().getAsString()));
+ Fields.add(TypeEncoding);
+ } else {
+ Fields.add(NULLPtr);
+ Fields.add(NULLPtr);
+ }
+ };
+ addPropertyMethod(property->getGetterMethodDecl());
+ addPropertyMethod(property->getSetterMethodDecl());
+ Fields.finishAndAddTo(PropertiesArray);
+ }
+
/// Ensures that the value has the required type, by inserting a bitcast if
/// required. This function lets us avoid inserting bitcasts that are
/// redundant.
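
Reconstructed from the comments in PushPropertyListHeader and PushProperty above (field names assumed; the attribute-byte details are a sketch, and the size field is emitted only for GNUstep v2), the metadata corresponds roughly to these C structs:

struct objc_property {
  const char *name;         // name (plus attribute string on gnustep-1.6+)
  char attributes;          // low byte of the property attribute flags
  char attributes2;         // remaining flags
  char unused1, unused2;    // zero padding bytes
  const char *getter_name;  // getter selector string, or NULL
  const char *getter_types; // getter type encoding, or NULL
  const char *setter_name;  // setter selector string, or NULL
  const char *setter_types; // setter type encoding, or NULL
};

struct objc_property_list {
  int count;                       // number of entries in properties[]
  int size;                        // sizeof(struct objc_property), v2 only
  struct objc_property_list *next; // lists can be chained
  struct objc_property properties[];
};
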
@@ -268,7 +331,8 @@ protected:
/// LLVM context.
llvm::LLVMContext &VMContext;
-private:
+protected:
+
/// Placeholder for the class. Lots of things refer to the class before we've
/// actually emitted it. We use this alias as a placeholder, and then replace
/// it with a pointer to the class structure before finally emitting the
@@ -352,6 +416,7 @@ private:
/// Function used for non-object declared property setters.
LazyRuntimeFunction SetStructPropertyFn;
+protected:
/// The version of the runtime that this class targets. Must match the
/// version in the runtime.
int RuntimeVersion;
@@ -362,14 +427,18 @@ private:
/// Objective-C 1 property structures when targeting the GCC runtime or it
/// will abort.
const int ProtocolVersion;
-
+ /// The version of the class ABI. This value is used in the class structure
+ /// and indicates how various fields should be interpreted.
+ const int ClassABIVersion;
/// Generates an instance variable list structure. This is a structure
/// containing a size and an array of structures containing instance variable
/// metadata. This is used purely for introspection in the fragile ABI. In
/// the non-fragile ABI, it's used for instance variable fixup.
- llvm::Constant *GenerateIvarList(ArrayRef<llvm::Constant *> IvarNames,
- ArrayRef<llvm::Constant *> IvarTypes,
- ArrayRef<llvm::Constant *> IvarOffsets);
+ virtual llvm::Constant *GenerateIvarList(ArrayRef<llvm::Constant *> IvarNames,
+ ArrayRef<llvm::Constant *> IvarTypes,
+ ArrayRef<llvm::Constant *> IvarOffsets,
+ ArrayRef<llvm::Constant *> IvarAlign,
+ ArrayRef<Qualifiers::ObjCLifetime> IvarOwnership);
/// Generates a method list structure. This is a structure containing a size
/// and an array of structures containing method metadata.
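
The new IvarOwnership column records each ivar's ARC lifetime. In clang these are the Qualifiers::ObjCLifetime values, which (presumably) give a non-fragile runtime enough information to access strong and weak ivars with the proper barriers:

// clang's ObjCLifetime enumerators, one recorded per ivar above:
enum ObjCLifetime {
  OCL_None,         // no ownership qualifier
  OCL_ExplicitNone, // __unsafe_unretained
  OCL_Strong,       // __strong
  OCL_Weak,         // __weak
  OCL_Autoreleasing // __autoreleasing
};
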
@@ -378,20 +447,20 @@ private:
/// pointer allowing them to be chained together in a linked list.
llvm::Constant *GenerateMethodList(StringRef ClassName,
StringRef CategoryName,
- ArrayRef<Selector> MethodSels,
- ArrayRef<llvm::Constant *> MethodTypes,
+ ArrayRef<const ObjCMethodDecl*> Methods,
bool isClassMethodList);
/// Emits an empty protocol. This is used for \@protocol() where no protocol
/// is found. The runtime will (hopefully) fix up the pointer to refer to the
/// real protocol.
- llvm::Constant *GenerateEmptyProtocol(const std::string &ProtocolName);
+ virtual llvm::Constant *GenerateEmptyProtocol(StringRef ProtocolName);
/// Generates a list of property metadata structures. This follows the same
/// pattern as method and instance variable metadata lists.
- llvm::Constant *GeneratePropertyList(const ObjCImplementationDecl *OID,
- SmallVectorImpl<Selector> &InstanceMethodSels,
- SmallVectorImpl<llvm::Constant*> &InstanceMethodTypes);
+ llvm::Constant *GeneratePropertyList(const Decl *Container,
+ const ObjCContainerDecl *OCD,
+ bool isClassProperty=false,
+ bool protocolOptionalProperties=false);
/// Generates a list of referenced protocols. Classes, categories, and
/// protocols all use this structure.
@@ -422,22 +491,42 @@ private:
/// Generates a method list. This is used by protocols to define the required
/// and optional methods.
- llvm::Constant *GenerateProtocolMethodList(
- ArrayRef<llvm::Constant *> MethodNames,
- ArrayRef<llvm::Constant *> MethodTypes);
+ virtual llvm::Constant *GenerateProtocolMethodList(
+ ArrayRef<const ObjCMethodDecl*> Methods);
+ /// Emits optional and required method lists.
+ template<class T>
+ void EmitProtocolMethodList(T &&Methods, llvm::Constant *&Required,
+ llvm::Constant *&Optional) {
+ SmallVector<const ObjCMethodDecl*, 16> RequiredMethods;
+ SmallVector<const ObjCMethodDecl*, 16> OptionalMethods;
+ for (const auto *I : Methods)
+ if (I->isOptional())
+ OptionalMethods.push_back(I);
+ else
+ RequiredMethods.push_back(I);
+ Required = GenerateProtocolMethodList(RequiredMethods);
+ Optional = GenerateProtocolMethodList(OptionalMethods);
+ }
/// Returns a selector with the specified type encoding. An empty string is
/// used to return an untyped selector (with the types field set to NULL).
- llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel,
+ virtual llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel,
const std::string &TypeEncoding);
+ /// Returns the name of ivar offset variables. In the GNUstep v1 ABI, this
+ /// contains the class and ivar names; in the v2 ABI it contains the type
+ /// encoding as well.
+ virtual std::string GetIVarOffsetVariableName(const ObjCInterfaceDecl *ID,
+ const ObjCIvarDecl *Ivar) {
+ const std::string Name = "__objc_ivar_offset_" + ID->getNameAsString()
+ + '.' + Ivar->getNameAsString();
+ return Name;
+ }
/// Returns the variable used to store the offset of an instance variable.
llvm::GlobalVariable *ObjCIvarOffsetVariable(const ObjCInterfaceDecl *ID,
const ObjCIvarDecl *Ivar);
/// Emits a reference to a class. This allows the linker to object if there
/// is no class of the matching name.
-
-protected:
void EmitClassRef(const std::string &className);
/// Emits a pointer to the named class
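
Two worked examples for this hunk (identifiers hypothetical): for an interface MyView declaring an ivar _frame, the v1 scheme in GetIVarOffsetVariableName yields __objc_ivar_offset_MyView._frame; and EmitProtocolMethodList can be fed a container's method range directly, e.g. with PD an ObjCProtocolDecl:

llvm::Constant *Required, *Optional;
// Partitions the instance methods by isOptional() and emits one method list
// for each half via GenerateProtocolMethodList.
EmitProtocolMethodList(PD->instance_methods(), Required, Optional);
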
@@ -476,7 +565,7 @@ protected:
public:
CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion,
- unsigned protocolClassVersion);
+ unsigned protocolClassVersion, unsigned classABI=1);
ConstantAddress GenerateConstantString(const StringLiteral *) override;
@@ -499,6 +588,14 @@ public:
Address GetAddrOfSelector(CodeGenFunction &CGF, Selector Sel) override;
llvm::Value *GetSelector(CodeGenFunction &CGF,
const ObjCMethodDecl *Method) override;
+ virtual llvm::Constant *GetConstantSelector(Selector Sel,
+ const std::string &TypeEncoding) {
+ llvm_unreachable("Runtime unable to